|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.597444089456869e-09, |
|
"logits/generated": 6.076260089874268, |
|
"logits/real": 4.217202663421631, |
|
"logps/generated": -793.698486328125, |
|
"logps/real": -221.5892333984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.597444089456869e-08, |
|
"logits/generated": 5.937064170837402, |
|
"logits/real": 4.38163423538208, |
|
"logps/generated": -943.8297729492188, |
|
"logps/real": -251.95458984375, |
|
"loss": 0.7034, |
|
"rewards/accuracies": 0.1111111119389534, |
|
"rewards/generated": 0.008679242804646492, |
|
"rewards/margins": -0.012087766081094742, |
|
"rewards/real": -0.0034085246734321117, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.194888178913738e-08, |
|
"logits/generated": 6.016368389129639, |
|
"logits/real": 4.054781913757324, |
|
"logps/generated": -866.6522216796875, |
|
"logps/real": -244.7873992919922, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.04229893162846565, |
|
"rewards/margins": 0.02972055971622467, |
|
"rewards/real": -0.01257836353033781, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.7923322683706064e-08, |
|
"logits/generated": 6.003698348999023, |
|
"logits/real": 4.410984992980957, |
|
"logps/generated": -772.19970703125, |
|
"logps/real": -265.46051025390625, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.002037657890468836, |
|
"rewards/margins": 0.010990817099809647, |
|
"rewards/real": 0.008953156881034374, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.389776357827476e-08, |
|
"logits/generated": 6.05269718170166, |
|
"logits/real": 4.136534214019775, |
|
"logps/generated": -916.8504028320312, |
|
"logps/real": -240.4838409423828, |
|
"loss": 0.7385, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": 0.05573784187436104, |
|
"rewards/margins": -0.06023125723004341, |
|
"rewards/real": -0.004493414890021086, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.987220447284344e-08, |
|
"logits/generated": 6.0863237380981445, |
|
"logits/real": 4.26947546005249, |
|
"logps/generated": -910.7779541015625, |
|
"logps/real": -224.7820281982422, |
|
"loss": 0.7306, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/generated": 0.05095089226961136, |
|
"rewards/margins": -0.05160406976938248, |
|
"rewards/real": -0.0006531793624162674, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.584664536741213e-08, |
|
"logits/generated": 5.816224098205566, |
|
"logits/real": 4.575616359710693, |
|
"logps/generated": -828.2184448242188, |
|
"logps/real": -243.4587860107422, |
|
"loss": 0.7114, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/generated": 0.006193811539560556, |
|
"rewards/margins": -0.02020972967147827, |
|
"rewards/real": -0.014015915803611279, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1182108626198082e-07, |
|
"logits/generated": 5.910815238952637, |
|
"logits/real": 4.322142601013184, |
|
"logps/generated": -864.3304443359375, |
|
"logps/real": -267.9166564941406, |
|
"loss": 0.7368, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/generated": 0.10107575356960297, |
|
"rewards/margins": -0.11164508759975433, |
|
"rewards/real": -0.010569351725280285, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2779552715654952e-07, |
|
"logits/generated": 5.944818496704102, |
|
"logits/real": 4.048387050628662, |
|
"logps/generated": -961.0286865234375, |
|
"logps/real": -237.78018188476562, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/generated": -0.04871482402086258, |
|
"rewards/margins": 0.04645959287881851, |
|
"rewards/real": -0.002255239523947239, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.437699680511182e-07, |
|
"logits/generated": 6.106154441833496, |
|
"logits/real": 4.130688667297363, |
|
"logps/generated": -899.5615234375, |
|
"logps/real": -257.23492431640625, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": -0.09308083355426788, |
|
"rewards/margins": 0.07399795949459076, |
|
"rewards/real": -0.01908286102116108, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5974440894568688e-07, |
|
"logits/generated": 5.771765232086182, |
|
"logits/real": 4.03811502456665, |
|
"logps/generated": -820.4385986328125, |
|
"logps/real": -240.3477325439453, |
|
"loss": 0.721, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/generated": 0.0457366518676281, |
|
"rewards/margins": -0.05348087102174759, |
|
"rewards/real": -0.007744210306555033, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.757188498402556e-07, |
|
"logits/generated": 5.8602399826049805, |
|
"logits/real": 4.352300643920898, |
|
"logps/generated": -949.75537109375, |
|
"logps/real": -256.1276550292969, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.08218346536159515, |
|
"rewards/margins": 0.0958067774772644, |
|
"rewards/real": 0.013623319566249847, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9169329073482426e-07, |
|
"logits/generated": 5.985620021820068, |
|
"logits/real": 4.446410179138184, |
|
"logps/generated": -827.1920776367188, |
|
"logps/real": -240.00082397460938, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.024061355739831924, |
|
"rewards/margins": 0.02317228354513645, |
|
"rewards/real": -0.0008890745230019093, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0766773162939297e-07, |
|
"logits/generated": 5.976418972015381, |
|
"logits/real": 4.385097980499268, |
|
"logps/generated": -903.66357421875, |
|
"logps/real": -253.0922393798828, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": -0.03790752962231636, |
|
"rewards/margins": 0.04170641303062439, |
|
"rewards/real": 0.0037988885305821896, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.2364217252396164e-07, |
|
"logits/generated": 5.8814568519592285, |
|
"logits/real": 4.022303581237793, |
|
"logps/generated": -872.1270751953125, |
|
"logps/real": -239.0474090576172, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": -0.08364593237638474, |
|
"rewards/margins": 0.07660557329654694, |
|
"rewards/real": -0.007040367461740971, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.3961661341853033e-07, |
|
"logits/generated": 5.902901649475098, |
|
"logits/real": 4.500279426574707, |
|
"logps/generated": -909.7518310546875, |
|
"logps/real": -252.1283416748047, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.1788884699344635, |
|
"rewards/margins": 0.1675419807434082, |
|
"rewards/real": -0.011346508748829365, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.5559105431309904e-07, |
|
"logits/generated": 6.0863142013549805, |
|
"logits/real": 4.278581142425537, |
|
"logps/generated": -862.6253662109375, |
|
"logps/real": -249.1087188720703, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/generated": -0.06084873527288437, |
|
"rewards/margins": 0.06349506974220276, |
|
"rewards/real": 0.0026463475078344345, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.715654952076677e-07, |
|
"logits/generated": 5.8610758781433105, |
|
"logits/real": 4.366555213928223, |
|
"logps/generated": -893.8683471679688, |
|
"logps/real": -238.64443969726562, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -0.22141461074352264, |
|
"rewards/margins": 0.23539376258850098, |
|
"rewards/real": 0.01397914718836546, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.875399361022364e-07, |
|
"logits/generated": 5.8870625495910645, |
|
"logits/real": 4.547116279602051, |
|
"logps/generated": -906.3629150390625, |
|
"logps/real": -263.76654052734375, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.2859794497489929, |
|
"rewards/margins": 0.2683030664920807, |
|
"rewards/real": -0.017676372081041336, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.035143769968051e-07, |
|
"logits/generated": 6.080388069152832, |
|
"logits/real": 4.27817440032959, |
|
"logps/generated": -907.4010009765625, |
|
"logps/real": -263.9908447265625, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/generated": -0.28283512592315674, |
|
"rewards/margins": 0.27541953325271606, |
|
"rewards/real": -0.007415570318698883, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.1948881789137375e-07, |
|
"logits/generated": 6.112253665924072, |
|
"logits/real": 4.260523796081543, |
|
"logps/generated": -938.8836059570312, |
|
"logps/real": -248.11587524414062, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.30967745184898376, |
|
"rewards/margins": 0.29746749997138977, |
|
"rewards/real": -0.012209964916110039, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.354632587859425e-07, |
|
"logits/generated": 5.790924549102783, |
|
"logits/real": 4.103597640991211, |
|
"logps/generated": -907.6522216796875, |
|
"logps/real": -227.3259735107422, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -0.22061340510845184, |
|
"rewards/margins": 0.21634364128112793, |
|
"rewards/real": -0.004269786179065704, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.514376996805112e-07, |
|
"logits/generated": 5.998345375061035, |
|
"logits/real": 4.214221000671387, |
|
"logps/generated": -942.66552734375, |
|
"logps/real": -222.0115509033203, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.32513970136642456, |
|
"rewards/margins": 0.32551324367523193, |
|
"rewards/real": 0.0003735637292265892, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.6741214057507985e-07, |
|
"logits/generated": 6.107602596282959, |
|
"logits/real": 4.030685901641846, |
|
"logps/generated": -899.4200439453125, |
|
"logps/real": -238.90316772460938, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": -0.3097040355205536, |
|
"rewards/margins": 0.31323733925819397, |
|
"rewards/real": 0.003533291397616267, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.833865814696485e-07, |
|
"logits/generated": 6.146908283233643, |
|
"logits/real": 4.572686672210693, |
|
"logps/generated": -849.7062377929688, |
|
"logps/real": -287.7304382324219, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/generated": -0.2624419331550598, |
|
"rewards/margins": 0.24513304233551025, |
|
"rewards/real": -0.017308901995420456, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.993610223642173e-07, |
|
"logits/generated": 6.0791335105896, |
|
"logits/real": 4.294358730316162, |
|
"logps/generated": -928.7376708984375, |
|
"logps/real": -228.5765380859375, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.3841578960418701, |
|
"rewards/margins": 0.3720285892486572, |
|
"rewards/real": -0.012129291892051697, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1533546325878595e-07, |
|
"logits/generated": 6.011288642883301, |
|
"logits/real": 3.944628953933716, |
|
"logps/generated": -920.6962890625, |
|
"logps/real": -224.2725372314453, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": -0.4267396926879883, |
|
"rewards/margins": 0.42054229974746704, |
|
"rewards/real": -0.006197371985763311, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.313099041533546e-07, |
|
"logits/generated": 5.999421119689941, |
|
"logits/real": 4.240687370300293, |
|
"logps/generated": -839.8876953125, |
|
"logps/real": -239.6907196044922, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/generated": -0.29800790548324585, |
|
"rewards/margins": 0.30169859528541565, |
|
"rewards/real": 0.003690724028274417, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.472843450479233e-07, |
|
"logits/generated": 6.157493591308594, |
|
"logits/real": 4.233771324157715, |
|
"logps/generated": -919.6163330078125, |
|
"logps/real": -256.02667236328125, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.436979204416275, |
|
"rewards/margins": 0.4427516460418701, |
|
"rewards/real": 0.00577241787686944, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.63258785942492e-07, |
|
"logits/generated": 5.8831915855407715, |
|
"logits/real": 4.111518859863281, |
|
"logps/generated": -915.33642578125, |
|
"logps/real": -250.9635772705078, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -0.44483208656311035, |
|
"rewards/margins": 0.45819053053855896, |
|
"rewards/real": 0.013358525931835175, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.792332268370607e-07, |
|
"logits/generated": 6.069736480712891, |
|
"logits/real": 4.062201499938965, |
|
"logps/generated": -1062.180419921875, |
|
"logps/real": -239.5394744873047, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -0.7640186548233032, |
|
"rewards/margins": 0.7979092597961426, |
|
"rewards/real": 0.03389066457748413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.952076677316294e-07, |
|
"logits/generated": 6.100470542907715, |
|
"logits/real": 4.4795241355896, |
|
"logps/generated": -883.9137573242188, |
|
"logps/real": -260.9756164550781, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.466789186000824, |
|
"rewards/margins": 0.47098153829574585, |
|
"rewards/real": 0.0041923513635993, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.987557767507998e-07, |
|
"logits/generated": 5.904444694519043, |
|
"logits/real": 4.497801303863525, |
|
"logps/generated": -920.7421875, |
|
"logps/real": -258.6832580566406, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.6512015461921692, |
|
"rewards/margins": 0.6806803941726685, |
|
"rewards/real": 0.02947883866727352, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.969783149662282e-07, |
|
"logits/generated": 5.837555885314941, |
|
"logits/real": 4.39646053314209, |
|
"logps/generated": -887.0213623046875, |
|
"logps/real": -259.81976318359375, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/generated": -0.6606628894805908, |
|
"rewards/margins": 0.6972694396972656, |
|
"rewards/real": 0.036606594920158386, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.952008531816565e-07, |
|
"logits/generated": 6.078923225402832, |
|
"logits/real": 4.288995265960693, |
|
"logps/generated": -755.9134521484375, |
|
"logps/real": -254.28958129882812, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/generated": -0.4952046275138855, |
|
"rewards/margins": 0.5218986868858337, |
|
"rewards/real": 0.026694035157561302, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.93423391397085e-07, |
|
"logits/generated": 6.069632530212402, |
|
"logits/real": 4.2718353271484375, |
|
"logps/generated": -885.2103271484375, |
|
"logps/real": -246.41549682617188, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.8642350435256958, |
|
"rewards/margins": 0.8856006860733032, |
|
"rewards/real": 0.021365612745285034, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.916459296125133e-07, |
|
"logits/generated": 6.0406084060668945, |
|
"logits/real": 4.213619232177734, |
|
"logps/generated": -862.3121337890625, |
|
"logps/real": -237.30038452148438, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.819617748260498, |
|
"rewards/margins": 0.8550539016723633, |
|
"rewards/real": 0.03543621301651001, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.898684678279417e-07, |
|
"logits/generated": 5.9395928382873535, |
|
"logits/real": 4.514933109283447, |
|
"logps/generated": -784.0012817382812, |
|
"logps/real": -274.2304992675781, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.666130006313324, |
|
"rewards/margins": 0.7213379144668579, |
|
"rewards/real": 0.055207859724760056, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8809100604337e-07, |
|
"logits/generated": 5.9382734298706055, |
|
"logits/real": 4.106156826019287, |
|
"logps/generated": -868.2293090820312, |
|
"logps/real": -257.92828369140625, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.924160361289978, |
|
"rewards/margins": 0.9683877229690552, |
|
"rewards/real": 0.044227343052625656, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.863135442587984e-07, |
|
"logits/generated": 6.171679496765137, |
|
"logits/real": 4.2907819747924805, |
|
"logps/generated": -979.2257080078125, |
|
"logps/real": -244.9657745361328, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -1.2005796432495117, |
|
"rewards/margins": 1.255791187286377, |
|
"rewards/real": 0.05521152541041374, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.845360824742267e-07, |
|
"logits/generated": 6.069159984588623, |
|
"logits/real": 4.032870292663574, |
|
"logps/generated": -804.0493774414062, |
|
"logps/real": -250.74868774414062, |
|
"loss": 0.447, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.7962594032287598, |
|
"rewards/margins": 0.8555679321289062, |
|
"rewards/real": 0.05930844694375992, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.827586206896552e-07, |
|
"logits/generated": 6.105320453643799, |
|
"logits/real": 4.221116542816162, |
|
"logps/generated": -797.972900390625, |
|
"logps/real": -244.3699188232422, |
|
"loss": 0.471, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.7431430816650391, |
|
"rewards/margins": 0.8322073817253113, |
|
"rewards/real": 0.08906435966491699, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.809811589050835e-07, |
|
"logits/generated": 5.868694305419922, |
|
"logits/real": 4.118344306945801, |
|
"logps/generated": -944.1519775390625, |
|
"logps/real": -250.259765625, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.201935887336731, |
|
"rewards/margins": 1.2849478721618652, |
|
"rewards/real": 0.08301188051700592, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.792036971205119e-07, |
|
"logits/generated": 6.04774284362793, |
|
"logits/real": 4.184063911437988, |
|
"logps/generated": -943.5716552734375, |
|
"logps/real": -233.9786834716797, |
|
"loss": 0.3929, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.1643246412277222, |
|
"rewards/margins": 1.2494871616363525, |
|
"rewards/real": 0.08516237884759903, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.774262353359402e-07, |
|
"logits/generated": 6.015780925750732, |
|
"logits/real": 4.106911659240723, |
|
"logps/generated": -873.0906982421875, |
|
"logps/real": -245.89151000976562, |
|
"loss": 0.4092, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.0168969631195068, |
|
"rewards/margins": 1.096966028213501, |
|
"rewards/real": 0.08006921410560608, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7564877355136863e-07, |
|
"logits/generated": 6.1327619552612305, |
|
"logits/real": 4.286798000335693, |
|
"logps/generated": -913.6229248046875, |
|
"logps/real": -235.7820281982422, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.1738783121109009, |
|
"rewards/margins": 1.2836366891860962, |
|
"rewards/real": 0.1097583994269371, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.73871311766797e-07, |
|
"logits/generated": 5.843601226806641, |
|
"logits/real": 4.894453048706055, |
|
"logps/generated": -836.0406494140625, |
|
"logps/real": -262.8157653808594, |
|
"loss": 0.3896, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.122596263885498, |
|
"rewards/margins": 1.2202900648117065, |
|
"rewards/real": 0.09769367426633835, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.7209384998222536e-07, |
|
"logits/generated": 5.926724433898926, |
|
"logits/real": 4.339844226837158, |
|
"logps/generated": -835.6307373046875, |
|
"logps/real": -243.3745880126953, |
|
"loss": 0.3792, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.0902034044265747, |
|
"rewards/margins": 1.2509477138519287, |
|
"rewards/real": 0.16074436902999878, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.7031638819765373e-07, |
|
"logits/generated": 6.176774501800537, |
|
"logits/real": 4.369190692901611, |
|
"logps/generated": -1035.437255859375, |
|
"logps/real": -241.65518188476562, |
|
"loss": 0.3517, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.6769460439682007, |
|
"rewards/margins": 1.8335769176483154, |
|
"rewards/real": 0.15663087368011475, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.6853892641308215e-07, |
|
"logits/generated": 6.054518699645996, |
|
"logits/real": 4.261479377746582, |
|
"logps/generated": -991.4581909179688, |
|
"logps/real": -227.12417602539062, |
|
"loss": 0.3452, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.612874984741211, |
|
"rewards/margins": 1.7803363800048828, |
|
"rewards/real": 0.1674613654613495, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.6676146462851046e-07, |
|
"logits/generated": 5.990649700164795, |
|
"logits/real": 4.494472026824951, |
|
"logps/generated": -877.47412109375, |
|
"logps/real": -245.2666015625, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.3127868175506592, |
|
"rewards/margins": 1.4855382442474365, |
|
"rewards/real": 0.1727515310049057, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.649840028439388e-07, |
|
"logits/generated": 6.058517932891846, |
|
"logits/real": 4.018253803253174, |
|
"logps/generated": -849.6085815429688, |
|
"logps/real": -239.42333984375, |
|
"loss": 0.3424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.2417773008346558, |
|
"rewards/margins": 1.4562361240386963, |
|
"rewards/real": 0.21445894241333008, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.632065410593672e-07, |
|
"logits/generated": 6.090991020202637, |
|
"logits/real": 4.206871509552002, |
|
"logps/generated": -858.64306640625, |
|
"logps/real": -231.8360595703125, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.3968095779418945, |
|
"rewards/margins": 1.6249290704727173, |
|
"rewards/real": 0.22811949253082275, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6142907927479556e-07, |
|
"logits/generated": 5.986910820007324, |
|
"logits/real": 4.2822465896606445, |
|
"logps/generated": -1048.8765869140625, |
|
"logps/real": -231.92068481445312, |
|
"loss": 0.2983, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.237921714782715, |
|
"rewards/margins": 2.462768793106079, |
|
"rewards/real": 0.22484686970710754, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.59651617490224e-07, |
|
"logits/generated": 5.905124187469482, |
|
"logits/real": 4.177928447723389, |
|
"logps/generated": -821.7687377929688, |
|
"logps/real": -246.280517578125, |
|
"loss": 0.3157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.3176755905151367, |
|
"rewards/margins": 1.5714536905288696, |
|
"rewards/real": 0.2537779211997986, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.578741557056523e-07, |
|
"logits/generated": 5.976016044616699, |
|
"logits/real": 4.194876194000244, |
|
"logps/generated": -845.0601806640625, |
|
"logps/real": -246.12539672851562, |
|
"loss": 0.2847, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.4623727798461914, |
|
"rewards/margins": 1.7181476354599, |
|
"rewards/real": 0.25577467679977417, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.560966939210807e-07, |
|
"logits/generated": 6.045588493347168, |
|
"logits/real": 4.3586626052856445, |
|
"logps/generated": -811.4501953125, |
|
"logps/real": -235.3633270263672, |
|
"loss": 0.2894, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.3929394483566284, |
|
"rewards/margins": 1.660951018333435, |
|
"rewards/real": 0.2680116295814514, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.54319232136509e-07, |
|
"logits/generated": 5.941956520080566, |
|
"logits/real": 3.9948837757110596, |
|
"logps/generated": -939.8425903320312, |
|
"logps/real": -252.4247283935547, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.8964020013809204, |
|
"rewards/margins": 2.198000907897949, |
|
"rewards/real": 0.3015987277030945, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.525417703519374e-07, |
|
"logits/generated": 5.852833271026611, |
|
"logits/real": 4.229121208190918, |
|
"logps/generated": -947.67333984375, |
|
"logps/real": -247.6307830810547, |
|
"loss": 0.2615, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.9862486124038696, |
|
"rewards/margins": 2.275512933731079, |
|
"rewards/real": 0.28926438093185425, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.507643085673658e-07, |
|
"logits/generated": 5.97081995010376, |
|
"logits/real": 4.184626579284668, |
|
"logps/generated": -939.2525634765625, |
|
"logps/real": -249.10293579101562, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.023348331451416, |
|
"rewards/margins": 2.3503973484039307, |
|
"rewards/real": 0.3270490765571594, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.489868467827941e-07, |
|
"logits/generated": 5.946187496185303, |
|
"logits/real": 3.98576021194458, |
|
"logps/generated": -868.517578125, |
|
"logps/real": -234.7804412841797, |
|
"loss": 0.2434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.8050874471664429, |
|
"rewards/margins": 2.1618123054504395, |
|
"rewards/real": 0.35672444105148315, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.4720938499822254e-07, |
|
"logits/generated": 6.001513481140137, |
|
"logits/real": 4.200368881225586, |
|
"logps/generated": -938.6095581054688, |
|
"logps/real": -236.7099609375, |
|
"loss": 0.2342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.136950731277466, |
|
"rewards/margins": 2.4946799278259277, |
|
"rewards/real": 0.3577292263507843, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4543192321365085e-07, |
|
"logits/generated": 6.019902229309082, |
|
"logits/real": 4.102996349334717, |
|
"logps/generated": -877.0018310546875, |
|
"logps/real": -231.89395141601562, |
|
"loss": 0.2346, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.801348328590393, |
|
"rewards/margins": 2.227733850479126, |
|
"rewards/real": 0.42638540267944336, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4365446142907927e-07, |
|
"logits/generated": 5.965851783752441, |
|
"logits/real": 4.574382781982422, |
|
"logps/generated": -923.7843627929688, |
|
"logps/real": -251.86672973632812, |
|
"loss": 0.2313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.9577804803848267, |
|
"rewards/margins": 2.378849744796753, |
|
"rewards/real": 0.4210694432258606, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.4187699964450764e-07, |
|
"logits/generated": 6.019913196563721, |
|
"logits/real": 4.158315181732178, |
|
"logps/generated": -858.7822265625, |
|
"logps/real": -240.2414093017578, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.8835636377334595, |
|
"rewards/margins": 2.340017795562744, |
|
"rewards/real": 0.45645445585250854, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.4009953785993595e-07, |
|
"logits/generated": 6.02773380279541, |
|
"logits/real": 4.274472713470459, |
|
"logps/generated": -930.7633056640625, |
|
"logps/real": -252.86410522460938, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2265586853027344, |
|
"rewards/margins": 2.6945266723632812, |
|
"rewards/real": 0.4679679274559021, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3832207607536437e-07, |
|
"logits/generated": 6.097564220428467, |
|
"logits/real": 4.119298458099365, |
|
"logps/generated": -823.2561645507812, |
|
"logps/real": -236.7152557373047, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.7512767314910889, |
|
"rewards/margins": 2.2316291332244873, |
|
"rewards/real": 0.48035264015197754, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.365446142907927e-07, |
|
"logits/generated": 6.0582170486450195, |
|
"logits/real": 4.083959579467773, |
|
"logps/generated": -952.2449340820312, |
|
"logps/real": -228.7444305419922, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.267892599105835, |
|
"rewards/margins": 2.736295461654663, |
|
"rewards/real": 0.4684027135372162, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.347671525062211e-07, |
|
"logits/generated": 5.989724159240723, |
|
"logits/real": 4.3230791091918945, |
|
"logps/generated": -932.2943115234375, |
|
"logps/real": -237.28170776367188, |
|
"loss": 0.1903, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2489707469940186, |
|
"rewards/margins": 2.7711246013641357, |
|
"rewards/real": 0.5221537947654724, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.3298969072164947e-07, |
|
"logits/generated": 5.9734578132629395, |
|
"logits/real": 4.305315971374512, |
|
"logps/generated": -898.1632080078125, |
|
"logps/real": -249.6607666015625, |
|
"loss": 0.1736, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.172926664352417, |
|
"rewards/margins": 2.7069246768951416, |
|
"rewards/real": 0.5339978337287903, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.3121222893707783e-07, |
|
"logits/generated": 5.79467248916626, |
|
"logits/real": 4.179832458496094, |
|
"logps/generated": -853.1522216796875, |
|
"logps/real": -225.7552490234375, |
|
"loss": 0.1769, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.0059478282928467, |
|
"rewards/margins": 2.5637969970703125, |
|
"rewards/real": 0.5578492879867554, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.294347671525062e-07, |
|
"logits/generated": 6.031177997589111, |
|
"logits/real": 4.149500846862793, |
|
"logps/generated": -914.326171875, |
|
"logps/real": -237.4337158203125, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2514748573303223, |
|
"rewards/margins": 2.817643642425537, |
|
"rewards/real": 0.5661691427230835, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.276573053679346e-07, |
|
"logits/generated": 6.02950382232666, |
|
"logits/real": 4.436091423034668, |
|
"logps/generated": -924.0319213867188, |
|
"logps/real": -244.99362182617188, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.3590235710144043, |
|
"rewards/margins": 2.9966607093811035, |
|
"rewards/real": 0.6376368403434753, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2587984358336293e-07, |
|
"logits/generated": 6.004773139953613, |
|
"logits/real": 4.440590858459473, |
|
"logps/generated": -880.44970703125, |
|
"logps/real": -252.4867401123047, |
|
"loss": 0.148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2073614597320557, |
|
"rewards/margins": 2.841465473175049, |
|
"rewards/real": 0.6341038942337036, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.241023817987913e-07, |
|
"logits/generated": 6.170002460479736, |
|
"logits/real": 4.281262397766113, |
|
"logps/generated": -804.2672119140625, |
|
"logps/real": -246.38973999023438, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.0122382640838623, |
|
"rewards/margins": 2.680762529373169, |
|
"rewards/real": 0.668523907661438, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2232492001421966e-07, |
|
"logits/generated": 5.92216157913208, |
|
"logits/real": 4.2551093101501465, |
|
"logps/generated": -862.2703857421875, |
|
"logps/real": -258.5106201171875, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.400254249572754, |
|
"rewards/margins": 3.064797878265381, |
|
"rewards/real": 0.6645434498786926, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.2054745822964803e-07, |
|
"logits/generated": 5.7719502449035645, |
|
"logits/real": 4.651850700378418, |
|
"logps/generated": -891.9352416992188, |
|
"logps/real": -271.16015625, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.4314839839935303, |
|
"rewards/margins": 3.151881694793701, |
|
"rewards/real": 0.7203975319862366, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1876999644507645e-07, |
|
"logits/generated": 6.078845024108887, |
|
"logits/real": 4.190167427062988, |
|
"logps/generated": -986.5220947265625, |
|
"logps/real": -244.7814483642578, |
|
"loss": 0.1271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.8437342643737793, |
|
"rewards/margins": 3.5870704650878906, |
|
"rewards/real": 0.7433363795280457, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1699253466050476e-07, |
|
"logits/generated": 5.826613903045654, |
|
"logits/real": 4.3144378662109375, |
|
"logps/generated": -897.0218505859375, |
|
"logps/real": -217.27749633789062, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.579515218734741, |
|
"rewards/margins": 3.3170909881591797, |
|
"rewards/real": 0.737575888633728, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.152150728759332e-07, |
|
"logits/generated": 5.925543785095215, |
|
"logits/real": 4.433269500732422, |
|
"logps/generated": -918.3221435546875, |
|
"logps/real": -254.0979766845703, |
|
"loss": 0.1275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.530599594116211, |
|
"rewards/margins": 3.2494373321533203, |
|
"rewards/real": 0.7188378572463989, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.134376110913615e-07, |
|
"logits/generated": 5.992081642150879, |
|
"logits/real": 4.210297107696533, |
|
"logps/generated": -960.2722778320312, |
|
"logps/real": -226.6741943359375, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.807277202606201, |
|
"rewards/margins": 3.636183261871338, |
|
"rewards/real": 0.8289061784744263, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1166014930678986e-07, |
|
"logits/generated": 6.153465747833252, |
|
"logits/real": 4.260440349578857, |
|
"logps/generated": -966.8284912109375, |
|
"logps/real": -226.76773071289062, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.9602110385894775, |
|
"rewards/margins": 3.7220828533172607, |
|
"rewards/real": 0.7618720531463623, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.098826875222183e-07, |
|
"logits/generated": 5.99381160736084, |
|
"logits/real": 4.221859931945801, |
|
"logps/generated": -903.6896362304688, |
|
"logps/real": -233.30349731445312, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.786527633666992, |
|
"rewards/margins": 3.6095592975616455, |
|
"rewards/real": 0.823030948638916, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.081052257376466e-07, |
|
"logits/generated": 5.980920314788818, |
|
"logits/real": 4.316037178039551, |
|
"logps/generated": -979.0846557617188, |
|
"logps/real": -240.3975830078125, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.2098584175109863, |
|
"rewards/margins": 4.0826005935668945, |
|
"rewards/real": 0.8727418780326843, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.06327763953075e-07, |
|
"logits/generated": 6.098940849304199, |
|
"logits/real": 4.4456987380981445, |
|
"logps/generated": -979.9505004882812, |
|
"logps/real": -263.83966064453125, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.254727840423584, |
|
"rewards/margins": 4.050951957702637, |
|
"rewards/real": 0.7962234616279602, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.045503021685033e-07, |
|
"logits/generated": 6.151425361633301, |
|
"logits/real": 4.164916038513184, |
|
"logps/generated": -920.0028076171875, |
|
"logps/real": -246.395263671875, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9581751823425293, |
|
"rewards/margins": 3.8200302124023438, |
|
"rewards/real": 0.861855149269104, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0277284038393174e-07, |
|
"logits/generated": 6.034658908843994, |
|
"logits/real": 4.340263366699219, |
|
"logps/generated": -912.1318359375, |
|
"logps/real": -238.3675537109375, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.746849536895752, |
|
"rewards/margins": 3.6980957984924316, |
|
"rewards/real": 0.9512465596199036, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.009953785993601e-07, |
|
"logits/generated": 5.890770435333252, |
|
"logits/real": 4.256333351135254, |
|
"logps/generated": -841.7523193359375, |
|
"logps/real": -251.98605346679688, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.5989060401916504, |
|
"rewards/margins": 3.512964963912964, |
|
"rewards/real": 0.9140589833259583, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.992179168147884e-07, |
|
"logits/generated": 5.99869441986084, |
|
"logits/real": 4.36562442779541, |
|
"logps/generated": -1008.8406372070312, |
|
"logps/real": -242.4652862548828, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4944260120391846, |
|
"rewards/margins": 4.502266883850098, |
|
"rewards/real": 1.0078411102294922, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.9744045503021684e-07, |
|
"logits/generated": 5.980597496032715, |
|
"logits/real": 4.342673301696777, |
|
"logps/generated": -875.7488403320312, |
|
"logps/real": -243.07528686523438, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.8209424018859863, |
|
"rewards/margins": 3.841762065887451, |
|
"rewards/real": 1.0208194255828857, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.956629932456452e-07, |
|
"logits/generated": 5.762509346008301, |
|
"logits/real": 4.160869598388672, |
|
"logps/generated": -963.9265747070312, |
|
"logps/real": -229.8173828125, |
|
"loss": 0.072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.1484429836273193, |
|
"rewards/margins": 4.151796340942383, |
|
"rewards/real": 1.0033533573150635, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.938855314610736e-07, |
|
"logits/generated": 6.13181209564209, |
|
"logits/real": 4.697200775146484, |
|
"logps/generated": -839.2921752929688, |
|
"logps/real": -260.0076904296875, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.6962039470672607, |
|
"rewards/margins": 3.7993292808532715, |
|
"rewards/real": 1.103124976158142, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.9210806967650194e-07, |
|
"logits/generated": 5.9377360343933105, |
|
"logits/real": 4.226620674133301, |
|
"logps/generated": -908.3089599609375, |
|
"logps/real": -254.44149780273438, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.9502556324005127, |
|
"rewards/margins": 3.9924514293670654, |
|
"rewards/real": 1.0421960353851318, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.903306078919303e-07, |
|
"logits/generated": 6.1085710525512695, |
|
"logits/real": 4.342537879943848, |
|
"logps/generated": -1006.3308715820312, |
|
"logps/real": -224.99795532226562, |
|
"loss": 0.0711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.459972381591797, |
|
"rewards/margins": 4.4290771484375, |
|
"rewards/real": 0.9691041707992554, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.8855314610735867e-07, |
|
"logits/generated": 6.049094200134277, |
|
"logits/real": 4.437540531158447, |
|
"logps/generated": -905.2625732421875, |
|
"logps/real": -250.00546264648438, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0280721187591553, |
|
"rewards/margins": 4.146524429321289, |
|
"rewards/real": 1.1184518337249756, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.867756843227871e-07, |
|
"logits/generated": 6.0964884757995605, |
|
"logits/real": 4.308868408203125, |
|
"logps/generated": -988.3762817382812, |
|
"logps/real": -213.0933074951172, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6333088874816895, |
|
"rewards/margins": 4.766198635101318, |
|
"rewards/real": 1.132889986038208, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.849982225382154e-07, |
|
"logits/generated": 6.094457626342773, |
|
"logits/real": 4.595139980316162, |
|
"logps/generated": -874.7532348632812, |
|
"logps/real": -239.2107391357422, |
|
"loss": 0.071, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.91304612159729, |
|
"rewards/margins": 4.163543701171875, |
|
"rewards/real": 1.2504980564117432, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.8322076075364377e-07, |
|
"logits/generated": 5.932214260101318, |
|
"logits/real": 4.410408020019531, |
|
"logps/generated": -892.4168090820312, |
|
"logps/real": -231.3613739013672, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.2558155059814453, |
|
"rewards/margins": 4.5149030685424805, |
|
"rewards/real": 1.2590879201889038, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8144329896907214e-07, |
|
"logits/generated": 5.978908538818359, |
|
"logits/real": 3.7718472480773926, |
|
"logps/generated": -904.0095825195312, |
|
"logps/real": -221.69680786132812, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.370866298675537, |
|
"rewards/margins": 4.739681243896484, |
|
"rewards/real": 1.3688147068023682, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.796658371845005e-07, |
|
"logits/generated": 5.914143085479736, |
|
"logits/real": 4.5271077156066895, |
|
"logps/generated": -804.7938232421875, |
|
"logps/real": -251.84097290039062, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7886099815368652, |
|
"rewards/margins": 4.072835445404053, |
|
"rewards/real": 1.2842260599136353, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.778883753999289e-07, |
|
"logits/generated": 6.046614646911621, |
|
"logits/real": 4.076377868652344, |
|
"logps/generated": -828.2825317382812, |
|
"logps/real": -222.2332000732422, |
|
"loss": 0.058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.041111469268799, |
|
"rewards/margins": 4.31611967086792, |
|
"rewards/real": 1.2750083208084106, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7611091361535723e-07, |
|
"logits/generated": 5.892106056213379, |
|
"logits/real": 4.29874324798584, |
|
"logps/generated": -991.6868286132812, |
|
"logps/real": -222.35653686523438, |
|
"loss": 0.055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.95190167427063, |
|
"rewards/margins": 5.255926132202148, |
|
"rewards/real": 1.3040244579315186, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7433345183078565e-07, |
|
"logits/generated": 5.954197883605957, |
|
"logits/real": 3.857102632522583, |
|
"logps/generated": -923.1248168945312, |
|
"logps/real": -203.76199340820312, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.505117893218994, |
|
"rewards/margins": 4.928933143615723, |
|
"rewards/real": 1.4238157272338867, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.7255599004621397e-07, |
|
"logits/generated": 6.068178653717041, |
|
"logits/real": 4.234717845916748, |
|
"logps/generated": -960.6267700195312, |
|
"logps/real": -242.6820831298828, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7443442344665527, |
|
"rewards/margins": 5.048287868499756, |
|
"rewards/real": 1.3039430379867554, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.7077852826164233e-07, |
|
"logits/generated": 6.109949588775635, |
|
"logits/real": 4.280592441558838, |
|
"logps/generated": -789.5339965820312, |
|
"logps/real": -248.1779022216797, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.201239824295044, |
|
"rewards/margins": 4.559616565704346, |
|
"rewards/real": 1.358377456665039, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.6900106647707075e-07, |
|
"logits/generated": 6.069329261779785, |
|
"logits/real": 4.076823711395264, |
|
"logps/generated": -924.1849365234375, |
|
"logps/real": -228.2224578857422, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9169020652770996, |
|
"rewards/margins": 5.305215358734131, |
|
"rewards/real": 1.3883137702941895, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.6722360469249906e-07, |
|
"logits/generated": 6.091455936431885, |
|
"logits/real": 4.33205509185791, |
|
"logps/generated": -1016.7650146484375, |
|
"logps/real": -233.74496459960938, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.456690311431885, |
|
"rewards/margins": 5.847256183624268, |
|
"rewards/real": 1.3905656337738037, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.654461429079275e-07, |
|
"logits/generated": 6.010404109954834, |
|
"logits/real": 4.235081672668457, |
|
"logps/generated": -920.1266479492188, |
|
"logps/real": -240.25985717773438, |
|
"loss": 0.0433, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6452674865722656, |
|
"rewards/margins": 5.162905693054199, |
|
"rewards/real": 1.5176377296447754, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.6366868112335585e-07, |
|
"logits/generated": 5.845242500305176, |
|
"logits/real": 4.197909355163574, |
|
"logps/generated": -866.0949096679688, |
|
"logps/real": -226.61862182617188, |
|
"loss": 0.037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.452317476272583, |
|
"rewards/margins": 5.058979511260986, |
|
"rewards/real": 1.6066612005233765, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.618912193387842e-07, |
|
"logits/generated": 6.059525012969971, |
|
"logits/real": 4.2445173263549805, |
|
"logps/generated": -1042.3199462890625, |
|
"logps/real": -222.6482696533203, |
|
"loss": 0.0398, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.495182514190674, |
|
"rewards/margins": 6.020852088928223, |
|
"rewards/real": 1.5256696939468384, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.601137575542126e-07, |
|
"logits/generated": 6.201509952545166, |
|
"logits/real": 4.5010085105896, |
|
"logps/generated": -1009.8512573242188, |
|
"logps/real": -234.58670043945312, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.315596580505371, |
|
"rewards/margins": 5.6330060958862305, |
|
"rewards/real": 1.317409634590149, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.583362957696409e-07, |
|
"logits/generated": 6.0568976402282715, |
|
"logits/real": 4.108635425567627, |
|
"logps/generated": -997.2374267578125, |
|
"logps/real": -220.4910430908203, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.327475070953369, |
|
"rewards/margins": 5.861368179321289, |
|
"rewards/real": 1.5338925123214722, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.565588339850693e-07, |
|
"logits/generated": 6.054575443267822, |
|
"logits/real": 4.0250701904296875, |
|
"logps/generated": -973.4529418945312, |
|
"logps/real": -234.23666381835938, |
|
"loss": 0.0386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.307805061340332, |
|
"rewards/margins": 5.763493537902832, |
|
"rewards/real": 1.455688714981079, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.547813722004977e-07, |
|
"logits/generated": 5.811452865600586, |
|
"logits/real": 4.26353120803833, |
|
"logps/generated": -964.6253051757812, |
|
"logps/real": -233.96548461914062, |
|
"loss": 0.0324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.142745018005371, |
|
"rewards/margins": 5.800713539123535, |
|
"rewards/real": 1.6579687595367432, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5300391041592605e-07, |
|
"logits/generated": 5.870312213897705, |
|
"logits/real": 4.17107629776001, |
|
"logps/generated": -863.7185668945312, |
|
"logps/real": -227.08627319335938, |
|
"loss": 0.032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.778026580810547, |
|
"rewards/margins": 5.291924476623535, |
|
"rewards/real": 1.5138972997665405, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.512264486313544e-07, |
|
"logits/generated": 5.952390670776367, |
|
"logits/real": 4.353332996368408, |
|
"logps/generated": -848.7848510742188, |
|
"logps/real": -239.64132690429688, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.684706449508667, |
|
"rewards/margins": 5.332028388977051, |
|
"rewards/real": 1.6473219394683838, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.494489868467828e-07, |
|
"logits/generated": 5.984662055969238, |
|
"logits/real": 4.688880443572998, |
|
"logps/generated": -926.8069458007812, |
|
"logps/real": -222.42788696289062, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.1294732093811035, |
|
"rewards/margins": 5.777259349822998, |
|
"rewards/real": 1.6477859020233154, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.4767152506221114e-07, |
|
"logits/generated": 6.093937873840332, |
|
"logits/real": 4.4692511558532715, |
|
"logps/generated": -893.0154418945312, |
|
"logps/real": -249.5071563720703, |
|
"loss": 0.035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.984126567840576, |
|
"rewards/margins": 5.740622520446777, |
|
"rewards/real": 1.7564961910247803, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.458940632776395e-07, |
|
"logits/generated": 6.014351844787598, |
|
"logits/real": 4.509620666503906, |
|
"logps/generated": -945.0499877929688, |
|
"logps/real": -208.66506958007812, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.119633674621582, |
|
"rewards/margins": 5.723677635192871, |
|
"rewards/real": 1.6040436029434204, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.441166014930679e-07, |
|
"logits/generated": 5.890795707702637, |
|
"logits/real": 4.306885719299316, |
|
"logps/generated": -920.9514770507812, |
|
"logps/real": -219.43441772460938, |
|
"loss": 0.03, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.295720100402832, |
|
"rewards/margins": 5.946078300476074, |
|
"rewards/real": 1.6503584384918213, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.4233913970849624e-07, |
|
"logits/generated": 6.092907905578613, |
|
"logits/real": 4.382439613342285, |
|
"logps/generated": -869.8155517578125, |
|
"logps/real": -213.0802459716797, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7826313972473145, |
|
"rewards/margins": 5.640969276428223, |
|
"rewards/real": 1.858338713645935, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.405616779239246e-07, |
|
"logits/generated": 5.900615692138672, |
|
"logits/real": 4.525036334991455, |
|
"logps/generated": -850.4383544921875, |
|
"logps/real": -246.38864135742188, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.64056396484375, |
|
"rewards/margins": 5.370596885681152, |
|
"rewards/real": 1.7300331592559814, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.38784216139353e-07, |
|
"logits/generated": 6.163350582122803, |
|
"logits/real": 4.153713226318359, |
|
"logps/generated": -1029.7822265625, |
|
"logps/real": -221.838134765625, |
|
"loss": 0.0266, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.886613368988037, |
|
"rewards/margins": 6.8068389892578125, |
|
"rewards/real": 1.9202255010604858, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.370067543547814e-07, |
|
"logits/generated": 5.858575344085693, |
|
"logits/real": 4.069919109344482, |
|
"logps/generated": -794.5404663085938, |
|
"logps/real": -215.64620971679688, |
|
"loss": 0.026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5341129302978516, |
|
"rewards/margins": 5.426337242126465, |
|
"rewards/real": 1.8922239542007446, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.352292925702097e-07, |
|
"logits/generated": 6.137094497680664, |
|
"logits/real": 4.2704057693481445, |
|
"logps/generated": -988.0245361328125, |
|
"logps/real": -224.5236358642578, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.90227746963501, |
|
"rewards/margins": 6.6614089012146, |
|
"rewards/real": 1.7591317892074585, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.334518307856381e-07, |
|
"logits/generated": 5.977653503417969, |
|
"logits/real": 4.102234363555908, |
|
"logps/generated": -871.8009643554688, |
|
"logps/real": -239.428955078125, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9591381549835205, |
|
"rewards/margins": 5.843039512634277, |
|
"rewards/real": 1.883901834487915, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.316743690010665e-07, |
|
"logits/generated": 5.898265361785889, |
|
"logits/real": 3.970461368560791, |
|
"logps/generated": -968.3258056640625, |
|
"logps/real": -222.0578155517578, |
|
"loss": 0.0248, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.621912479400635, |
|
"rewards/margins": 6.498594760894775, |
|
"rewards/real": 1.8766825199127197, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.298969072164948e-07, |
|
"logits/generated": 5.933172225952148, |
|
"logits/real": 4.601564407348633, |
|
"logps/generated": -935.9641723632812, |
|
"logps/real": -240.28604125976562, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.39015007019043, |
|
"rewards/margins": 6.343320846557617, |
|
"rewards/real": 1.9531705379486084, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.281194454319232e-07, |
|
"logits/generated": 6.0884270668029785, |
|
"logits/real": 4.594856262207031, |
|
"logps/generated": -970.0340576171875, |
|
"logps/real": -238.2774658203125, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.799683094024658, |
|
"rewards/margins": 6.7805304527282715, |
|
"rewards/real": 1.980847716331482, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.2634198364735154e-07, |
|
"logits/generated": 6.068641185760498, |
|
"logits/real": 4.550690650939941, |
|
"logps/generated": -885.751953125, |
|
"logps/real": -224.6710205078125, |
|
"loss": 0.0204, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.135249137878418, |
|
"rewards/margins": 5.933065891265869, |
|
"rewards/real": 1.797816514968872, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.2456452186277996e-07, |
|
"logits/generated": 5.979355812072754, |
|
"logits/real": 4.242101192474365, |
|
"logps/generated": -818.2512817382812, |
|
"logps/real": -227.32369995117188, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9465458393096924, |
|
"rewards/margins": 6.159433364868164, |
|
"rewards/real": 2.2128875255584717, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.227870600782083e-07, |
|
"logits/generated": 6.055127143859863, |
|
"logits/real": 4.004499912261963, |
|
"logps/generated": -912.0535278320312, |
|
"logps/real": -221.82894897460938, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.248215675354004, |
|
"rewards/margins": 6.312341690063477, |
|
"rewards/real": 2.0641255378723145, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.210095982936367e-07, |
|
"logits/generated": 5.85116720199585, |
|
"logits/real": 4.39048433303833, |
|
"logps/generated": -948.9578857421875, |
|
"logps/real": -230.64669799804688, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.669097900390625, |
|
"rewards/margins": 6.7218804359436035, |
|
"rewards/real": 2.052781581878662, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1923213650906505e-07, |
|
"logits/generated": 5.8959479331970215, |
|
"logits/real": 4.247786521911621, |
|
"logps/generated": -939.8728637695312, |
|
"logps/real": -230.90365600585938, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.3719964027404785, |
|
"rewards/margins": 6.431820869445801, |
|
"rewards/real": 2.0598244667053223, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1745467472449337e-07, |
|
"logits/generated": 5.981402397155762, |
|
"logits/real": 4.520476341247559, |
|
"logps/generated": -1016.5609130859375, |
|
"logps/real": -234.46566772460938, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.179906368255615, |
|
"rewards/margins": 7.442817687988281, |
|
"rewards/real": 2.262911319732666, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.156772129399218e-07, |
|
"logits/generated": 5.772179126739502, |
|
"logits/real": 4.208775520324707, |
|
"logps/generated": -790.708984375, |
|
"logps/real": -210.6012420654297, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.570807933807373, |
|
"rewards/margins": 5.612199306488037, |
|
"rewards/real": 2.041391134262085, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.1389975115535015e-07, |
|
"logits/generated": 6.091591835021973, |
|
"logits/real": 4.308236122131348, |
|
"logps/generated": -937.48095703125, |
|
"logps/real": -217.81820678710938, |
|
"loss": 0.017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.688237190246582, |
|
"rewards/margins": 6.935911655426025, |
|
"rewards/real": 2.2476744651794434, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.121222893707785e-07, |
|
"logits/generated": 5.910862922668457, |
|
"logits/real": 4.276075839996338, |
|
"logps/generated": -890.7863159179688, |
|
"logps/real": -241.32388305664062, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.329002380371094, |
|
"rewards/margins": 6.619669437408447, |
|
"rewards/real": 2.290666341781616, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.103448275862069e-07, |
|
"logits/generated": 6.195101261138916, |
|
"logits/real": 4.239165782928467, |
|
"logps/generated": -918.6710815429688, |
|
"logps/real": -233.1322479248047, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.716291904449463, |
|
"rewards/margins": 6.934659481048584, |
|
"rewards/real": 2.2183680534362793, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.0856736580163525e-07, |
|
"logits/generated": 6.146795749664307, |
|
"logits/real": 4.251899719238281, |
|
"logps/generated": -896.61669921875, |
|
"logps/real": -236.81289672851562, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.417421817779541, |
|
"rewards/margins": 6.579673767089844, |
|
"rewards/real": 2.162252426147461, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.067899040170636e-07, |
|
"logits/generated": 5.921133995056152, |
|
"logits/real": 4.311642646789551, |
|
"logps/generated": -1017.5269775390625, |
|
"logps/real": -231.27236938476562, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.362940311431885, |
|
"rewards/margins": 7.530875205993652, |
|
"rewards/real": 2.1679351329803467, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.05012442232492e-07, |
|
"logits/generated": 6.124849319458008, |
|
"logits/real": 4.260175704956055, |
|
"logps/generated": -1089.728759765625, |
|
"logps/real": -230.3025665283203, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.250988960266113, |
|
"rewards/margins": 8.456911087036133, |
|
"rewards/real": 2.2059216499328613, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.0323498044792035e-07, |
|
"logits/generated": 5.7611541748046875, |
|
"logits/real": 3.985320568084717, |
|
"logps/generated": -856.4290771484375, |
|
"logps/real": -219.84048461914062, |
|
"loss": 0.019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.234499931335449, |
|
"rewards/margins": 6.346347808837891, |
|
"rewards/real": 2.1118481159210205, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.014575186633487e-07, |
|
"logits/generated": 6.099104881286621, |
|
"logits/real": 4.165801525115967, |
|
"logps/generated": -1022.8517456054688, |
|
"logps/real": -225.13583374023438, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.710265636444092, |
|
"rewards/margins": 8.138273239135742, |
|
"rewards/real": 2.428006649017334, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.996800568787771e-07, |
|
"logits/generated": 5.9258551597595215, |
|
"logits/real": 4.278181076049805, |
|
"logps/generated": -991.6013793945312, |
|
"logps/real": -224.7017822265625, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.471372604370117, |
|
"rewards/margins": 7.898287773132324, |
|
"rewards/real": 2.426915168762207, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.9790259509420545e-07, |
|
"logits/generated": 6.0136613845825195, |
|
"logits/real": 4.205864906311035, |
|
"logps/generated": -879.1495971679688, |
|
"logps/real": -233.78097534179688, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.704037189483643, |
|
"rewards/margins": 6.998660087585449, |
|
"rewards/real": 2.2946221828460693, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.9612513330963387e-07, |
|
"logits/generated": 5.957737922668457, |
|
"logits/real": 4.087791919708252, |
|
"logps/generated": -961.3818359375, |
|
"logps/real": -189.39364624023438, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.026106834411621, |
|
"rewards/margins": 7.373734951019287, |
|
"rewards/real": 2.347627639770508, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.943476715250622e-07, |
|
"logits/generated": 5.931456565856934, |
|
"logits/real": 4.309812545776367, |
|
"logps/generated": -880.28955078125, |
|
"logps/real": -227.7857666015625, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.744623184204102, |
|
"rewards/margins": 6.99019718170166, |
|
"rewards/real": 2.2455737590789795, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9257020974049054e-07, |
|
"logits/generated": 6.020743370056152, |
|
"logits/real": 4.34235143661499, |
|
"logps/generated": -918.6298828125, |
|
"logps/real": -233.72021484375, |
|
"loss": 0.014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.72176456451416, |
|
"rewards/margins": 7.291744232177734, |
|
"rewards/real": 2.5699801445007324, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9079274795591896e-07, |
|
"logits/generated": 6.1958794593811035, |
|
"logits/real": 4.313004970550537, |
|
"logps/generated": -913.8150634765625, |
|
"logps/real": -237.1126251220703, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.31519889831543, |
|
"rewards/margins": 7.788308143615723, |
|
"rewards/real": 2.473109483718872, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.890152861713473e-07, |
|
"logits/generated": 5.900928497314453, |
|
"logits/real": 4.231713771820068, |
|
"logps/generated": -1001.9522705078125, |
|
"logps/real": -226.7261199951172, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.419561862945557, |
|
"rewards/margins": 7.794165134429932, |
|
"rewards/real": 2.3746023178100586, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.872378243867757e-07, |
|
"logits/generated": 5.979315280914307, |
|
"logits/real": 4.155876159667969, |
|
"logps/generated": -896.203125, |
|
"logps/real": -224.9453582763672, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.840609550476074, |
|
"rewards/margins": 7.281617164611816, |
|
"rewards/real": 2.441007137298584, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.85460362602204e-07, |
|
"logits/generated": 5.99771785736084, |
|
"logits/real": 4.335788249969482, |
|
"logps/generated": -985.0953369140625, |
|
"logps/real": -216.6324920654297, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.507411479949951, |
|
"rewards/margins": 8.085838317871094, |
|
"rewards/real": 2.5784270763397217, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.8368290081763243e-07, |
|
"logits/generated": 5.952843189239502, |
|
"logits/real": 4.346559524536133, |
|
"logps/generated": -945.6209106445312, |
|
"logps/real": -227.56997680664062, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.429326057434082, |
|
"rewards/margins": 8.042230606079102, |
|
"rewards/real": 2.612903118133545, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.819054390330608e-07, |
|
"logits/generated": 6.0241618156433105, |
|
"logits/real": 4.244063377380371, |
|
"logps/generated": -932.4935302734375, |
|
"logps/real": -231.8907012939453, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.1778459548950195, |
|
"rewards/margins": 7.901692867279053, |
|
"rewards/real": 2.723846435546875, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.801279772484891e-07, |
|
"logits/generated": 5.867781639099121, |
|
"logits/real": 4.314043045043945, |
|
"logps/generated": -851.2517700195312, |
|
"logps/real": -235.279296875, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.752983570098877, |
|
"rewards/margins": 7.189466953277588, |
|
"rewards/real": 2.436483383178711, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.783505154639175e-07, |
|
"logits/generated": 5.901434898376465, |
|
"logits/real": 4.186619281768799, |
|
"logps/generated": -1009.0958862304688, |
|
"logps/real": -212.00369262695312, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.709371089935303, |
|
"rewards/margins": 8.269929885864258, |
|
"rewards/real": 2.560558795928955, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.7657305367934584e-07, |
|
"logits/generated": 5.8600664138793945, |
|
"logits/real": 4.312530040740967, |
|
"logps/generated": -903.2091674804688, |
|
"logps/real": -217.84249877929688, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.082972049713135, |
|
"rewards/margins": 7.53509521484375, |
|
"rewards/real": 2.4521236419677734, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.7479559189477426e-07, |
|
"logits/generated": 6.000660419464111, |
|
"logits/real": 4.141573905944824, |
|
"logps/generated": -989.6737060546875, |
|
"logps/real": -216.5168914794922, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.764129161834717, |
|
"rewards/margins": 8.395139694213867, |
|
"rewards/real": 2.6310102939605713, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.730181301102026e-07, |
|
"logits/generated": 5.965339660644531, |
|
"logits/real": 4.219322204589844, |
|
"logps/generated": -854.77783203125, |
|
"logps/real": -235.1974639892578, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.901017665863037, |
|
"rewards/margins": 7.7865166664123535, |
|
"rewards/real": 2.8854994773864746, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.71240668325631e-07, |
|
"logits/generated": 6.012763023376465, |
|
"logits/real": 4.389718055725098, |
|
"logps/generated": -984.3873291015625, |
|
"logps/real": -239.3701629638672, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.781050682067871, |
|
"rewards/margins": 8.516217231750488, |
|
"rewards/real": 2.73516583442688, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.6946320654105936e-07, |
|
"logits/generated": 5.979636192321777, |
|
"logits/real": 4.670422554016113, |
|
"logps/generated": -967.5466918945312, |
|
"logps/real": -236.21444702148438, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.631641387939453, |
|
"rewards/margins": 8.182405471801758, |
|
"rewards/real": 2.5507636070251465, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.676857447564877e-07, |
|
"logits/generated": 6.084554195404053, |
|
"logits/real": 4.161642551422119, |
|
"logps/generated": -987.7828979492188, |
|
"logps/real": -225.3826446533203, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.720307350158691, |
|
"rewards/margins": 8.545812606811523, |
|
"rewards/real": 2.8255059719085693, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.659082829719161e-07, |
|
"logits/generated": 5.93519926071167, |
|
"logits/real": 4.27223539352417, |
|
"logps/generated": -930.1174926757812, |
|
"logps/real": -235.65243530273438, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.5342912673950195, |
|
"rewards/margins": 8.177888870239258, |
|
"rewards/real": 2.6435976028442383, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.6413082118734445e-07, |
|
"logits/generated": 6.053906440734863, |
|
"logits/real": 4.279298305511475, |
|
"logps/generated": -873.9427490234375, |
|
"logps/real": -234.75802612304688, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.114261627197266, |
|
"rewards/margins": 7.79233455657959, |
|
"rewards/real": 2.678072929382324, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.623533594027728e-07, |
|
"logits/generated": 5.621105194091797, |
|
"logits/real": 4.41392707824707, |
|
"logps/generated": -888.2545776367188, |
|
"logps/real": -219.32546997070312, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.186315536499023, |
|
"rewards/margins": 7.618834495544434, |
|
"rewards/real": 2.432518720626831, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.605758976182012e-07, |
|
"logits/generated": 6.062514781951904, |
|
"logits/real": 4.3643798828125, |
|
"logps/generated": -1034.1868896484375, |
|
"logps/real": -212.97476196289062, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.618842124938965, |
|
"rewards/margins": 9.310908317565918, |
|
"rewards/real": 2.6920673847198486, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.587984358336296e-07, |
|
"logits/generated": 5.926046848297119, |
|
"logits/real": 4.445965766906738, |
|
"logps/generated": -821.0653076171875, |
|
"logps/real": -250.19674682617188, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.612605094909668, |
|
"rewards/margins": 7.3182783126831055, |
|
"rewards/real": 2.7056736946105957, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.570209740490579e-07, |
|
"logits/generated": 5.849274635314941, |
|
"logits/real": 4.148129463195801, |
|
"logps/generated": -878.97021484375, |
|
"logps/real": -214.24234008789062, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.2036943435668945, |
|
"rewards/margins": 7.895529747009277, |
|
"rewards/real": 2.691835403442383, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.5524351226448634e-07, |
|
"logits/generated": 6.104997634887695, |
|
"logits/real": 4.673565864562988, |
|
"logps/generated": -864.9569091796875, |
|
"logps/real": -246.60794067382812, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.325832366943359, |
|
"rewards/margins": 8.104931831359863, |
|
"rewards/real": 2.7790980339050293, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.5346605047991465e-07, |
|
"logits/generated": 6.075316429138184, |
|
"logits/real": 4.084663391113281, |
|
"logps/generated": -872.2156372070312, |
|
"logps/real": -223.63442993164062, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.434564113616943, |
|
"rewards/margins": 8.21035385131836, |
|
"rewards/real": 2.7757906913757324, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.51688588695343e-07, |
|
"logits/generated": 6.089809894561768, |
|
"logits/real": 4.060254096984863, |
|
"logps/generated": -923.2381591796875, |
|
"logps/real": -191.72105407714844, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.731915473937988, |
|
"rewards/margins": 8.29061222076416, |
|
"rewards/real": 2.5586960315704346, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.499111269107714e-07, |
|
"logits/generated": 6.061944007873535, |
|
"logits/real": 4.62351131439209, |
|
"logps/generated": -841.89013671875, |
|
"logps/real": -263.7066955566406, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.006676197052002, |
|
"rewards/margins": 7.827229976654053, |
|
"rewards/real": 2.820553779602051, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4813366512619975e-07, |
|
"logits/generated": 5.924272537231445, |
|
"logits/real": 4.0540995597839355, |
|
"logps/generated": -903.25, |
|
"logps/real": -217.80874633789062, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.3574018478393555, |
|
"rewards/margins": 8.290170669555664, |
|
"rewards/real": 2.9327681064605713, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4635620334162817e-07, |
|
"logits/generated": 6.056190013885498, |
|
"logits/real": 4.321590900421143, |
|
"logps/generated": -932.0973510742188, |
|
"logps/real": -225.2566375732422, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.739737510681152, |
|
"rewards/margins": 8.39258861541748, |
|
"rewards/real": 2.652851104736328, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.4457874155705653e-07, |
|
"logits/generated": 6.022032737731934, |
|
"logits/real": 4.070498466491699, |
|
"logps/generated": -932.9993286132812, |
|
"logps/real": -211.3143768310547, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.863167762756348, |
|
"rewards/margins": 8.546854972839355, |
|
"rewards/real": 2.683687448501587, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.428012797724849e-07, |
|
"logits/generated": 6.195624351501465, |
|
"logits/real": 4.141390800476074, |
|
"logps/generated": -957.7415161132812, |
|
"logps/real": -210.617431640625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.7170329093933105, |
|
"rewards/margins": 8.636775016784668, |
|
"rewards/real": 2.919740915298462, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.4102381798791327e-07, |
|
"logits/generated": 6.033434867858887, |
|
"logits/real": 4.116759777069092, |
|
"logps/generated": -842.8616333007812, |
|
"logps/real": -230.01284790039062, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.187349319458008, |
|
"rewards/margins": 8.29853343963623, |
|
"rewards/real": 3.1111843585968018, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.392463562033416e-07, |
|
"logits/generated": 6.119296550750732, |
|
"logits/real": 4.1949968338012695, |
|
"logps/generated": -1000.3076171875, |
|
"logps/real": -228.13479614257812, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.538392066955566, |
|
"rewards/margins": 9.60668659210205, |
|
"rewards/real": 3.0682942867279053, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.3746889441877e-07, |
|
"logits/generated": 5.990731716156006, |
|
"logits/real": 4.539961814880371, |
|
"logps/generated": -782.185791015625, |
|
"logps/real": -251.5903778076172, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.510619163513184, |
|
"rewards/margins": 7.38437557220459, |
|
"rewards/real": 2.873756170272827, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.3569143263419836e-07, |
|
"logits/generated": 6.0371599197387695, |
|
"logits/real": 4.193976402282715, |
|
"logps/generated": -992.1876220703125, |
|
"logps/real": -220.98495483398438, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.436119079589844, |
|
"rewards/margins": 9.288522720336914, |
|
"rewards/real": 2.852402687072754, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.3391397084962673e-07, |
|
"logits/generated": 6.059049129486084, |
|
"logits/real": 4.382033348083496, |
|
"logps/generated": -784.0813598632812, |
|
"logps/real": -219.2273712158203, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.49301815032959, |
|
"rewards/margins": 7.499096870422363, |
|
"rewards/real": 3.0060791969299316, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.3213650906505507e-07, |
|
"logits/generated": 5.9765849113464355, |
|
"logits/real": 4.078164577484131, |
|
"logps/generated": -911.6340942382812, |
|
"logps/real": -189.57408142089844, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.761612415313721, |
|
"rewards/margins": 8.555708885192871, |
|
"rewards/real": 2.794097900390625, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.3035904728048346e-07, |
|
"logits/generated": 5.922765254974365, |
|
"logits/real": 4.084896564483643, |
|
"logps/generated": -967.2000122070312, |
|
"logps/real": -221.4872589111328, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.081551551818848, |
|
"rewards/margins": 9.249353408813477, |
|
"rewards/real": 3.167802572250366, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.2858158549591183e-07, |
|
"logits/generated": 6.103907585144043, |
|
"logits/real": 4.329716682434082, |
|
"logps/generated": -922.7809448242188, |
|
"logps/real": -229.83584594726562, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.810357570648193, |
|
"rewards/margins": 8.797757148742676, |
|
"rewards/real": 2.9873995780944824, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.268041237113402e-07, |
|
"logits/generated": 5.952910423278809, |
|
"logits/real": 4.254477024078369, |
|
"logps/generated": -829.0789184570312, |
|
"logps/real": -191.4152374267578, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.8961920738220215, |
|
"rewards/margins": 7.95013952255249, |
|
"rewards/real": 3.053947925567627, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.2502666192676856e-07, |
|
"logits/generated": 6.104610919952393, |
|
"logits/real": 4.344254016876221, |
|
"logps/generated": -927.4021606445312, |
|
"logps/real": -209.98464965820312, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.0140275955200195, |
|
"rewards/margins": 8.953369140625, |
|
"rewards/real": 2.939342737197876, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.2324920014219693e-07, |
|
"logits/generated": 5.963999271392822, |
|
"logits/real": 4.088013648986816, |
|
"logps/generated": -981.2156982421875, |
|
"logps/real": -223.4355010986328, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.4049482345581055, |
|
"rewards/margins": 9.413580894470215, |
|
"rewards/real": 3.0086326599121094, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.2147173835762532e-07, |
|
"logits/generated": 6.034571647644043, |
|
"logits/real": 3.9964118003845215, |
|
"logps/generated": -919.56103515625, |
|
"logps/real": -212.9126739501953, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.920588970184326, |
|
"rewards/margins": 8.81024169921875, |
|
"rewards/real": 2.889652967453003, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.1969427657305366e-07, |
|
"logits/generated": 5.908297061920166, |
|
"logits/real": 4.290045738220215, |
|
"logps/generated": -903.4417724609375, |
|
"logps/real": -216.17636108398438, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.024001598358154, |
|
"rewards/margins": 8.847755432128906, |
|
"rewards/real": 2.8237533569335938, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.1791681478848203e-07, |
|
"logits/generated": 5.78277587890625, |
|
"logits/real": 4.178603172302246, |
|
"logps/generated": -897.1768798828125, |
|
"logps/real": -224.93814086914062, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.952237129211426, |
|
"rewards/margins": 9.044183731079102, |
|
"rewards/real": 3.091947555541992, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.161393530039104e-07, |
|
"logits/generated": 5.6169939041137695, |
|
"logits/real": 4.14485502243042, |
|
"logps/generated": -836.0411376953125, |
|
"logps/real": -214.97607421875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.351627826690674, |
|
"rewards/margins": 8.435457229614258, |
|
"rewards/real": 3.0838301181793213, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1436189121933878e-07, |
|
"logits/generated": 5.942612171173096, |
|
"logits/real": 4.3120036125183105, |
|
"logps/generated": -1033.1988525390625, |
|
"logps/real": -231.5688018798828, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.671760559082031, |
|
"rewards/margins": 9.734092712402344, |
|
"rewards/real": 3.0623319149017334, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.1258442943476715e-07, |
|
"logits/generated": 6.000979423522949, |
|
"logits/real": 3.862889051437378, |
|
"logps/generated": -996.46875, |
|
"logps/real": -188.82571411132812, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.690791130065918, |
|
"rewards/margins": 9.859224319458008, |
|
"rewards/real": 3.168431520462036, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.1080696765019552e-07, |
|
"logits/generated": 6.081242084503174, |
|
"logits/real": 4.578803062438965, |
|
"logps/generated": -1046.6558837890625, |
|
"logps/real": -228.24392700195312, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.087597846984863, |
|
"rewards/margins": 10.14047622680664, |
|
"rewards/real": 3.0528788566589355, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0902950586562388e-07, |
|
"logits/generated": 5.984295845031738, |
|
"logits/real": 4.308724880218506, |
|
"logps/generated": -935.5621948242188, |
|
"logps/real": -218.9115753173828, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.082803726196289, |
|
"rewards/margins": 9.108025550842285, |
|
"rewards/real": 3.025221347808838, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0725204408105225e-07, |
|
"logits/generated": 5.89461612701416, |
|
"logits/real": 4.198281288146973, |
|
"logps/generated": -961.24560546875, |
|
"logps/real": -220.1450653076172, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.416398048400879, |
|
"rewards/margins": 9.537870407104492, |
|
"rewards/real": 3.1214730739593506, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.0547458229648061e-07, |
|
"logits/generated": 6.187463283538818, |
|
"logits/real": 4.732372283935547, |
|
"logps/generated": -980.1790161132812, |
|
"logps/real": -251.7775421142578, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.859214782714844, |
|
"rewards/margins": 9.723730087280273, |
|
"rewards/real": 2.8645150661468506, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0369712051190898e-07, |
|
"logits/generated": 5.920609951019287, |
|
"logits/real": 4.184802055358887, |
|
"logps/generated": -969.5177612304688, |
|
"logps/real": -220.58566284179688, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.475825309753418, |
|
"rewards/margins": 9.5746431350708, |
|
"rewards/real": 3.098818063735962, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0191965872733735e-07, |
|
"logits/generated": 6.136545181274414, |
|
"logits/real": 4.201231002807617, |
|
"logps/generated": -993.6239013671875, |
|
"logps/real": -216.8855438232422, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.71942138671875, |
|
"rewards/margins": 9.831668853759766, |
|
"rewards/real": 3.1122474670410156, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.001421969427657e-07, |
|
"logits/generated": 5.892589569091797, |
|
"logits/real": 4.182487487792969, |
|
"logps/generated": -942.9978637695312, |
|
"logps/real": -218.97500610351562, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.580080986022949, |
|
"rewards/margins": 9.862131118774414, |
|
"rewards/real": 3.2820498943328857, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.983647351581941e-07, |
|
"logits/generated": 5.94655179977417, |
|
"logits/real": 4.0822672843933105, |
|
"logps/generated": -957.2570190429688, |
|
"logps/real": -196.55645751953125, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.333019256591797, |
|
"rewards/margins": 9.670191764831543, |
|
"rewards/real": 3.3371729850769043, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.9658727337362247e-07, |
|
"logits/generated": 5.719095230102539, |
|
"logits/real": 4.3129563331604, |
|
"logps/generated": -879.9132080078125, |
|
"logps/real": -218.69076538085938, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.011284828186035, |
|
"rewards/margins": 9.261569023132324, |
|
"rewards/real": 3.2502846717834473, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.9480981158905084e-07, |
|
"logits/generated": 5.970053195953369, |
|
"logits/real": 4.292898654937744, |
|
"logps/generated": -888.4221801757812, |
|
"logps/real": -221.7141876220703, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.928704738616943, |
|
"rewards/margins": 9.284444808959961, |
|
"rewards/real": 3.3557403087615967, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9303234980447918e-07, |
|
"logits/generated": 5.851454257965088, |
|
"logits/real": 3.963120222091675, |
|
"logps/generated": -1002.041015625, |
|
"logps/real": -187.06405639648438, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.853819370269775, |
|
"rewards/margins": 10.24903678894043, |
|
"rewards/real": 3.3952178955078125, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9125488801990754e-07, |
|
"logits/generated": 5.893982887268066, |
|
"logits/real": 4.2695136070251465, |
|
"logps/generated": -952.0750732421875, |
|
"logps/real": -224.6346435546875, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.340090751647949, |
|
"rewards/margins": 9.606979370117188, |
|
"rewards/real": 3.26688814163208, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.8947742623533593e-07, |
|
"logits/generated": 6.079073905944824, |
|
"logits/real": 4.082903861999512, |
|
"logps/generated": -1034.026123046875, |
|
"logps/real": -209.4253692626953, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.287435054779053, |
|
"rewards/margins": 10.478463172912598, |
|
"rewards/real": 3.191028356552124, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.876999644507643e-07, |
|
"logits/generated": 6.078752517700195, |
|
"logits/real": 4.00618314743042, |
|
"logps/generated": -836.09326171875, |
|
"logps/real": -218.91085815429688, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.683650970458984, |
|
"rewards/margins": 8.769237518310547, |
|
"rewards/real": 3.0855870246887207, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.8592250266619267e-07, |
|
"logits/generated": 6.1372151374816895, |
|
"logits/real": 4.399873733520508, |
|
"logps/generated": -925.8645629882812, |
|
"logps/real": -231.36962890625, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.495853424072266, |
|
"rewards/margins": 9.849853515625, |
|
"rewards/real": 3.353998899459839, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8414504088162103e-07, |
|
"logits/generated": 6.150097370147705, |
|
"logits/real": 4.1030778884887695, |
|
"logps/generated": -969.1099853515625, |
|
"logps/real": -210.23385620117188, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.695557594299316, |
|
"rewards/margins": 9.90971851348877, |
|
"rewards/real": 3.214160203933716, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8236757909704943e-07, |
|
"logits/generated": 5.9813079833984375, |
|
"logits/real": 4.158692359924316, |
|
"logps/generated": -843.7550659179688, |
|
"logps/real": -198.7861328125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.507570743560791, |
|
"rewards/margins": 8.458051681518555, |
|
"rewards/real": 2.950481414794922, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.805901173124778e-07, |
|
"logits/generated": 6.074183940887451, |
|
"logits/real": 4.167660236358643, |
|
"logps/generated": -904.7354736328125, |
|
"logps/real": -216.07406616210938, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.98261833190918, |
|
"rewards/margins": 9.305191993713379, |
|
"rewards/real": 3.3225739002227783, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.7881265552790613e-07, |
|
"logits/generated": 6.014121055603027, |
|
"logits/real": 4.315189838409424, |
|
"logps/generated": -998.3967895507812, |
|
"logps/real": -213.5117950439453, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.877680778503418, |
|
"rewards/margins": 10.27568244934082, |
|
"rewards/real": 3.398001194000244, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.770351937433345e-07, |
|
"logits/generated": 6.0427069664001465, |
|
"logits/real": 4.096532344818115, |
|
"logps/generated": -962.1192626953125, |
|
"logps/real": -211.24386596679688, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.622589111328125, |
|
"rewards/margins": 9.86971664428711, |
|
"rewards/real": 3.24712872505188, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.7525773195876286e-07, |
|
"logits/generated": 5.948111534118652, |
|
"logits/real": 4.327864170074463, |
|
"logps/generated": -862.5738525390625, |
|
"logps/real": -236.5964813232422, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.988071441650391, |
|
"rewards/margins": 9.170713424682617, |
|
"rewards/real": 3.1826424598693848, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7348027017419126e-07, |
|
"logits/generated": 6.061889171600342, |
|
"logits/real": 4.250190258026123, |
|
"logps/generated": -962.5604248046875, |
|
"logps/real": -212.2547149658203, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.656978607177734, |
|
"rewards/margins": 10.204740524291992, |
|
"rewards/real": 3.547761917114258, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7170280838961962e-07, |
|
"logits/generated": 6.133549690246582, |
|
"logits/real": 4.258768081665039, |
|
"logps/generated": -847.89306640625, |
|
"logps/real": -213.8179931640625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.887804985046387, |
|
"rewards/margins": 9.027533531188965, |
|
"rewards/real": 3.1397290229797363, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.69925346605048e-07, |
|
"logits/generated": 6.092435359954834, |
|
"logits/real": 3.95710825920105, |
|
"logps/generated": -1076.5191650390625, |
|
"logps/real": -204.7867889404297, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.864550590515137, |
|
"rewards/margins": 11.004741668701172, |
|
"rewards/real": 3.1401913166046143, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.6814788482047635e-07, |
|
"logits/generated": 6.069060325622559, |
|
"logits/real": 4.248088836669922, |
|
"logps/generated": -1034.2674560546875, |
|
"logps/real": -239.43045043945312, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.654637813568115, |
|
"rewards/margins": 11.039973258972168, |
|
"rewards/real": 3.385335922241211, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.6637042303590475e-07, |
|
"logits/generated": 6.127971649169922, |
|
"logits/real": 4.42900276184082, |
|
"logps/generated": -1067.667724609375, |
|
"logps/real": -212.1188507080078, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.7270331382751465, |
|
"rewards/margins": 11.030082702636719, |
|
"rewards/real": 3.3030498027801514, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.6459296125133309e-07, |
|
"logits/generated": 6.039584159851074, |
|
"logits/real": 4.363356113433838, |
|
"logps/generated": -967.5791015625, |
|
"logps/real": -234.0696258544922, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.919652462005615, |
|
"rewards/margins": 10.246850967407227, |
|
"rewards/real": 3.3271987438201904, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.6281549946676145e-07, |
|
"logits/generated": 5.848420143127441, |
|
"logits/real": 4.181252479553223, |
|
"logps/generated": -879.5675659179688, |
|
"logps/real": -204.01536560058594, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.058112144470215, |
|
"rewards/margins": 9.286849975585938, |
|
"rewards/real": 3.2287373542785645, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.6103803768218982e-07, |
|
"logits/generated": 6.0381035804748535, |
|
"logits/real": 4.147576332092285, |
|
"logps/generated": -888.4138793945312, |
|
"logps/real": -218.30050659179688, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.2195258140563965, |
|
"rewards/margins": 9.568934440612793, |
|
"rewards/real": 3.3494086265563965, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5926057589761818e-07, |
|
"logits/generated": 6.021380424499512, |
|
"logits/real": 4.303619384765625, |
|
"logps/generated": -751.9827880859375, |
|
"logps/real": -225.69869995117188, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.188918113708496, |
|
"rewards/margins": 8.795283317565918, |
|
"rewards/real": 3.606365203857422, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5748311411304658e-07, |
|
"logits/generated": 5.8918375968933105, |
|
"logits/real": 4.548980712890625, |
|
"logps/generated": -925.9884033203125, |
|
"logps/real": -212.60781860351562, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.482697486877441, |
|
"rewards/margins": 9.820296287536621, |
|
"rewards/real": 3.337599992752075, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5570565232847494e-07, |
|
"logits/generated": 5.850405216217041, |
|
"logits/real": 4.177556037902832, |
|
"logps/generated": -941.3117065429688, |
|
"logps/real": -207.508544921875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.867137908935547, |
|
"rewards/margins": 10.460260391235352, |
|
"rewards/real": 3.5931217670440674, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.539281905439033e-07, |
|
"logits/generated": 6.011968612670898, |
|
"logits/real": 4.186308860778809, |
|
"logps/generated": -850.3424072265625, |
|
"logps/real": -213.4459991455078, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.205097198486328, |
|
"rewards/margins": 9.331758499145508, |
|
"rewards/real": 3.1266608238220215, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.5215072875933165e-07, |
|
"logits/generated": 5.9976019859313965, |
|
"logits/real": 4.6466498374938965, |
|
"logps/generated": -959.1027221679688, |
|
"logps/real": -230.4766845703125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.896917819976807, |
|
"rewards/margins": 10.29023551940918, |
|
"rewards/real": 3.3933181762695312, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.5037326697476004e-07, |
|
"logits/generated": 5.940140247344971, |
|
"logits/real": 3.9636178016662598, |
|
"logps/generated": -811.2144775390625, |
|
"logps/real": -191.14474487304688, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.637009620666504, |
|
"rewards/margins": 9.112122535705566, |
|
"rewards/real": 3.4751129150390625, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.485958051901884e-07, |
|
"logits/generated": 6.092099189758301, |
|
"logits/real": 4.396188259124756, |
|
"logps/generated": -1047.5501708984375, |
|
"logps/real": -235.4965362548828, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.702287197113037, |
|
"rewards/margins": 11.126907348632812, |
|
"rewards/real": 3.42461895942688, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4681834340561677e-07, |
|
"logits/generated": 6.009620666503906, |
|
"logits/real": 4.459410667419434, |
|
"logps/generated": -1028.2261962890625, |
|
"logps/real": -224.2885284423828, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.595302581787109, |
|
"rewards/margins": 11.32234001159668, |
|
"rewards/real": 3.7270379066467285, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4504088162104514e-07, |
|
"logits/generated": 6.041359901428223, |
|
"logits/real": 4.407500267028809, |
|
"logps/generated": -1056.5250244140625, |
|
"logps/real": -230.5962677001953, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.136027336120605, |
|
"rewards/margins": 11.669519424438477, |
|
"rewards/real": 3.533491611480713, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.432634198364735e-07, |
|
"logits/generated": 6.096525192260742, |
|
"logits/real": 4.020089149475098, |
|
"logps/generated": -940.7005615234375, |
|
"logps/real": -217.18240356445312, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.886039733886719, |
|
"rewards/margins": 10.523492813110352, |
|
"rewards/real": 3.637453079223633, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.414859580519019e-07, |
|
"logits/generated": 5.8838653564453125, |
|
"logits/real": 4.09322452545166, |
|
"logps/generated": -871.5797729492188, |
|
"logps/real": -189.5806427001953, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.9241437911987305, |
|
"rewards/margins": 9.4077730178833, |
|
"rewards/real": 3.483628511428833, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.3970849626733024e-07, |
|
"logits/generated": 6.006686687469482, |
|
"logits/real": 4.329590797424316, |
|
"logps/generated": -1036.4449462890625, |
|
"logps/real": -228.02566528320312, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.656490325927734, |
|
"rewards/margins": 10.992205619812012, |
|
"rewards/real": 3.3357155323028564, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.379310344827586e-07, |
|
"logits/generated": 6.243993282318115, |
|
"logits/real": 4.345526695251465, |
|
"logps/generated": -972.0113525390625, |
|
"logps/real": -213.86898803710938, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.251333713531494, |
|
"rewards/margins": 10.62246322631836, |
|
"rewards/real": 3.371129274368286, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3615357269818697e-07, |
|
"logits/generated": 6.056420803070068, |
|
"logits/real": 4.507519245147705, |
|
"logps/generated": -959.6647338867188, |
|
"logps/real": -228.41311645507812, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.193532466888428, |
|
"rewards/margins": 10.772246360778809, |
|
"rewards/real": 3.578713893890381, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3437611091361536e-07, |
|
"logits/generated": 5.848100662231445, |
|
"logits/real": 4.477749824523926, |
|
"logps/generated": -885.1476440429688, |
|
"logps/real": -217.71826171875, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.187966823577881, |
|
"rewards/margins": 9.537440299987793, |
|
"rewards/real": 3.349473476409912, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3259864912904373e-07, |
|
"logits/generated": 6.177857875823975, |
|
"logits/real": 3.999253511428833, |
|
"logps/generated": -849.5914916992188, |
|
"logps/real": -186.1688690185547, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.99931526184082, |
|
"rewards/margins": 9.616048812866211, |
|
"rewards/real": 3.6167335510253906, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.308211873444721e-07, |
|
"logits/generated": 5.982209205627441, |
|
"logits/real": 4.022818565368652, |
|
"logps/generated": -943.8885498046875, |
|
"logps/real": -191.94265747070312, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.117369174957275, |
|
"rewards/margins": 10.699490547180176, |
|
"rewards/real": 3.5821213722229004, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2904372555990046e-07, |
|
"logits/generated": 6.1165690422058105, |
|
"logits/real": 4.515404224395752, |
|
"logps/generated": -1075.3916015625, |
|
"logps/real": -223.2095489501953, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.828172206878662, |
|
"rewards/margins": 11.10367202758789, |
|
"rewards/real": 3.2754998207092285, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2726626377532883e-07, |
|
"logits/generated": 5.962019920349121, |
|
"logits/real": 4.166319847106934, |
|
"logps/generated": -1018.0177001953125, |
|
"logps/real": -193.1107177734375, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.824549198150635, |
|
"rewards/margins": 11.278244972229004, |
|
"rewards/real": 3.4536960124969482, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.254888019907572e-07, |
|
"logits/generated": 5.941873073577881, |
|
"logits/real": 4.457114219665527, |
|
"logps/generated": -839.1423950195312, |
|
"logps/real": -225.7877960205078, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.322813987731934, |
|
"rewards/margins": 9.847611427307129, |
|
"rewards/real": 3.5247981548309326, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.2371134020618556e-07, |
|
"logits/generated": 6.1170220375061035, |
|
"logits/real": 4.145179748535156, |
|
"logps/generated": -971.7708740234375, |
|
"logps/real": -213.4941864013672, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.316300392150879, |
|
"rewards/margins": 10.892778396606445, |
|
"rewards/real": 3.5764777660369873, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.2193387842161392e-07, |
|
"logits/generated": 5.986542701721191, |
|
"logits/real": 4.025667190551758, |
|
"logps/generated": -830.68359375, |
|
"logps/real": -206.99441528320312, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.811788082122803, |
|
"rewards/margins": 9.347892761230469, |
|
"rewards/real": 3.5361053943634033, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.2015641663704232e-07, |
|
"logits/generated": 5.972803592681885, |
|
"logits/real": 4.296057224273682, |
|
"logps/generated": -1000.8699340820312, |
|
"logps/real": -219.41787719726562, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.70371150970459, |
|
"rewards/margins": 11.355711936950684, |
|
"rewards/real": 3.6520004272460938, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.1837895485247067e-07, |
|
"logits/generated": 6.02608585357666, |
|
"logits/real": 4.351847171783447, |
|
"logps/generated": -974.85693359375, |
|
"logps/real": -215.2225341796875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.276649475097656, |
|
"rewards/margins": 10.775941848754883, |
|
"rewards/real": 3.4992916584014893, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.1660149306789902e-07, |
|
"logits/generated": 5.748597145080566, |
|
"logits/real": 4.363656997680664, |
|
"logps/generated": -966.9947509765625, |
|
"logps/real": -231.53848266601562, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.087010860443115, |
|
"rewards/margins": 10.6896390914917, |
|
"rewards/real": 3.6026291847229004, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.148240312833274e-07, |
|
"logits/generated": 6.186932563781738, |
|
"logits/real": 4.215136528015137, |
|
"logps/generated": -960.4109497070312, |
|
"logps/real": -200.14752197265625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.6329665184021, |
|
"rewards/margins": 11.175156593322754, |
|
"rewards/real": 3.5421881675720215, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.1304656949875577e-07, |
|
"logits/generated": 5.933046340942383, |
|
"logits/real": 4.051640510559082, |
|
"logps/generated": -947.9317626953125, |
|
"logps/real": -187.852783203125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.863749027252197, |
|
"rewards/margins": 10.43601131439209, |
|
"rewards/real": 3.57226300239563, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.1126910771418415e-07, |
|
"logits/generated": 6.067012786865234, |
|
"logits/real": 4.100882530212402, |
|
"logps/generated": -922.0003662109375, |
|
"logps/real": -218.1196746826172, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.713512420654297, |
|
"rewards/margins": 10.470623016357422, |
|
"rewards/real": 3.7571117877960205, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.094916459296125e-07, |
|
"logits/generated": 5.888331413269043, |
|
"logits/real": 4.533158302307129, |
|
"logps/generated": -941.0359497070312, |
|
"logps/real": -235.983154296875, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.0601677894592285, |
|
"rewards/margins": 10.506025314331055, |
|
"rewards/real": 3.445856809616089, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0771418414504088e-07, |
|
"logits/generated": 6.122256278991699, |
|
"logits/real": 4.691792964935303, |
|
"logps/generated": -1032.4281005859375, |
|
"logps/real": -242.85391235351562, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.817018032073975, |
|
"rewards/margins": 11.140421867370605, |
|
"rewards/real": 3.3234035968780518, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.0593672236046925e-07, |
|
"logits/generated": 6.091986179351807, |
|
"logits/real": 4.298600196838379, |
|
"logps/generated": -979.9375, |
|
"logps/real": -207.18222045898438, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.318638801574707, |
|
"rewards/margins": 10.859162330627441, |
|
"rewards/real": 3.540522813796997, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0415926057589762e-07, |
|
"logits/generated": 5.999621868133545, |
|
"logits/real": 4.219559192657471, |
|
"logps/generated": -949.9793090820312, |
|
"logps/real": -210.6415557861328, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.1169939041137695, |
|
"rewards/margins": 10.84516716003418, |
|
"rewards/real": 3.7281742095947266, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0238179879132598e-07, |
|
"logits/generated": 5.945744514465332, |
|
"logits/real": 4.196887969970703, |
|
"logps/generated": -999.5768432617188, |
|
"logps/real": -197.3428192138672, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.847522735595703, |
|
"rewards/margins": 11.279948234558105, |
|
"rewards/real": 3.4324257373809814, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.0060433700675434e-07, |
|
"logits/generated": 6.025457859039307, |
|
"logits/real": 4.405503273010254, |
|
"logps/generated": -911.369140625, |
|
"logps/real": -226.285400390625, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.9073333740234375, |
|
"rewards/margins": 10.433539390563965, |
|
"rewards/real": 3.526205539703369, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.882687522218272e-08, |
|
"logits/generated": 5.8123369216918945, |
|
"logits/real": 4.084736347198486, |
|
"logps/generated": -892.3932495117188, |
|
"logps/real": -207.2658233642578, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.392351150512695, |
|
"rewards/margins": 10.307291030883789, |
|
"rewards/real": 3.914940595626831, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.704941343761109e-08, |
|
"logits/generated": 6.000952243804932, |
|
"logits/real": 4.174363136291504, |
|
"logps/generated": -1008.0392456054688, |
|
"logps/real": -216.5176239013672, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.5495429039001465, |
|
"rewards/margins": 11.444334030151367, |
|
"rewards/real": 3.8947906494140625, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.527195165303946e-08, |
|
"logits/generated": 5.935231685638428, |
|
"logits/real": 4.574646949768066, |
|
"logps/generated": -1026.3360595703125, |
|
"logps/real": -224.52346801757812, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.995066165924072, |
|
"rewards/margins": 11.642257690429688, |
|
"rewards/real": 3.6471920013427734, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.349448986846782e-08, |
|
"logits/generated": 6.165256500244141, |
|
"logits/real": 4.483449459075928, |
|
"logps/generated": -1073.19873046875, |
|
"logps/real": -215.8119354248047, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.13926887512207, |
|
"rewards/margins": 11.734591484069824, |
|
"rewards/real": 3.595323085784912, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.17170280838962e-08, |
|
"logits/generated": 6.178045272827148, |
|
"logits/real": 4.426362991333008, |
|
"logps/generated": -981.1510009765625, |
|
"logps/real": -221.8535614013672, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.534024238586426, |
|
"rewards/margins": 11.1114501953125, |
|
"rewards/real": 3.5774269104003906, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.993956629932455e-08, |
|
"logits/generated": 6.002736568450928, |
|
"logits/real": 4.408158302307129, |
|
"logps/generated": -1037.64697265625, |
|
"logps/real": -206.6656036376953, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.90814733505249, |
|
"rewards/margins": 11.406362533569336, |
|
"rewards/real": 3.498215913772583, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.816210451475293e-08, |
|
"logits/generated": 5.922328948974609, |
|
"logits/real": 4.30651330947876, |
|
"logps/generated": -883.3487548828125, |
|
"logps/real": -226.1007843017578, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.406916618347168, |
|
"rewards/margins": 10.177984237670898, |
|
"rewards/real": 3.7710673809051514, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.63846427301813e-08, |
|
"logits/generated": 5.988922595977783, |
|
"logits/real": 4.155168533325195, |
|
"logps/generated": -918.8670654296875, |
|
"logps/real": -203.3168487548828, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.007664680480957, |
|
"rewards/margins": 10.59145450592041, |
|
"rewards/real": 3.5837910175323486, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.460718094560966e-08, |
|
"logits/generated": 5.974033832550049, |
|
"logits/real": 4.323070526123047, |
|
"logps/generated": -953.7561645507812, |
|
"logps/real": -215.21804809570312, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.087655544281006, |
|
"rewards/margins": 10.743782043457031, |
|
"rewards/real": 3.656125545501709, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.282971916103803e-08, |
|
"logits/generated": 5.881442546844482, |
|
"logits/real": 4.603425025939941, |
|
"logps/generated": -941.333984375, |
|
"logps/real": -229.0399932861328, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.910244941711426, |
|
"rewards/margins": 10.281949996948242, |
|
"rewards/real": 3.3717052936553955, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.10522573764664e-08, |
|
"logits/generated": 6.160346031188965, |
|
"logits/real": 4.405267238616943, |
|
"logps/generated": -1009.9052734375, |
|
"logps/real": -224.79244995117188, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.713784694671631, |
|
"rewards/margins": 11.180540084838867, |
|
"rewards/real": 3.4667556285858154, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.927479559189478e-08, |
|
"logits/generated": 6.0979323387146, |
|
"logits/real": 4.142810821533203, |
|
"logps/generated": -919.0299072265625, |
|
"logps/real": -200.34954833984375, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.830052375793457, |
|
"rewards/margins": 10.462355613708496, |
|
"rewards/real": 3.6323037147521973, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.749733380732314e-08, |
|
"logits/generated": 5.893312454223633, |
|
"logits/real": 4.082333564758301, |
|
"logps/generated": -922.8045043945312, |
|
"logps/real": -211.110107421875, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.06411600112915, |
|
"rewards/margins": 11.093643188476562, |
|
"rewards/real": 4.029527187347412, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.571987202275151e-08, |
|
"logits/generated": 6.046978950500488, |
|
"logits/real": 4.000611305236816, |
|
"logps/generated": -851.3162231445312, |
|
"logps/real": -213.77294921875, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.470201015472412, |
|
"rewards/margins": 10.399231910705566, |
|
"rewards/real": 3.929030656814575, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.394241023817987e-08, |
|
"logits/generated": 6.083460807800293, |
|
"logits/real": 4.147582054138184, |
|
"logps/generated": -869.0367431640625, |
|
"logps/real": -222.88449096679688, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.679482936859131, |
|
"rewards/margins": 10.497906684875488, |
|
"rewards/real": 3.8184237480163574, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.216494845360824e-08, |
|
"logits/generated": 5.894058704376221, |
|
"logits/real": 4.494288921356201, |
|
"logps/generated": -1004.9658203125, |
|
"logps/real": -224.24234008789062, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.787278175354004, |
|
"rewards/margins": 11.781339645385742, |
|
"rewards/real": 3.994061231613159, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.038748666903662e-08, |
|
"logits/generated": 6.083498954772949, |
|
"logits/real": 4.627425670623779, |
|
"logps/generated": -1075.3470458984375, |
|
"logps/real": -226.70223999023438, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.463549613952637, |
|
"rewards/margins": 12.042739868164062, |
|
"rewards/real": 3.579190731048584, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 6.861002488446497e-08, |
|
"logits/generated": 5.915851593017578, |
|
"logits/real": 3.9263007640838623, |
|
"logps/generated": -1111.206298828125, |
|
"logps/real": -196.06072998046875, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.594639778137207, |
|
"rewards/margins": 12.281318664550781, |
|
"rewards/real": 3.6866791248321533, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.683256309989335e-08, |
|
"logits/generated": 5.970302581787109, |
|
"logits/real": 4.437585830688477, |
|
"logps/generated": -840.1096801757812, |
|
"logps/real": -226.9420928955078, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.3191237449646, |
|
"rewards/margins": 10.023898124694824, |
|
"rewards/real": 3.704774856567383, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.505510131532172e-08, |
|
"logits/generated": 6.018714904785156, |
|
"logits/real": 4.127536296844482, |
|
"logps/generated": -929.6011962890625, |
|
"logps/real": -204.32046508789062, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.2565131187438965, |
|
"rewards/margins": 10.836641311645508, |
|
"rewards/real": 3.5801289081573486, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.32776395307501e-08, |
|
"logits/generated": 5.825669765472412, |
|
"logits/real": 4.458681106567383, |
|
"logps/generated": -974.4544067382812, |
|
"logps/real": -212.163818359375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.371243953704834, |
|
"rewards/margins": 10.979463577270508, |
|
"rewards/real": 3.6082186698913574, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.150017774617845e-08, |
|
"logits/generated": 5.914022922515869, |
|
"logits/real": 4.451127529144287, |
|
"logps/generated": -971.3380126953125, |
|
"logps/real": -248.98635864257812, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.341151237487793, |
|
"rewards/margins": 11.175028800964355, |
|
"rewards/real": 3.833878755569458, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.972271596160682e-08, |
|
"logits/generated": 6.177198886871338, |
|
"logits/real": 4.612268924713135, |
|
"logps/generated": -955.9030151367188, |
|
"logps/real": -221.28634643554688, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.392415523529053, |
|
"rewards/margins": 10.885235786437988, |
|
"rewards/real": 3.4928202629089355, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.794525417703519e-08, |
|
"logits/generated": 5.88433837890625, |
|
"logits/real": 4.339784145355225, |
|
"logps/generated": -990.3663330078125, |
|
"logps/real": -223.5160369873047, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.730770111083984, |
|
"rewards/margins": 11.330351829528809, |
|
"rewards/real": 3.599581480026245, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.616779239246356e-08, |
|
"logits/generated": 6.089413642883301, |
|
"logits/real": 4.758141994476318, |
|
"logps/generated": -946.3424072265625, |
|
"logps/real": -223.90957641601562, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.317401885986328, |
|
"rewards/margins": 11.000505447387695, |
|
"rewards/real": 3.6831040382385254, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.439033060789193e-08, |
|
"logits/generated": 5.951352119445801, |
|
"logits/real": 4.1157331466674805, |
|
"logps/generated": -876.4993896484375, |
|
"logps/real": -212.88046264648438, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.624871730804443, |
|
"rewards/margins": 10.744012832641602, |
|
"rewards/real": 4.119140625, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.26128688233203e-08, |
|
"logits/generated": 5.940617561340332, |
|
"logits/real": 3.8113338947296143, |
|
"logps/generated": -888.90087890625, |
|
"logps/real": -203.17901611328125, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.581952095031738, |
|
"rewards/margins": 10.407301902770996, |
|
"rewards/real": 3.825350522994995, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5.0835407038748666e-08, |
|
"logits/generated": 5.964734077453613, |
|
"logits/real": 4.341010093688965, |
|
"logps/generated": -881.1970825195312, |
|
"logps/real": -232.49813842773438, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.593221187591553, |
|
"rewards/margins": 10.22175407409668, |
|
"rewards/real": 3.628532886505127, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.905794525417703e-08, |
|
"logits/generated": 5.8404388427734375, |
|
"logits/real": 4.435536861419678, |
|
"logps/generated": -969.4814453125, |
|
"logps/real": -215.01858520507812, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.376192569732666, |
|
"rewards/margins": 11.145563125610352, |
|
"rewards/real": 3.769369602203369, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.72804834696054e-08, |
|
"logits/generated": 5.9956512451171875, |
|
"logits/real": 4.474661827087402, |
|
"logps/generated": -909.1708984375, |
|
"logps/real": -216.86392211914062, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.260798454284668, |
|
"rewards/margins": 10.6810302734375, |
|
"rewards/real": 3.4202327728271484, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.550302168503377e-08, |
|
"logits/generated": 6.177473545074463, |
|
"logits/real": 4.074435234069824, |
|
"logps/generated": -931.0374145507812, |
|
"logps/real": -199.68115234375, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.1744065284729, |
|
"rewards/margins": 11.10449504852295, |
|
"rewards/real": 3.9300880432128906, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.372555990046214e-08, |
|
"logits/generated": 6.158198356628418, |
|
"logits/real": 4.45182466506958, |
|
"logps/generated": -1053.023681640625, |
|
"logps/real": -227.9947967529297, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.230512619018555, |
|
"rewards/margins": 11.803583145141602, |
|
"rewards/real": 3.5730698108673096, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.194809811589051e-08, |
|
"logits/generated": 5.763731956481934, |
|
"logits/real": 4.512047290802002, |
|
"logps/generated": -877.0777587890625, |
|
"logps/real": -239.983642578125, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.7064409255981445, |
|
"rewards/margins": 10.49316120147705, |
|
"rewards/real": 3.7867202758789062, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.0170636331318876e-08, |
|
"logits/generated": 6.0582170486450195, |
|
"logits/real": 4.427424430847168, |
|
"logps/generated": -1025.2308349609375, |
|
"logps/real": -215.9585418701172, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.088347434997559, |
|
"rewards/margins": 11.211756706237793, |
|
"rewards/real": 3.1234092712402344, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.839317454674725e-08, |
|
"logits/generated": 6.159816741943359, |
|
"logits/real": 4.5531134605407715, |
|
"logps/generated": -914.5631713867188, |
|
"logps/real": -223.43667602539062, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.19748067855835, |
|
"rewards/margins": 11.130581855773926, |
|
"rewards/real": 3.933100938796997, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.6615712762175614e-08, |
|
"logits/generated": 6.062338829040527, |
|
"logits/real": 3.9617767333984375, |
|
"logps/generated": -1065.44189453125, |
|
"logps/real": -200.53231811523438, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.692277908325195, |
|
"rewards/margins": 12.533926010131836, |
|
"rewards/real": 3.841648578643799, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.4838250977603974e-08, |
|
"logits/generated": 5.966034889221191, |
|
"logits/real": 4.169475555419922, |
|
"logps/generated": -900.4786376953125, |
|
"logps/real": -219.13211059570312, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.983071804046631, |
|
"rewards/margins": 10.687957763671875, |
|
"rewards/real": 3.7048866748809814, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.3060789193032346e-08, |
|
"logits/generated": 6.001954555511475, |
|
"logits/real": 4.039769649505615, |
|
"logps/generated": -948.9764404296875, |
|
"logps/real": -213.39022827148438, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.106842041015625, |
|
"rewards/margins": 10.968811988830566, |
|
"rewards/real": 3.8619697093963623, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.128332740846071e-08, |
|
"logits/generated": 6.0790205001831055, |
|
"logits/real": 4.268954753875732, |
|
"logps/generated": -969.9235229492188, |
|
"logps/real": -210.45849609375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.430506706237793, |
|
"rewards/margins": 11.069308280944824, |
|
"rewards/real": 3.638800859451294, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.9505865623889085e-08, |
|
"logits/generated": 6.002804756164551, |
|
"logits/real": 4.1793718338012695, |
|
"logps/generated": -1044.44287109375, |
|
"logps/real": -208.38851928710938, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.205821990966797, |
|
"rewards/margins": 11.79900074005127, |
|
"rewards/real": 3.593177318572998, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.7728403839317454e-08, |
|
"logits/generated": 6.175272464752197, |
|
"logits/real": 4.347874641418457, |
|
"logps/generated": -908.1828002929688, |
|
"logps/real": -213.10421752929688, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.079012870788574, |
|
"rewards/margins": 10.570651054382324, |
|
"rewards/real": 3.4916369915008545, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.5950942054745824e-08, |
|
"logits/generated": 5.989743232727051, |
|
"logits/real": 4.590392112731934, |
|
"logps/generated": -881.7191162109375, |
|
"logps/real": -221.69577026367188, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.744132995605469, |
|
"rewards/margins": 10.367905616760254, |
|
"rewards/real": 3.6237728595733643, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.4173480270174193e-08, |
|
"logits/generated": 5.84287166595459, |
|
"logits/real": 4.579287052154541, |
|
"logps/generated": -880.3978271484375, |
|
"logps/real": -232.52279663085938, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.6468071937561035, |
|
"rewards/margins": 10.200614929199219, |
|
"rewards/real": 3.553807020187378, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.2396018485602556e-08, |
|
"logits/generated": 6.122179985046387, |
|
"logits/real": 4.397927761077881, |
|
"logps/generated": -998.4055786132812, |
|
"logps/real": -210.6758575439453, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.7901740074157715, |
|
"rewards/margins": 11.620798110961914, |
|
"rewards/real": 3.830622434616089, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.0618556701030925e-08, |
|
"logits/generated": 5.882719993591309, |
|
"logits/real": 4.193206787109375, |
|
"logps/generated": -996.2776489257812, |
|
"logps/real": -182.93798828125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.111333847045898, |
|
"rewards/margins": 11.724261283874512, |
|
"rewards/real": 3.612928867340088, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.8841094916459295e-08, |
|
"logits/generated": 6.127943992614746, |
|
"logits/real": 4.209385395050049, |
|
"logps/generated": -1060.7899169921875, |
|
"logps/real": -205.1070098876953, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.274694442749023, |
|
"rewards/margins": 11.977832794189453, |
|
"rewards/real": 3.703138828277588, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7063633131887664e-08, |
|
"logits/generated": 5.8941450119018555, |
|
"logits/real": 4.013119697570801, |
|
"logps/generated": -973.5771484375, |
|
"logps/real": -188.08303833007812, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.611605644226074, |
|
"rewards/margins": 11.475560188293457, |
|
"rewards/real": 3.8639538288116455, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.5286171347316033e-08, |
|
"logits/generated": 6.093392372131348, |
|
"logits/real": 4.220031261444092, |
|
"logps/generated": -948.8116455078125, |
|
"logps/real": -216.5891876220703, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.310966491699219, |
|
"rewards/margins": 10.79468059539795, |
|
"rewards/real": 3.4837143421173096, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.35087095627444e-08, |
|
"logits/generated": 6.005775451660156, |
|
"logits/real": 4.406448841094971, |
|
"logps/generated": -944.8621826171875, |
|
"logps/real": -192.6377716064453, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.452921390533447, |
|
"rewards/margins": 11.0402193069458, |
|
"rewards/real": 3.587296962738037, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.1731247778172769e-08, |
|
"logits/generated": 6.163327693939209, |
|
"logits/real": 4.344714641571045, |
|
"logps/generated": -917.35498046875, |
|
"logps/real": -198.57122802734375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.304339408874512, |
|
"rewards/margins": 10.936470985412598, |
|
"rewards/real": 3.632131576538086, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.953785993601137e-09, |
|
"logits/generated": 6.108910083770752, |
|
"logits/real": 4.15443754196167, |
|
"logps/generated": -962.0145263671875, |
|
"logps/real": -203.24107360839844, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.498394012451172, |
|
"rewards/margins": 10.694067001342773, |
|
"rewards/real": 3.1956734657287598, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.176324209029506e-09, |
|
"logits/generated": 6.031611919403076, |
|
"logits/real": 3.9185752868652344, |
|
"logps/generated": -981.349609375, |
|
"logps/real": -206.8091278076172, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.681130409240723, |
|
"rewards/margins": 11.426141738891602, |
|
"rewards/real": 3.745011806488037, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.398862424457874e-09, |
|
"logits/generated": 5.970823764801025, |
|
"logits/real": 4.175595283508301, |
|
"logps/generated": -943.4645385742188, |
|
"logps/real": -184.49136352539062, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.951342582702637, |
|
"rewards/margins": 10.521745681762695, |
|
"rewards/real": 3.5704009532928467, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.621400639886242e-09, |
|
"logits/generated": 6.0879998207092285, |
|
"logits/real": 4.444035530090332, |
|
"logps/generated": -1086.6719970703125, |
|
"logps/real": -244.66543579101562, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.595160484313965, |
|
"rewards/margins": 12.099470138549805, |
|
"rewards/real": 3.5043106079101562, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.8439388553146107e-09, |
|
"logits/generated": 6.041680335998535, |
|
"logits/real": 4.341583728790283, |
|
"logps/generated": -994.8681640625, |
|
"logps/real": -205.96835327148438, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.797635078430176, |
|
"rewards/margins": 11.64828872680664, |
|
"rewards/real": 3.850653886795044, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.066477070742979e-09, |
|
"logits/generated": 6.018385887145996, |
|
"logits/real": 3.9701621532440186, |
|
"logps/generated": -901.3045654296875, |
|
"logps/real": -204.19090270996094, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.618832588195801, |
|
"rewards/margins": 10.127131462097168, |
|
"rewards/real": 3.5082993507385254, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3126, |
|
"total_flos": 0.0, |
|
"train_loss": 0.12108022763431954, |
|
"train_runtime": 40273.1203, |
|
"train_samples_per_second": 2.483, |
|
"train_steps_per_second": 0.078 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3126, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|