|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.6666666666666675e-06, |
|
"logits/chosen": -3.1526219844818115, |
|
"logits/rejected": -3.3119924068450928, |
|
"logps/chosen": -18.28135108947754, |
|
"logps/rejected": -33.52398681640625, |
|
"loss": 0.6997, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.006796836853027344, |
|
"rewards/margins": -0.012901116162538528, |
|
"rewards/rejected": 0.006104278843849897, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.666666666666667e-05, |
|
"logits/chosen": -3.1073851585388184, |
|
"logits/rejected": -3.090308666229248, |
|
"logps/chosen": -20.141502380371094, |
|
"logps/rejected": -18.037580490112305, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.0833333358168602, |
|
"rewards/chosen": 0.001263597165234387, |
|
"rewards/margins": -0.001611270010471344, |
|
"rewards/rejected": 0.002874867059290409, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00013333333333333334, |
|
"logits/chosen": -3.0630269050598145, |
|
"logits/rejected": -3.1416983604431152, |
|
"logps/chosen": -31.93638038635254, |
|
"logps/rejected": -42.507789611816406, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.010843334719538689, |
|
"rewards/margins": 0.003288193140178919, |
|
"rewards/rejected": 0.0075551411136984825, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": -3.148637294769287, |
|
"logits/rejected": -3.150296211242676, |
|
"logps/chosen": -22.95195770263672, |
|
"logps/rejected": -23.612133026123047, |
|
"loss": 0.6974, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -0.002520971465855837, |
|
"rewards/margins": -0.0078018950298428535, |
|
"rewards/rejected": 0.005280924029648304, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002666666666666667, |
|
"logits/chosen": -3.1271812915802, |
|
"logits/rejected": -3.0903429985046387, |
|
"logps/chosen": -34.57979965209961, |
|
"logps/rejected": -27.37040138244629, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.125, |
|
"rewards/chosen": 0.03781440109014511, |
|
"rewards/margins": 0.006826506461948156, |
|
"rewards/rejected": 0.030987894162535667, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0003333333333333333, |
|
"logits/chosen": -3.0824079513549805, |
|
"logits/rejected": -3.0999526977539062, |
|
"logps/chosen": -29.16314697265625, |
|
"logps/rejected": -30.843231201171875, |
|
"loss": 0.7057, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.08534860610961914, |
|
"rewards/margins": -0.013902002945542336, |
|
"rewards/rejected": 0.09925060719251633, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004, |
|
"logits/chosen": -3.1495282649993896, |
|
"logits/rejected": -3.184638500213623, |
|
"logps/chosen": -19.72355079650879, |
|
"logps/rejected": -28.213886260986328, |
|
"loss": 0.7213, |
|
"rewards/accuracies": 0.07500000298023224, |
|
"rewards/chosen": -0.05634387582540512, |
|
"rewards/margins": -0.043291497975587845, |
|
"rewards/rejected": -0.01305237878113985, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00046666666666666666, |
|
"logits/chosen": -3.04826021194458, |
|
"logits/rejected": -3.005667209625244, |
|
"logps/chosen": -25.625635147094727, |
|
"logps/rejected": -24.768199920654297, |
|
"loss": 0.7002, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.06301303952932358, |
|
"rewards/margins": -0.00712633365765214, |
|
"rewards/rejected": -0.055886708199977875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0005333333333333334, |
|
"logits/chosen": -3.048518419265747, |
|
"logits/rejected": -3.061340808868408, |
|
"logps/chosen": -32.414894104003906, |
|
"logps/rejected": -34.773399353027344, |
|
"loss": 0.7835, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.23218408226966858, |
|
"rewards/margins": -0.03052915260195732, |
|
"rewards/rejected": -0.20165491104125977, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0006, |
|
"logits/chosen": -3.035808563232422, |
|
"logits/rejected": -3.1053929328918457, |
|
"logps/chosen": -31.509021759033203, |
|
"logps/rejected": -45.353553771972656, |
|
"loss": 0.7051, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.3204951286315918, |
|
"rewards/margins": 0.283873975276947, |
|
"rewards/rejected": -0.604369044303894, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0006666666666666666, |
|
"logits/chosen": -2.959228992462158, |
|
"logits/rejected": -2.98266339302063, |
|
"logps/chosen": -51.024803161621094, |
|
"logps/rejected": -54.91625213623047, |
|
"loss": 1.0868, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -1.835883378982544, |
|
"rewards/margins": 0.3176426589488983, |
|
"rewards/rejected": -2.1535260677337646, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0007333333333333333, |
|
"logits/chosen": -3.0634117126464844, |
|
"logits/rejected": -3.0850846767425537, |
|
"logps/chosen": -23.597423553466797, |
|
"logps/rejected": -26.58676528930664, |
|
"loss": 0.7629, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.2058214694261551, |
|
"rewards/margins": -0.05166854336857796, |
|
"rewards/rejected": -0.15415294468402863, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008, |
|
"logits/chosen": -3.1232190132141113, |
|
"logits/rejected": -3.1285691261291504, |
|
"logps/chosen": -20.47592544555664, |
|
"logps/rejected": -22.593311309814453, |
|
"loss": 0.7341, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -0.46454209089279175, |
|
"rewards/margins": -0.023245975375175476, |
|
"rewards/rejected": -0.44129619002342224, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008666666666666667, |
|
"logits/chosen": -2.975956678390503, |
|
"logits/rejected": -3.027247190475464, |
|
"logps/chosen": -34.07396697998047, |
|
"logps/rejected": -42.46125793457031, |
|
"loss": 0.723, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.4881093502044678, |
|
"rewards/margins": 0.3038038909435272, |
|
"rewards/rejected": -0.7919132113456726, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0009333333333333333, |
|
"logits/chosen": -2.981985569000244, |
|
"logits/rejected": -2.9559457302093506, |
|
"logps/chosen": -33.598899841308594, |
|
"logps/rejected": -40.866451263427734, |
|
"loss": 0.7877, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.35516494512557983, |
|
"rewards/margins": 0.4116950035095215, |
|
"rewards/rejected": -0.7668598890304565, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.001, |
|
"logits/chosen": -3.05903959274292, |
|
"logits/rejected": -3.0611279010772705, |
|
"logps/chosen": -20.234691619873047, |
|
"logps/rejected": -19.169904708862305, |
|
"loss": 0.9163, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -0.36899399757385254, |
|
"rewards/margins": -0.08086968958377838, |
|
"rewards/rejected": -0.28812432289123535, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0009723756906077348, |
|
"logits/chosen": -2.9612436294555664, |
|
"logits/rejected": -2.9223554134368896, |
|
"logps/chosen": -32.5883674621582, |
|
"logps/rejected": -25.370834350585938, |
|
"loss": 0.7856, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": -0.18723489344120026, |
|
"rewards/margins": -0.10649768263101578, |
|
"rewards/rejected": -0.08073721826076508, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0009447513812154696, |
|
"logits/chosen": -2.9623398780822754, |
|
"logits/rejected": -2.914522171020508, |
|
"logps/chosen": -39.70682907104492, |
|
"logps/rejected": -33.20659637451172, |
|
"loss": 0.898, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.4028944969177246, |
|
"rewards/margins": -0.04350559413433075, |
|
"rewards/rejected": -0.35938888788223267, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0009171270718232044, |
|
"logits/chosen": -3.0877394676208496, |
|
"logits/rejected": -3.092603921890259, |
|
"logps/chosen": -43.16739273071289, |
|
"logps/rejected": -47.593746185302734, |
|
"loss": 1.2201, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": -2.5319814682006836, |
|
"rewards/margins": -0.10082467645406723, |
|
"rewards/rejected": -2.431157112121582, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0008895027624309392, |
|
"logits/chosen": -2.09025239944458, |
|
"logits/rejected": -2.0903568267822266, |
|
"logps/chosen": -112.7010269165039, |
|
"logps/rejected": -105.31596374511719, |
|
"loss": 2.9619, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -7.883659362792969, |
|
"rewards/margins": -0.7001466751098633, |
|
"rewards/rejected": -7.1835126876831055, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0008618784530386741, |
|
"logits/chosen": -1.8135408163070679, |
|
"logits/rejected": -1.8160464763641357, |
|
"logps/chosen": -75.82559967041016, |
|
"logps/rejected": -65.90226745605469, |
|
"loss": 2.555, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": -5.0599846839904785, |
|
"rewards/margins": -0.6209059953689575, |
|
"rewards/rejected": -4.439078330993652, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0008342541436464089, |
|
"logits/chosen": -2.6365649700164795, |
|
"logits/rejected": -2.633017063140869, |
|
"logps/chosen": -73.8572998046875, |
|
"logps/rejected": -115.65068054199219, |
|
"loss": 1.3204, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -4.614927291870117, |
|
"rewards/margins": 3.1844677925109863, |
|
"rewards/rejected": -7.799394130706787, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0008066298342541437, |
|
"logits/chosen": -2.338550329208374, |
|
"logits/rejected": -2.337129831314087, |
|
"logps/chosen": -75.24410247802734, |
|
"logps/rejected": -89.40665435791016, |
|
"loss": 1.6189, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -5.084308624267578, |
|
"rewards/margins": 0.9201302528381348, |
|
"rewards/rejected": -6.004438877105713, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0007790055248618785, |
|
"logits/chosen": -2.328997850418091, |
|
"logits/rejected": -2.326946496963501, |
|
"logps/chosen": -62.543540954589844, |
|
"logps/rejected": -84.85903930664062, |
|
"loss": 1.3971, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": -3.8174233436584473, |
|
"rewards/margins": 1.793341875076294, |
|
"rewards/rejected": -5.610764503479004, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0007513812154696133, |
|
"logits/chosen": -2.794644832611084, |
|
"logits/rejected": -2.7910995483398438, |
|
"logps/chosen": -58.40827560424805, |
|
"logps/rejected": -79.7098617553711, |
|
"loss": 1.2398, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -3.556912660598755, |
|
"rewards/margins": 1.693927526473999, |
|
"rewards/rejected": -5.250839710235596, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0007237569060773481, |
|
"logits/chosen": -2.703537702560425, |
|
"logits/rejected": -2.7038962841033936, |
|
"logps/chosen": -40.56044006347656, |
|
"logps/rejected": -43.81157684326172, |
|
"loss": 1.3514, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -2.4380924701690674, |
|
"rewards/margins": 0.2092890739440918, |
|
"rewards/rejected": -2.647381544113159, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0006961325966850829, |
|
"logits/chosen": -2.870227098464966, |
|
"logits/rejected": -2.891268253326416, |
|
"logps/chosen": -67.85215759277344, |
|
"logps/rejected": -91.36506652832031, |
|
"loss": 1.2686, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -3.6993489265441895, |
|
"rewards/margins": 1.7562892436981201, |
|
"rewards/rejected": -5.455638408660889, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0006685082872928176, |
|
"logits/chosen": -3.1669762134552, |
|
"logits/rejected": -3.1949028968811035, |
|
"logps/chosen": -30.239299774169922, |
|
"logps/rejected": -42.69363021850586, |
|
"loss": 0.7342, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.33911675214767456, |
|
"rewards/margins": 0.5873344540596008, |
|
"rewards/rejected": -0.9264512062072754, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0006408839779005525, |
|
"logits/chosen": -3.065441131591797, |
|
"logits/rejected": -3.0655035972595215, |
|
"logps/chosen": -39.723567962646484, |
|
"logps/rejected": -53.760826110839844, |
|
"loss": 1.3162, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -2.2118256092071533, |
|
"rewards/margins": 0.6879772543907166, |
|
"rewards/rejected": -2.8998026847839355, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0006132596685082873, |
|
"logits/chosen": -2.6939263343811035, |
|
"logits/rejected": -2.6940252780914307, |
|
"logps/chosen": -60.013755798339844, |
|
"logps/rejected": -58.038536071777344, |
|
"loss": 2.1225, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -3.602609634399414, |
|
"rewards/margins": 0.10532107204198837, |
|
"rewards/rejected": -3.70793080329895, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.000585635359116022, |
|
"logits/chosen": -2.695570468902588, |
|
"logits/rejected": -2.6920197010040283, |
|
"logps/chosen": -46.42293930053711, |
|
"logps/rejected": -44.63296127319336, |
|
"loss": 0.9542, |
|
"rewards/accuracies": 0.17499999701976776, |
|
"rewards/chosen": -2.1268885135650635, |
|
"rewards/margins": 0.09368989616632462, |
|
"rewards/rejected": -2.22057843208313, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.000558011049723757, |
|
"logits/chosen": -3.1045467853546143, |
|
"logits/rejected": -3.1222219467163086, |
|
"logps/chosen": -22.447757720947266, |
|
"logps/rejected": -27.05214500427246, |
|
"loss": 0.7383, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -0.1806814968585968, |
|
"rewards/margins": 0.14409010112285614, |
|
"rewards/rejected": -0.32477161288261414, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0005303867403314917, |
|
"logits/chosen": -3.1156842708587646, |
|
"logits/rejected": -3.130932569503784, |
|
"logps/chosen": -21.35702133178711, |
|
"logps/rejected": -20.4589900970459, |
|
"loss": 0.7337, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.29580003023147583, |
|
"rewards/margins": 0.08602263033390045, |
|
"rewards/rejected": -0.3818226456642151, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005027624309392266, |
|
"logits/chosen": -3.050220012664795, |
|
"logits/rejected": -3.046596050262451, |
|
"logps/chosen": -24.79575538635254, |
|
"logps/rejected": -33.02082443237305, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.31610769033432007, |
|
"rewards/margins": 0.4491572976112366, |
|
"rewards/rejected": -0.7652650475502014, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00047513812154696136, |
|
"logits/chosen": -3.1225409507751465, |
|
"logits/rejected": -3.142671823501587, |
|
"logps/chosen": -34.958404541015625, |
|
"logps/rejected": -37.874855041503906, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.23902547359466553, |
|
"rewards/margins": 0.42017507553100586, |
|
"rewards/rejected": -0.6592004895210266, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00044751381215469617, |
|
"logits/chosen": -3.3166356086730957, |
|
"logits/rejected": -3.271238327026367, |
|
"logps/chosen": -41.72985076904297, |
|
"logps/rejected": -43.41400909423828, |
|
"loss": 0.7479, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.3358752131462097, |
|
"rewards/margins": 0.2891044020652771, |
|
"rewards/rejected": -0.6249796152114868, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0004198895027624309, |
|
"logits/chosen": -3.2012417316436768, |
|
"logits/rejected": -3.2214763164520264, |
|
"logps/chosen": -29.948162078857422, |
|
"logps/rejected": -34.02383041381836, |
|
"loss": 0.7083, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -0.20552043616771698, |
|
"rewards/margins": -0.0057243406772613525, |
|
"rewards/rejected": -0.19979611039161682, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00039226519337016573, |
|
"logits/chosen": -3.217780590057373, |
|
"logits/rejected": -3.2134361267089844, |
|
"logps/chosen": -16.843505859375, |
|
"logps/rejected": -17.496295928955078, |
|
"loss": 0.763, |
|
"rewards/accuracies": 0.10000000149011612, |
|
"rewards/chosen": -0.05251544713973999, |
|
"rewards/margins": -0.043300654739141464, |
|
"rewards/rejected": -0.009214771911501884, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0003646408839779006, |
|
"logits/chosen": -3.168470859527588, |
|
"logits/rejected": -3.2095096111297607, |
|
"logps/chosen": -31.317157745361328, |
|
"logps/rejected": -40.99824523925781, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.06307787448167801, |
|
"rewards/margins": 0.3778603971004486, |
|
"rewards/rejected": -0.4409382939338684, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0003370165745856354, |
|
"logits/chosen": -3.1807992458343506, |
|
"logits/rejected": -3.1729633808135986, |
|
"logps/chosen": -25.59493064880371, |
|
"logps/rejected": -24.889175415039062, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.03841208666563034, |
|
"rewards/margins": 0.2406325787305832, |
|
"rewards/rejected": -0.2790446877479553, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00030939226519337016, |
|
"logits/chosen": -3.1939032077789307, |
|
"logits/rejected": -3.1782355308532715, |
|
"logps/chosen": -14.640347480773926, |
|
"logps/rejected": -13.197868347167969, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.15000000596046448, |
|
"rewards/chosen": -0.013051311485469341, |
|
"rewards/margins": 0.11761553585529327, |
|
"rewards/rejected": -0.1306668370962143, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00028176795580110497, |
|
"logits/chosen": -3.2121384143829346, |
|
"logits/rejected": -3.242738723754883, |
|
"logps/chosen": -26.492828369140625, |
|
"logps/rejected": -33.63092041015625, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.15410800278186798, |
|
"rewards/margins": 0.2064083367586136, |
|
"rewards/rejected": -0.36051633954048157, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002541436464088398, |
|
"logits/chosen": -3.217862367630005, |
|
"logits/rejected": -3.2220897674560547, |
|
"logps/chosen": -45.029319763183594, |
|
"logps/rejected": -50.58649444580078, |
|
"loss": 0.7074, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.21466748416423798, |
|
"rewards/margins": 0.43003931641578674, |
|
"rewards/rejected": -0.644706666469574, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0002265193370165746, |
|
"logits/chosen": -3.2857704162597656, |
|
"logits/rejected": -3.285273313522339, |
|
"logps/chosen": -24.04534912109375, |
|
"logps/rejected": -25.24020767211914, |
|
"loss": 0.7055, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": -0.05270111560821533, |
|
"rewards/margins": 0.19839158654212952, |
|
"rewards/rejected": -0.25109270215034485, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001988950276243094, |
|
"logits/chosen": -3.1307530403137207, |
|
"logits/rejected": -3.097612142562866, |
|
"logps/chosen": -25.230710983276367, |
|
"logps/rejected": -27.142175674438477, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": 0.06528893858194351, |
|
"rewards/margins": 0.3360690474510193, |
|
"rewards/rejected": -0.27078011631965637, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001712707182320442, |
|
"logits/chosen": -3.210901975631714, |
|
"logits/rejected": -3.250978469848633, |
|
"logps/chosen": -16.752582550048828, |
|
"logps/rejected": -30.28323745727539, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.05518122762441635, |
|
"rewards/margins": 0.479708731174469, |
|
"rewards/rejected": -0.42452749609947205, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.000143646408839779, |
|
"logits/chosen": -3.048260450363159, |
|
"logits/rejected": -3.1042888164520264, |
|
"logps/chosen": -33.8494758605957, |
|
"logps/rejected": -49.50830841064453, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.0009274661424569786, |
|
"rewards/margins": 0.6367529630661011, |
|
"rewards/rejected": -0.635825514793396, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001160220994475138, |
|
"logits/chosen": -3.272062301635742, |
|
"logits/rejected": -3.2841033935546875, |
|
"logps/chosen": -16.145034790039062, |
|
"logps/rejected": -19.171789169311523, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": 0.10014114528894424, |
|
"rewards/margins": 0.250404953956604, |
|
"rewards/rejected": -0.15026383101940155, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.839779005524861e-05, |
|
"logits/chosen": -3.1270499229431152, |
|
"logits/rejected": -3.1859707832336426, |
|
"logps/chosen": -28.680988311767578, |
|
"logps/rejected": -39.859764099121094, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.053285278379917145, |
|
"rewards/margins": 0.39891186356544495, |
|
"rewards/rejected": -0.4521971344947815, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.0773480662983424e-05, |
|
"logits/chosen": -3.140641212463379, |
|
"logits/rejected": -3.090985059738159, |
|
"logps/chosen": -32.661651611328125, |
|
"logps/rejected": -32.10502624511719, |
|
"loss": 0.7772, |
|
"rewards/accuracies": 0.20000000298023224, |
|
"rewards/chosen": -0.3610732853412628, |
|
"rewards/margins": 0.07261800020933151, |
|
"rewards/rejected": -0.43369120359420776, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3149171270718233e-05, |
|
"logits/chosen": -3.1711134910583496, |
|
"logits/rejected": -3.190059185028076, |
|
"logps/chosen": -36.2880973815918, |
|
"logps/rejected": -48.17897415161133, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.25595012307167053, |
|
"rewards/margins": 0.6426823139190674, |
|
"rewards/rejected": -0.8986324071884155, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -3.225074052810669, |
|
"eval_logits/rejected": -3.2351696491241455, |
|
"eval_logps/chosen": -28.331165313720703, |
|
"eval_logps/rejected": -31.33060073852539, |
|
"eval_loss": 0.7142000794410706, |
|
"eval_rewards/accuracies": 0.22200000286102295, |
|
"eval_rewards/chosen": -0.15271247923374176, |
|
"eval_rewards/margins": 0.16096290946006775, |
|
"eval_rewards/rejected": -0.3136754035949707, |
|
"eval_runtime": 411.5707, |
|
"eval_samples_per_second": 2.43, |
|
"eval_steps_per_second": 0.304, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 512, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|