|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 52, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -2.977583885192871, |
|
"debug/policy_chosen_logps": -214.68138122558594, |
|
"debug/policy_rejected_logits": -3.0865864753723145, |
|
"debug/policy_rejected_logps": -221.65213012695312, |
|
"debug/reference_chosen_logps": -214.68138122558594, |
|
"debug/reference_rejected_logps": -221.65213012695312, |
|
"epoch": 0.019230769230769232, |
|
"grad_norm": 5.166761446238812, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.977583885192871, |
|
"logits/rejected": -3.0865864753723145, |
|
"logps/chosen": -214.68138122558594, |
|
"logps/rejected": -221.65213012695312, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.110696792602539, |
|
"debug/policy_chosen_logps": -208.40696716308594, |
|
"debug/policy_rejected_logits": -3.141209602355957, |
|
"debug/policy_rejected_logps": -220.74032592773438, |
|
"debug/reference_chosen_logps": -208.67347717285156, |
|
"debug/reference_rejected_logps": -220.81326293945312, |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 4.748582416027732, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.110696792602539, |
|
"logits/rejected": -3.141209602355957, |
|
"logps/chosen": -208.40696716308594, |
|
"logps/rejected": -220.74032592773438, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0026651951484382153, |
|
"rewards/margins": 0.001935672713443637, |
|
"rewards/rejected": 0.000729522667825222, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.122938394546509, |
|
"debug/policy_chosen_logps": -191.87767028808594, |
|
"debug/policy_rejected_logits": -3.2007791996002197, |
|
"debug/policy_rejected_logps": -196.9674530029297, |
|
"debug/reference_chosen_logps": -192.09266662597656, |
|
"debug/reference_rejected_logps": -196.9289093017578, |
|
"epoch": 0.057692307692307696, |
|
"grad_norm": 4.85937471576945, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.122938394546509, |
|
"logits/rejected": -3.2007791996002197, |
|
"logps/chosen": -191.87767028808594, |
|
"logps/rejected": -196.9674530029297, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0021500205621123314, |
|
"rewards/margins": 0.0025354004465043545, |
|
"rewards/rejected": -0.0003853798261843622, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -2.9445621967315674, |
|
"debug/policy_chosen_logps": -202.26788330078125, |
|
"debug/policy_rejected_logits": -2.958071708679199, |
|
"debug/policy_rejected_logps": -236.9288330078125, |
|
"debug/reference_chosen_logps": -202.63815307617188, |
|
"debug/reference_rejected_logps": -236.94044494628906, |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 4.68477948866532, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.9445621967315674, |
|
"logits/rejected": -2.958071708679199, |
|
"logps/chosen": -202.26788330078125, |
|
"logps/rejected": -236.9288330078125, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0037027548532932997, |
|
"rewards/margins": 0.0035865209065377712, |
|
"rewards/rejected": 0.00011623383034020662, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0143117904663086, |
|
"debug/policy_chosen_logps": -206.9991455078125, |
|
"debug/policy_rejected_logits": -3.0474984645843506, |
|
"debug/policy_rejected_logps": -184.9208984375, |
|
"debug/reference_chosen_logps": -207.888671875, |
|
"debug/reference_rejected_logps": -185.94973754882812, |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 5.103004709570949, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0143117904663086, |
|
"logits/rejected": -3.0474984645843506, |
|
"logps/chosen": -206.9991455078125, |
|
"logps/rejected": -184.9208984375, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.008895359002053738, |
|
"rewards/margins": -0.0013930893037468195, |
|
"rewards/rejected": 0.010288448072969913, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1157450675964355, |
|
"debug/policy_chosen_logps": -191.06285095214844, |
|
"debug/policy_rejected_logits": -3.1180226802825928, |
|
"debug/policy_rejected_logps": -217.71697998046875, |
|
"debug/reference_chosen_logps": -192.79592895507812, |
|
"debug/reference_rejected_logps": -218.74090576171875, |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 4.434924732032029, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1157450675964355, |
|
"logits/rejected": -3.1180226802825928, |
|
"logps/chosen": -191.06285095214844, |
|
"logps/rejected": -217.71697998046875, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.017330702394247055, |
|
"rewards/margins": 0.00709140719845891, |
|
"rewards/rejected": 0.010239295661449432, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.011009454727173, |
|
"debug/policy_chosen_logps": -214.3972625732422, |
|
"debug/policy_rejected_logits": -3.1553845405578613, |
|
"debug/policy_rejected_logps": -205.53399658203125, |
|
"debug/reference_chosen_logps": -215.999267578125, |
|
"debug/reference_rejected_logps": -204.93153381347656, |
|
"epoch": 0.1346153846153846, |
|
"grad_norm": 5.27755315826341, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.011009454727173, |
|
"logits/rejected": -3.1553845405578613, |
|
"logps/chosen": -214.3972625732422, |
|
"logps/rejected": -205.53399658203125, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.016020087525248528, |
|
"rewards/margins": 0.02204471454024315, |
|
"rewards/rejected": -0.006024627946317196, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0167155265808105, |
|
"debug/policy_chosen_logps": -179.38259887695312, |
|
"debug/policy_rejected_logits": -3.06713604927063, |
|
"debug/policy_rejected_logps": -190.42263793945312, |
|
"debug/reference_chosen_logps": -180.68841552734375, |
|
"debug/reference_rejected_logps": -191.515869140625, |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 4.472209218113578, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0167155265808105, |
|
"logits/rejected": -3.06713604927063, |
|
"logps/chosen": -179.38259887695312, |
|
"logps/rejected": -190.42263793945312, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.013058356940746307, |
|
"rewards/margins": 0.002126025967299938, |
|
"rewards/rejected": 0.01093233097344637, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1482882499694824, |
|
"debug/policy_chosen_logps": -191.95626831054688, |
|
"debug/policy_rejected_logits": -3.1080360412597656, |
|
"debug/policy_rejected_logps": -211.66036987304688, |
|
"debug/reference_chosen_logps": -192.33502197265625, |
|
"debug/reference_rejected_logps": -211.17388916015625, |
|
"epoch": 0.17307692307692307, |
|
"grad_norm": 4.711300291586481, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1482882499694824, |
|
"logits/rejected": -3.1080360412597656, |
|
"logps/chosen": -191.95626831054688, |
|
"logps/rejected": -211.66036987304688, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0037874598056077957, |
|
"rewards/margins": 0.008652305230498314, |
|
"rewards/rejected": -0.004864844959229231, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.047020196914673, |
|
"debug/policy_chosen_logps": -200.92193603515625, |
|
"debug/policy_rejected_logits": -3.1241281032562256, |
|
"debug/policy_rejected_logps": -228.44821166992188, |
|
"debug/reference_chosen_logps": -201.93951416015625, |
|
"debug/reference_rejected_logps": -226.07154846191406, |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 4.555391857522008, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.047020196914673, |
|
"logits/rejected": -3.1241281032562256, |
|
"logps/chosen": -200.92193603515625, |
|
"logps/rejected": -228.44821166992188, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.010175762698054314, |
|
"rewards/margins": 0.033942315727472305, |
|
"rewards/rejected": -0.02376655675470829, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.047590494155884, |
|
"debug/policy_chosen_logps": -193.70095825195312, |
|
"debug/policy_rejected_logits": -2.9977893829345703, |
|
"debug/policy_rejected_logps": -240.172607421875, |
|
"debug/reference_chosen_logps": -194.73997497558594, |
|
"debug/reference_rejected_logps": -239.21884155273438, |
|
"epoch": 0.21153846153846154, |
|
"grad_norm": 5.679651909554435, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.047590494155884, |
|
"logits/rejected": -2.9977893829345703, |
|
"logps/chosen": -193.70095825195312, |
|
"logps/rejected": -240.172607421875, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.010390243493020535, |
|
"rewards/margins": 0.019927941262722015, |
|
"rewards/rejected": -0.009537696838378906, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.063314199447632, |
|
"debug/policy_chosen_logps": -193.97750854492188, |
|
"debug/policy_rejected_logits": -2.8785159587860107, |
|
"debug/policy_rejected_logps": -206.611083984375, |
|
"debug/reference_chosen_logps": -195.43072509765625, |
|
"debug/reference_rejected_logps": -203.80999755859375, |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 5.02375537373724, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.063314199447632, |
|
"logits/rejected": -2.8785159587860107, |
|
"logps/chosen": -193.97750854492188, |
|
"logps/rejected": -206.611083984375, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.014532221481204033, |
|
"rewards/margins": 0.0425429493188858, |
|
"rewards/rejected": -0.02801072970032692, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.060885190963745, |
|
"debug/policy_chosen_logps": -198.6640625, |
|
"debug/policy_rejected_logits": -3.0962471961975098, |
|
"debug/policy_rejected_logps": -247.57131958007812, |
|
"debug/reference_chosen_logps": -202.70645141601562, |
|
"debug/reference_rejected_logps": -239.8416290283203, |
|
"epoch": 0.25, |
|
"grad_norm": 4.916536953774363, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.060885190963745, |
|
"logits/rejected": -3.0962471961975098, |
|
"logps/chosen": -198.6640625, |
|
"logps/rejected": -247.57131958007812, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04042387008666992, |
|
"rewards/margins": 0.11772066354751587, |
|
"rewards/rejected": -0.07729679346084595, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.025531768798828, |
|
"debug/policy_chosen_logps": -200.4529266357422, |
|
"debug/policy_rejected_logits": -2.992382287979126, |
|
"debug/policy_rejected_logps": -220.69435119628906, |
|
"debug/reference_chosen_logps": -204.46011352539062, |
|
"debug/reference_rejected_logps": -221.68063354492188, |
|
"epoch": 0.2692307692307692, |
|
"grad_norm": 4.5758304591077765, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.025531768798828, |
|
"logits/rejected": -2.992382287979126, |
|
"logps/chosen": -200.4529266357422, |
|
"logps/rejected": -220.69435119628906, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.040071770548820496, |
|
"rewards/margins": 0.030209042131900787, |
|
"rewards/rejected": 0.009862728416919708, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1965172290802, |
|
"debug/policy_chosen_logps": -186.35000610351562, |
|
"debug/policy_rejected_logits": -3.1802818775177, |
|
"debug/policy_rejected_logps": -210.35556030273438, |
|
"debug/reference_chosen_logps": -189.31112670898438, |
|
"debug/reference_rejected_logps": -211.51010131835938, |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 5.776855357402379, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1965172290802, |
|
"logits/rejected": -3.1802818775177, |
|
"logps/chosen": -186.35000610351562, |
|
"logps/rejected": -210.35556030273438, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02961129881441593, |
|
"rewards/margins": 0.018065985292196274, |
|
"rewards/rejected": 0.011545314453542233, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.004584550857544, |
|
"debug/policy_chosen_logps": -219.46209716796875, |
|
"debug/policy_rejected_logits": -2.997267007827759, |
|
"debug/policy_rejected_logps": -239.15829467773438, |
|
"debug/reference_chosen_logps": -221.38467407226562, |
|
"debug/reference_rejected_logps": -233.982177734375, |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 5.1995499627013, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.004584550857544, |
|
"logits/rejected": -2.997267007827759, |
|
"logps/chosen": -219.46209716796875, |
|
"logps/rejected": -239.15829467773438, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019225770607590675, |
|
"rewards/margins": 0.07098689675331116, |
|
"rewards/rejected": -0.05176112800836563, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0550475120544434, |
|
"debug/policy_chosen_logps": -196.09921264648438, |
|
"debug/policy_rejected_logits": -3.0315375328063965, |
|
"debug/policy_rejected_logps": -228.4376983642578, |
|
"debug/reference_chosen_logps": -196.76771545410156, |
|
"debug/reference_rejected_logps": -219.703857421875, |
|
"epoch": 0.3269230769230769, |
|
"grad_norm": 4.905311259261036, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0550475120544434, |
|
"logits/rejected": -3.0315375328063965, |
|
"logps/chosen": -196.09921264648438, |
|
"logps/rejected": -228.4376983642578, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.006685066036880016, |
|
"rewards/margins": 0.09402336925268173, |
|
"rewards/rejected": -0.08733831346035004, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.109316110610962, |
|
"debug/policy_chosen_logps": -192.22500610351562, |
|
"debug/policy_rejected_logits": -3.070436954498291, |
|
"debug/policy_rejected_logps": -225.37353515625, |
|
"debug/reference_chosen_logps": -193.43701171875, |
|
"debug/reference_rejected_logps": -215.90811157226562, |
|
"epoch": 0.34615384615384615, |
|
"grad_norm": 5.438987049949224, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.109316110610962, |
|
"logits/rejected": -3.070436954498291, |
|
"logps/chosen": -192.22500610351562, |
|
"logps/rejected": -225.37353515625, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.012120017781853676, |
|
"rewards/margins": 0.10677413642406464, |
|
"rewards/rejected": -0.09465412050485611, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.156306505203247, |
|
"debug/policy_chosen_logps": -198.84149169921875, |
|
"debug/policy_rejected_logits": -3.163816213607788, |
|
"debug/policy_rejected_logps": -207.2037811279297, |
|
"debug/reference_chosen_logps": -199.2224884033203, |
|
"debug/reference_rejected_logps": -204.29345703125, |
|
"epoch": 0.36538461538461536, |
|
"grad_norm": 5.3459094259339395, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.156306505203247, |
|
"logits/rejected": -3.163816213607788, |
|
"logps/chosen": -198.84149169921875, |
|
"logps/rejected": -207.2037811279297, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003809986636042595, |
|
"rewards/margins": 0.032913051545619965, |
|
"rewards/rejected": -0.02910306677222252, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.09970760345459, |
|
"debug/policy_chosen_logps": -195.97071838378906, |
|
"debug/policy_rejected_logits": -3.0445327758789062, |
|
"debug/policy_rejected_logps": -233.5975341796875, |
|
"debug/reference_chosen_logps": -196.68661499023438, |
|
"debug/reference_rejected_logps": -226.482666015625, |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 5.574869499523263, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.09970760345459, |
|
"logits/rejected": -3.0445327758789062, |
|
"logps/chosen": -195.97071838378906, |
|
"logps/rejected": -233.5975341796875, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007159043103456497, |
|
"rewards/margins": 0.07830756902694702, |
|
"rewards/rejected": -0.07114852219820023, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.048816680908203, |
|
"debug/policy_chosen_logps": -187.65283203125, |
|
"debug/policy_rejected_logits": -3.0968563556671143, |
|
"debug/policy_rejected_logps": -222.02886962890625, |
|
"debug/reference_chosen_logps": -191.67349243164062, |
|
"debug/reference_rejected_logps": -218.13577270507812, |
|
"epoch": 0.40384615384615385, |
|
"grad_norm": 4.472533940520768, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.048816680908203, |
|
"logits/rejected": -3.0968563556671143, |
|
"logps/chosen": -187.65283203125, |
|
"logps/rejected": -222.02886962890625, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.04020654410123825, |
|
"rewards/margins": 0.07913752645254135, |
|
"rewards/rejected": -0.0389309898018837, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1425256729125977, |
|
"debug/policy_chosen_logps": -201.33770751953125, |
|
"debug/policy_rejected_logits": -3.146122932434082, |
|
"debug/policy_rejected_logps": -221.27914428710938, |
|
"debug/reference_chosen_logps": -205.18829345703125, |
|
"debug/reference_rejected_logps": -218.09457397460938, |
|
"epoch": 0.4230769230769231, |
|
"grad_norm": 6.018529206237279, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1425256729125977, |
|
"logits/rejected": -3.146122932434082, |
|
"logps/chosen": -201.33770751953125, |
|
"logps/rejected": -221.27914428710938, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.038505878299474716, |
|
"rewards/margins": 0.07035169750452042, |
|
"rewards/rejected": -0.0318458154797554, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.187493324279785, |
|
"debug/policy_chosen_logps": -197.76002502441406, |
|
"debug/policy_rejected_logits": -3.1831955909729004, |
|
"debug/policy_rejected_logps": -201.7500762939453, |
|
"debug/reference_chosen_logps": -200.38540649414062, |
|
"debug/reference_rejected_logps": -205.30821228027344, |
|
"epoch": 0.4423076923076923, |
|
"grad_norm": 4.661142809825874, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.187493324279785, |
|
"logits/rejected": -3.1831955909729004, |
|
"logps/chosen": -197.76002502441406, |
|
"logps/rejected": -201.7500762939453, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02625381574034691, |
|
"rewards/margins": -0.009327447973191738, |
|
"rewards/rejected": 0.03558126464486122, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.20326566696167, |
|
"debug/policy_chosen_logps": -191.74195861816406, |
|
"debug/policy_rejected_logits": -3.187241792678833, |
|
"debug/policy_rejected_logps": -233.16586303710938, |
|
"debug/reference_chosen_logps": -196.749267578125, |
|
"debug/reference_rejected_logps": -227.8116455078125, |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 4.44342516621906, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.20326566696167, |
|
"logits/rejected": -3.187241792678833, |
|
"logps/chosen": -191.74195861816406, |
|
"logps/rejected": -233.16586303710938, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05007295310497284, |
|
"rewards/margins": 0.10361497104167938, |
|
"rewards/rejected": -0.05354202166199684, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.142024517059326, |
|
"debug/policy_chosen_logps": -203.33738708496094, |
|
"debug/policy_rejected_logits": -3.017565965652466, |
|
"debug/policy_rejected_logps": -238.41554260253906, |
|
"debug/reference_chosen_logps": -205.4508056640625, |
|
"debug/reference_rejected_logps": -228.70516967773438, |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 4.368252676849126, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.142024517059326, |
|
"logits/rejected": -3.017565965652466, |
|
"logps/chosen": -203.33738708496094, |
|
"logps/rejected": -238.41554260253906, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.021134089678525925, |
|
"rewards/margins": 0.11823777854442596, |
|
"rewards/rejected": -0.09710369259119034, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0380425453186035, |
|
"debug/policy_chosen_logps": -200.80526733398438, |
|
"debug/policy_rejected_logits": -3.081782102584839, |
|
"debug/policy_rejected_logps": -218.62149047851562, |
|
"debug/reference_chosen_logps": -203.88671875, |
|
"debug/reference_rejected_logps": -216.6439208984375, |
|
"epoch": 0.5, |
|
"grad_norm": 4.4745876509482025, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0380425453186035, |
|
"logits/rejected": -3.081782102584839, |
|
"logps/chosen": -200.80526733398438, |
|
"logps/rejected": -218.62149047851562, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.030814513564109802, |
|
"rewards/margins": 0.05059013515710831, |
|
"rewards/rejected": -0.019775621592998505, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1514594554901123, |
|
"debug/policy_chosen_logps": -195.95147705078125, |
|
"debug/policy_rejected_logits": -2.9963762760162354, |
|
"debug/policy_rejected_logps": -275.3726806640625, |
|
"debug/reference_chosen_logps": -193.73902893066406, |
|
"debug/reference_rejected_logps": -263.09857177734375, |
|
"epoch": 0.5192307692307693, |
|
"grad_norm": 4.481499913956976, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1514594554901123, |
|
"logits/rejected": -2.9963762760162354, |
|
"logps/chosen": -195.95147705078125, |
|
"logps/rejected": -275.3726806640625, |
|
"loss": 0.4362, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.02212451957166195, |
|
"rewards/margins": 0.10061690211296082, |
|
"rewards/rejected": -0.12274143099784851, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1708903312683105, |
|
"debug/policy_chosen_logps": -188.44619750976562, |
|
"debug/policy_rejected_logits": -3.2108025550842285, |
|
"debug/policy_rejected_logps": -220.68988037109375, |
|
"debug/reference_chosen_logps": -194.43800354003906, |
|
"debug/reference_rejected_logps": -220.9051513671875, |
|
"epoch": 0.5384615384615384, |
|
"grad_norm": 4.764561262542065, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1708903312683105, |
|
"logits/rejected": -3.2108025550842285, |
|
"logps/chosen": -188.44619750976562, |
|
"logps/rejected": -220.68988037109375, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05991803854703903, |
|
"rewards/margins": 0.05776527523994446, |
|
"rewards/rejected": 0.002152767963707447, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1714835166931152, |
|
"debug/policy_chosen_logps": -180.14614868164062, |
|
"debug/policy_rejected_logits": -3.08394718170166, |
|
"debug/policy_rejected_logps": -239.15164184570312, |
|
"debug/reference_chosen_logps": -184.656494140625, |
|
"debug/reference_rejected_logps": -224.29171752929688, |
|
"epoch": 0.5576923076923077, |
|
"grad_norm": 4.849507609873306, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1714835166931152, |
|
"logits/rejected": -3.08394718170166, |
|
"logps/chosen": -180.14614868164062, |
|
"logps/rejected": -239.15164184570312, |
|
"loss": 0.3837, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.045103415846824646, |
|
"rewards/margins": 0.1937025487422943, |
|
"rewards/rejected": -0.14859913289546967, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.178560972213745, |
|
"debug/policy_chosen_logps": -210.00115966796875, |
|
"debug/policy_rejected_logits": -3.1779253482818604, |
|
"debug/policy_rejected_logps": -235.44644165039062, |
|
"debug/reference_chosen_logps": -213.55612182617188, |
|
"debug/reference_rejected_logps": -221.76431274414062, |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 4.895017276343638, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.178560972213745, |
|
"logits/rejected": -3.1779253482818604, |
|
"logps/chosen": -210.00115966796875, |
|
"logps/rejected": -235.44644165039062, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03554954379796982, |
|
"rewards/margins": 0.1723707914352417, |
|
"rewards/rejected": -0.13682125508785248, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.108640670776367, |
|
"debug/policy_chosen_logps": -200.23190307617188, |
|
"debug/policy_rejected_logits": -3.1070878505706787, |
|
"debug/policy_rejected_logps": -206.5513153076172, |
|
"debug/reference_chosen_logps": -209.71009826660156, |
|
"debug/reference_rejected_logps": -205.9014892578125, |
|
"epoch": 0.5961538461538461, |
|
"grad_norm": 5.522641702393358, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.108640670776367, |
|
"logits/rejected": -3.1070878505706787, |
|
"logps/chosen": -200.23190307617188, |
|
"logps/rejected": -206.5513153076172, |
|
"loss": 0.4094, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.09478198736906052, |
|
"rewards/margins": 0.10128023475408554, |
|
"rewards/rejected": -0.0064982399344444275, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.165921211242676, |
|
"debug/policy_chosen_logps": -176.6615753173828, |
|
"debug/policy_rejected_logits": -3.197509288787842, |
|
"debug/policy_rejected_logps": -212.1288299560547, |
|
"debug/reference_chosen_logps": -191.31610107421875, |
|
"debug/reference_rejected_logps": -211.03121948242188, |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 5.252195941070867, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.165921211242676, |
|
"logits/rejected": -3.197509288787842, |
|
"logps/chosen": -176.6615753173828, |
|
"logps/rejected": -212.1288299560547, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14654535055160522, |
|
"rewards/margins": 0.15752162039279938, |
|
"rewards/rejected": -0.010976276360452175, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.020163059234619, |
|
"debug/policy_chosen_logps": -203.67031860351562, |
|
"debug/policy_rejected_logits": -3.0926342010498047, |
|
"debug/policy_rejected_logps": -210.38372802734375, |
|
"debug/reference_chosen_logps": -209.38458251953125, |
|
"debug/reference_rejected_logps": -205.83929443359375, |
|
"epoch": 0.6346153846153846, |
|
"grad_norm": 4.8112163863395665, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.020163059234619, |
|
"logits/rejected": -3.0926342010498047, |
|
"logps/chosen": -203.67031860351562, |
|
"logps/rejected": -210.38372802734375, |
|
"loss": 0.3845, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.05714261904358864, |
|
"rewards/margins": 0.10258688032627106, |
|
"rewards/rejected": -0.04544425755739212, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.226917028427124, |
|
"debug/policy_chosen_logps": -197.40658569335938, |
|
"debug/policy_rejected_logits": -3.1886653900146484, |
|
"debug/policy_rejected_logps": -249.4960174560547, |
|
"debug/reference_chosen_logps": -208.50588989257812, |
|
"debug/reference_rejected_logps": -234.88482666015625, |
|
"epoch": 0.6538461538461539, |
|
"grad_norm": 4.70294242121678, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.226917028427124, |
|
"logits/rejected": -3.1886653900146484, |
|
"logps/chosen": -197.40658569335938, |
|
"logps/rejected": -249.4960174560547, |
|
"loss": 0.3647, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11099302023649216, |
|
"rewards/margins": 0.25710487365722656, |
|
"rewards/rejected": -0.146111860871315, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1596603393554688, |
|
"debug/policy_chosen_logps": -196.2579345703125, |
|
"debug/policy_rejected_logits": -3.1261677742004395, |
|
"debug/policy_rejected_logps": -231.90347290039062, |
|
"debug/reference_chosen_logps": -205.25323486328125, |
|
"debug/reference_rejected_logps": -227.40982055664062, |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 7.012486637898167, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1596603393554688, |
|
"logits/rejected": -3.1261677742004395, |
|
"logps/chosen": -196.2579345703125, |
|
"logps/rejected": -231.90347290039062, |
|
"loss": 0.407, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08995288610458374, |
|
"rewards/margins": 0.13488951325416565, |
|
"rewards/rejected": -0.044936634600162506, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1420416831970215, |
|
"debug/policy_chosen_logps": -187.47885131835938, |
|
"debug/policy_rejected_logits": -3.067408800125122, |
|
"debug/policy_rejected_logps": -227.22024536132812, |
|
"debug/reference_chosen_logps": -199.94757080078125, |
|
"debug/reference_rejected_logps": -219.46438598632812, |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 5.480923899428052, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1420416831970215, |
|
"logits/rejected": -3.067408800125122, |
|
"logps/chosen": -187.47885131835938, |
|
"logps/rejected": -227.22024536132812, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12468719482421875, |
|
"rewards/margins": 0.2022458016872406, |
|
"rewards/rejected": -0.07755860686302185, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.134657621383667, |
|
"debug/policy_chosen_logps": -198.46621704101562, |
|
"debug/policy_rejected_logits": -3.191622495651245, |
|
"debug/policy_rejected_logps": -238.76580810546875, |
|
"debug/reference_chosen_logps": -210.65228271484375, |
|
"debug/reference_rejected_logps": -226.888427734375, |
|
"epoch": 0.7115384615384616, |
|
"grad_norm": 6.49950029807871, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.134657621383667, |
|
"logits/rejected": -3.191622495651245, |
|
"logps/chosen": -198.46621704101562, |
|
"logps/rejected": -238.76580810546875, |
|
"loss": 0.4017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12186044454574585, |
|
"rewards/margins": 0.24063441157341003, |
|
"rewards/rejected": -0.11877395212650299, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2385828495025635, |
|
"debug/policy_chosen_logps": -170.713623046875, |
|
"debug/policy_rejected_logits": -3.207604169845581, |
|
"debug/policy_rejected_logps": -235.9530029296875, |
|
"debug/reference_chosen_logps": -183.66558837890625, |
|
"debug/reference_rejected_logps": -223.42767333984375, |
|
"epoch": 0.7307692307692307, |
|
"grad_norm": 5.516561491681134, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2385828495025635, |
|
"logits/rejected": -3.207604169845581, |
|
"logps/chosen": -170.713623046875, |
|
"logps/rejected": -235.9530029296875, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.12951962649822235, |
|
"rewards/margins": 0.2547728717327118, |
|
"rewards/rejected": -0.12525323033332825, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0979700088500977, |
|
"debug/policy_chosen_logps": -162.31280517578125, |
|
"debug/policy_rejected_logits": -3.186298131942749, |
|
"debug/policy_rejected_logps": -235.3626708984375, |
|
"debug/reference_chosen_logps": -172.52468872070312, |
|
"debug/reference_rejected_logps": -221.17556762695312, |
|
"epoch": 0.75, |
|
"grad_norm": 6.774526914117382, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0979700088500977, |
|
"logits/rejected": -3.186298131942749, |
|
"logps/chosen": -162.31280517578125, |
|
"logps/rejected": -235.3626708984375, |
|
"loss": 0.3939, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.10211898386478424, |
|
"rewards/margins": 0.2439899444580078, |
|
"rewards/rejected": -0.14187094569206238, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.089684009552002, |
|
"debug/policy_chosen_logps": -196.1321563720703, |
|
"debug/policy_rejected_logits": -3.04327392578125, |
|
"debug/policy_rejected_logps": -216.24703979492188, |
|
"debug/reference_chosen_logps": -200.5460205078125, |
|
"debug/reference_rejected_logps": -207.2269287109375, |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 5.675881779895136, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.089684009552002, |
|
"logits/rejected": -3.04327392578125, |
|
"logps/chosen": -196.1321563720703, |
|
"logps/rejected": -216.24703979492188, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.04413875192403793, |
|
"rewards/margins": 0.1343398243188858, |
|
"rewards/rejected": -0.09020107239484787, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1140072345733643, |
|
"debug/policy_chosen_logps": -187.277099609375, |
|
"debug/policy_rejected_logits": -3.2128348350524902, |
|
"debug/policy_rejected_logps": -254.4541015625, |
|
"debug/reference_chosen_logps": -192.44972229003906, |
|
"debug/reference_rejected_logps": -245.08372497558594, |
|
"epoch": 0.7884615384615384, |
|
"grad_norm": 6.653395887552183, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1140072345733643, |
|
"logits/rejected": -3.2128348350524902, |
|
"logps/chosen": -187.277099609375, |
|
"logps/rejected": -254.4541015625, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.051726073026657104, |
|
"rewards/margins": 0.14542999863624573, |
|
"rewards/rejected": -0.09370393306016922, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3676302433013916, |
|
"debug/policy_chosen_logps": -159.673095703125, |
|
"debug/policy_rejected_logits": -3.310539484024048, |
|
"debug/policy_rejected_logps": -204.17922973632812, |
|
"debug/reference_chosen_logps": -172.7714385986328, |
|
"debug/reference_rejected_logps": -196.74644470214844, |
|
"epoch": 0.8076923076923077, |
|
"grad_norm": 5.688046713439665, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3676302433013916, |
|
"logits/rejected": -3.310539484024048, |
|
"logps/chosen": -159.673095703125, |
|
"logps/rejected": -204.17922973632812, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.1309833526611328, |
|
"rewards/margins": 0.20531128346920013, |
|
"rewards/rejected": -0.07432794570922852, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.048048496246338, |
|
"debug/policy_chosen_logps": -195.83262634277344, |
|
"debug/policy_rejected_logits": -3.1337571144104004, |
|
"debug/policy_rejected_logps": -220.1021728515625, |
|
"debug/reference_chosen_logps": -204.2503662109375, |
|
"debug/reference_rejected_logps": -214.80859375, |
|
"epoch": 0.8269230769230769, |
|
"grad_norm": 6.260667799966591, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.048048496246338, |
|
"logits/rejected": -3.1337571144104004, |
|
"logps/chosen": -195.83262634277344, |
|
"logps/rejected": -220.1021728515625, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.08417723327875137, |
|
"rewards/margins": 0.13711285591125488, |
|
"rewards/rejected": -0.05293561890721321, |
|
"step": 43 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2857210636138916, |
|
"debug/policy_chosen_logps": -190.01461791992188, |
|
"debug/policy_rejected_logits": -3.210080146789551, |
|
"debug/policy_rejected_logps": -202.32200622558594, |
|
"debug/reference_chosen_logps": -192.96429443359375, |
|
"debug/reference_rejected_logps": -204.96524047851562, |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 5.41978019999578, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2857210636138916, |
|
"logits/rejected": -3.210080146789551, |
|
"logps/chosen": -190.01461791992188, |
|
"logps/rejected": -202.32200622558594, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02949686348438263, |
|
"rewards/margins": 0.0030643679201602936, |
|
"rewards/rejected": 0.026432491838932037, |
|
"step": 44 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.212117910385132, |
|
"debug/policy_chosen_logps": -168.555419921875, |
|
"debug/policy_rejected_logits": -3.191093683242798, |
|
"debug/policy_rejected_logps": -196.78350830078125, |
|
"debug/reference_chosen_logps": -184.03561401367188, |
|
"debug/reference_rejected_logps": -194.875, |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 8.557722940045432, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.212117910385132, |
|
"logits/rejected": -3.191093683242798, |
|
"logps/chosen": -168.555419921875, |
|
"logps/rejected": -196.78350830078125, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.1548018455505371, |
|
"rewards/margins": 0.1738869845867157, |
|
"rewards/rejected": -0.01908515766263008, |
|
"step": 45 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.4355483055114746, |
|
"debug/policy_chosen_logps": -174.82119750976562, |
|
"debug/policy_rejected_logits": -3.4658639430999756, |
|
"debug/policy_rejected_logps": -179.5398712158203, |
|
"debug/reference_chosen_logps": -183.99972534179688, |
|
"debug/reference_rejected_logps": -182.81251525878906, |
|
"epoch": 0.8846153846153846, |
|
"grad_norm": 5.9566531465155, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.4355483055114746, |
|
"logits/rejected": -3.4658639430999756, |
|
"logps/chosen": -174.82119750976562, |
|
"logps/rejected": -179.5398712158203, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09178514778614044, |
|
"rewards/margins": 0.059058789163827896, |
|
"rewards/rejected": 0.032726362347602844, |
|
"step": 46 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0792293548583984, |
|
"debug/policy_chosen_logps": -181.77552795410156, |
|
"debug/policy_rejected_logits": -3.082854986190796, |
|
"debug/policy_rejected_logps": -220.6517791748047, |
|
"debug/reference_chosen_logps": -191.10177612304688, |
|
"debug/reference_rejected_logps": -199.66552734375, |
|
"epoch": 0.9038461538461539, |
|
"grad_norm": 9.698103359515986, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0792293548583984, |
|
"logits/rejected": -3.082854986190796, |
|
"logps/chosen": -181.77552795410156, |
|
"logps/rejected": -220.6517791748047, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.09326266497373581, |
|
"rewards/margins": 0.3031250536441803, |
|
"rewards/rejected": -0.20986239612102509, |
|
"step": 47 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1799800395965576, |
|
"debug/policy_chosen_logps": -182.957763671875, |
|
"debug/policy_rejected_logits": -3.1335575580596924, |
|
"debug/policy_rejected_logps": -217.8682403564453, |
|
"debug/reference_chosen_logps": -194.94949340820312, |
|
"debug/reference_rejected_logps": -203.79934692382812, |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 6.73392677471291, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1799800395965576, |
|
"logits/rejected": -3.1335575580596924, |
|
"logps/chosen": -182.957763671875, |
|
"logps/rejected": -217.8682403564453, |
|
"loss": 0.34, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11991731822490692, |
|
"rewards/margins": 0.26060622930526733, |
|
"rewards/rejected": -0.1406889110803604, |
|
"step": 48 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0402865409851074, |
|
"debug/policy_chosen_logps": -204.64993286132812, |
|
"debug/policy_rejected_logits": -3.1806182861328125, |
|
"debug/policy_rejected_logps": -223.6003875732422, |
|
"debug/reference_chosen_logps": -214.6299285888672, |
|
"debug/reference_rejected_logps": -218.46998596191406, |
|
"epoch": 0.9423076923076923, |
|
"grad_norm": 5.290743823323874, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0402865409851074, |
|
"logits/rejected": -3.1806182861328125, |
|
"logps/chosen": -204.64993286132812, |
|
"logps/rejected": -223.6003875732422, |
|
"loss": 0.3762, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09979984909296036, |
|
"rewards/margins": 0.1511038839817047, |
|
"rewards/rejected": -0.05130405351519585, |
|
"step": 49 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2792398929595947, |
|
"debug/policy_chosen_logps": -172.821533203125, |
|
"debug/policy_rejected_logits": -3.199223756790161, |
|
"debug/policy_rejected_logps": -226.37725830078125, |
|
"debug/reference_chosen_logps": -185.41844177246094, |
|
"debug/reference_rejected_logps": -212.13818359375, |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 12.334225732861476, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2792398929595947, |
|
"logits/rejected": -3.199223756790161, |
|
"logps/chosen": -172.821533203125, |
|
"logps/rejected": -226.37725830078125, |
|
"loss": 0.3587, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12596909701824188, |
|
"rewards/margins": 0.268359899520874, |
|
"rewards/rejected": -0.14239083230495453, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1161139011383057, |
|
"debug/policy_chosen_logps": -183.79429626464844, |
|
"debug/policy_rejected_logits": -3.1612720489501953, |
|
"debug/policy_rejected_logps": -238.53965759277344, |
|
"debug/reference_chosen_logps": -192.13421630859375, |
|
"debug/reference_rejected_logps": -220.17738342285156, |
|
"epoch": 0.9807692307692307, |
|
"grad_norm": 6.871974967844878, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1161139011383057, |
|
"logits/rejected": -3.1612720489501953, |
|
"logps/chosen": -183.79429626464844, |
|
"logps/rejected": -238.53965759277344, |
|
"loss": 0.3778, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0833992063999176, |
|
"rewards/margins": 0.26702186465263367, |
|
"rewards/rejected": -0.18362264335155487, |
|
"step": 51 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1640007495880127, |
|
"debug/policy_chosen_logps": -184.35934448242188, |
|
"debug/policy_rejected_logits": -3.1118485927581787, |
|
"debug/policy_rejected_logps": -235.7113037109375, |
|
"debug/reference_chosen_logps": -200.53074645996094, |
|
"debug/reference_rejected_logps": -225.58468627929688, |
|
"epoch": 1.0, |
|
"grad_norm": 6.270849179735478, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1640007495880127, |
|
"logits/rejected": -3.1118485927581787, |
|
"logps/chosen": -184.35934448242188, |
|
"logps/rejected": -235.7113037109375, |
|
"loss": 0.3221, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1617138683795929, |
|
"rewards/margins": 0.2629801034927368, |
|
"rewards/rejected": -0.10126623511314392, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 52, |
|
"total_flos": 0.0, |
|
"train_loss": 0.43149175896094394, |
|
"train_runtime": 170.8012, |
|
"train_samples_per_second": 19.385, |
|
"train_steps_per_second": 0.304 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 52, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|