NicholasCorrado's picture
End of training
c209c79 verified
raw
history blame contribute delete
573 Bytes
{
"epoch": 0.9963369963369964,
"eval_logits/chosen": -2.5392236709594727,
"eval_logits/rejected": -2.520923614501953,
"eval_logps/chosen": -538.8605346679688,
"eval_logps/rejected": -740.7106323242188,
"eval_loss": 0.39680731296539307,
"eval_rewards/accuracies": 0.78125,
"eval_rewards/chosen": -1.508690357208252,
"eval_rewards/margins": 2.2141778469085693,
"eval_rewards/rejected": -3.7228684425354004,
"eval_runtime": 2.9476,
"eval_samples": 200,
"eval_samples_per_second": 67.851,
"eval_steps_per_second": 0.679
}