qwen2.5-0.5b-expo-IPO-25-1 / eval_results.json
hZzy's picture
End of training
3baa314 verified
{
"epoch": 1.9994743758212876,
"eval_chosen_logps": -132.98548889160156,
"eval_chosen_rewards": -0.4551008343696594,
"eval_log_diff_policy": 6.635798931121826,
"eval_logits": -2.239074468612671,
"eval_logp_accuracy": 0.5631991028785706,
"eval_loss": 44.87443923950195,
"eval_objective": 44.855743408203125,
"eval_rejected_logps": -139.6212921142578,
"eval_rejected_rewards": -0.518205463886261,
"eval_reward_accuracy": 0.5883668661117554,
"eval_runtime": 489.9987,
"eval_samples": 10722,
"eval_samples_per_second": 21.882,
"eval_steps_per_second": 0.912
}