|
{ |
|
"epoch": 1.9994743758212876, |
|
"eval_chosen_logps": -132.98548889160156, |
|
"eval_chosen_rewards": -0.4551008343696594, |
|
"eval_log_diff_policy": 6.635798931121826, |
|
"eval_logits": -2.239074468612671, |
|
"eval_logp_accuracy": 0.5631991028785706, |
|
"eval_loss": 44.87443923950195, |
|
"eval_objective": 44.855743408203125, |
|
"eval_rejected_logps": -139.6212921142578, |
|
"eval_rejected_rewards": -0.518205463886261, |
|
"eval_reward_accuracy": 0.5883668661117554, |
|
"eval_runtime": 489.9987, |
|
"eval_samples": 10722, |
|
"eval_samples_per_second": 21.882, |
|
"eval_steps_per_second": 0.912 |
|
} |