|
{ |
|
"epoch": 0.9994944388270981, |
|
"eval_chosen_logps": -94.18656158447266, |
|
"eval_chosen_rewards": 0.024786589667201042, |
|
"eval_log_diff_policy": 2.3620760440826416, |
|
"eval_logits": -2.1694083213806152, |
|
"eval_logp_accuracy": 0.5450223684310913, |
|
"eval_loss": 8.375486373901367, |
|
"eval_objective": 8.485334396362305, |
|
"eval_rejected_logps": -96.54862976074219, |
|
"eval_rejected_rewards": -0.07420787215232849, |
|
"eval_reward_accuracy": 0.6261186003684998, |
|
"eval_runtime": 1121.9363, |
|
"eval_samples": 10722, |
|
"eval_samples_per_second": 9.557, |
|
"eval_steps_per_second": 1.062 |
|
} |