{ "epoch": 1.0, "eval_logits/chosen": NaN, "eval_logits/rejected": 1.3347594738006592, "eval_logps/chosen": -344.3968200683594, "eval_logps/rejected": -316.984130859375, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.6458333134651184, "eval_rewards/chosen": 2.3433780670166016, "eval_rewards/margins": 0.7027374505996704, "eval_rewards/rejected": 1.6412450075149536, "eval_runtime": 8.3943, "eval_samples_per_second": 119.129, "eval_steps_per_second": 7.505 }