{ "epoch": 1.0, "eval_logits/chosen": -3.4375, "eval_logits/rejected": -3.546875, "eval_logps/chosen": -372.0, "eval_logps/rejected": -480.0, "eval_loss": 0.45736926794052124, "eval_rewards/accuracies": 0.7609890103340149, "eval_rewards/chosen": -2.3125, "eval_rewards/margins": 1.3671875, "eval_rewards/rejected": -3.671875, "eval_runtime": 2261.7941, "eval_samples": 92400, "eval_samples_per_second": 41.177, "eval_steps_per_second": 0.644 }