{ "epoch": 2.999297541394882, "eval_logits/chosen": 7.7498650550842285, "eval_logits/rejected": 7.4245452880859375, "eval_logps/chosen": -45.089073181152344, "eval_logps/rejected": -46.10942077636719, "eval_loss": 1.940507559083926e-07, "eval_rewards/accuracies": 0.5682492852210999, "eval_rewards/chosen": -45.089073181152344, "eval_rewards/margins": 1.0203527212142944, "eval_rewards/rejected": -46.10942077636719, "eval_runtime": 39.9113, "eval_samples": 1345, "eval_samples_per_second": 33.7, "eval_steps_per_second": 8.444 }