{ "epoch": 1.0, "eval_logits/chosen": -0.8234652280807495, "eval_logits/rejected": -0.8529273271560669, "eval_logps/chosen": -5.334253311157227, "eval_logps/rejected": -44.067596435546875, "eval_loss": 0.1368969976902008, "eval_rewards/accuracies": 0.940000057220459, "eval_rewards/chosen": -0.3242790699005127, "eval_rewards/margins": 3.4202423095703125, "eval_rewards/rejected": -3.7445216178894043, "eval_runtime": 90.6755, "eval_samples_per_second": 5.514, "eval_steps_per_second": 2.757 }