{ "epoch": 0.9990987511265611, "eval_logits/chosen": -1.4586249589920044, "eval_logits/rejected": -1.4727187156677246, "eval_logps/chosen": -321.156005859375, "eval_logps/rejected": -309.9280090332031, "eval_loss": 0.8538437485694885, "eval_rewards/accuracies": 0.3540000021457672, "eval_rewards/chosen": -0.07300500571727753, "eval_rewards/margins": -0.05590704455971718, "eval_rewards/rejected": -0.017119567841291428, "eval_runtime": 22.7058, "eval_samples_per_second": 44.042, "eval_steps_per_second": 5.505 }