{ "epoch": 0.9979342973279136, "eval_logits/chosen": -0.6978676319122314, "eval_logits/rejected": -0.647840678691864, "eval_logps/chosen": -443.60040283203125, "eval_logps/rejected": -591.8262939453125, "eval_loss": 0.4824504852294922, "eval_rewards/accuracies": 0.7739307284355164, "eval_rewards/chosen": -2.9100284576416016, "eval_rewards/margins": 1.4839025735855103, "eval_rewards/rejected": -4.393930435180664, "eval_runtime": 365.8162, "eval_samples": 1962, "eval_samples_per_second": 5.363, "eval_steps_per_second": 1.342, "total_flos": 0.0, "train_loss": 0.5391119274064007, "train_runtime": 25974.5898, "train_samples": 60028, "train_samples_per_second": 2.311, "train_steps_per_second": 0.018 }