{ "epoch": 1.0, "eval_logits/chosen": NaN, "eval_logits/rejected": 1.0357762575149536, "eval_logps/chosen": -364.1269836425781, "eval_logps/rejected": -333.6666564941406, "eval_loss": NaN, "eval_nll_loss": NaN, "eval_rewards/accuracies": 0.6329365372657776, "eval_rewards/chosen": 0.3672417402267456, "eval_rewards/margins": 0.3941950798034668, "eval_rewards/rejected": -0.02702743373811245, "eval_runtime": 8.3964, "eval_samples_per_second": 119.098, "eval_steps_per_second": 7.503 }