{ "epoch": 0.9997120644975526, "eval_logits/chosen": -0.44044333696365356, "eval_logits/rejected": 0.5402784943580627, "eval_logps/chosen": -702.1751708984375, "eval_logps/rejected": -1123.6224365234375, "eval_loss": 0.3159700930118561, "eval_rewards/accuracies": 0.820067286491394, "eval_rewards/chosen": -4.112070560455322, "eval_rewards/margins": 4.223231792449951, "eval_rewards/rejected": -8.335301399230957, "eval_runtime": 644.1576, "eval_samples": 7126, "eval_samples_per_second": 11.063, "eval_steps_per_second": 0.346 }