{ "epoch": 1.0, "eval_logits/chosen": -0.8234652280807495, "eval_logits/rejected": -0.8529273271560669, "eval_logps/chosen": -5.334253311157227, "eval_logps/rejected": -44.067596435546875, "eval_loss": 0.1368969976902008, "eval_rewards/accuracies": 0.940000057220459, "eval_rewards/chosen": -0.3242790699005127, "eval_rewards/margins": 3.4202423095703125, "eval_rewards/rejected": -3.7445216178894043, "eval_runtime": 90.6755, "eval_samples_per_second": 5.514, "eval_steps_per_second": 2.757, "total_flos": 3.5989630324781875e+17, "train_loss": 0.25213732730828975, "train_runtime": 8848.1085, "train_samples_per_second": 1.512, "train_steps_per_second": 0.189 }