backup-llama3.2-3b-dpo-mr / all_results.json
obiwit's picture
End of training
a1e2ef7 verified
raw
history blame contribute delete
698 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.90625,
"eval_logits/rejected": -3.171875,
"eval_logps/chosen": -684.0,
"eval_logps/rejected": -1088.0,
"eval_loss": 0.22805513441562653,
"eval_rewards/accuracies": 0.8979725241661072,
"eval_rewards/chosen": -4.96875,
"eval_rewards/margins": 4.3125,
"eval_rewards/rejected": -9.25,
"eval_runtime": 2936.8516,
"eval_samples": 104759,
"eval_samples_per_second": 33.312,
"eval_steps_per_second": 0.521,
"total_flos": 0.0,
"train_loss": 0.3429703987491938,
"train_runtime": 175730.8191,
"train_samples": 1849702,
"train_samples_per_second": 10.526,
"train_steps_per_second": 0.082
}