|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -2.6191320419311523, |
|
"eval_logits/rejected": -2.66033935546875, |
|
"eval_logps/chosen": -198.21322631835938, |
|
"eval_logps/rejected": -216.69395446777344, |
|
"eval_loss": 3718.415283203125, |
|
"eval_rewards/accuracies": 0.5239361524581909, |
|
"eval_rewards/chosen": -0.424562007188797, |
|
"eval_rewards/margins": 0.069640152156353, |
|
"eval_rewards/rejected": -0.494202196598053, |
|
"eval_rewards/safe_rewards": -0.43978598713874817, |
|
"eval_rewards/unsafe_rewards": -0.4005235433578491, |
|
"eval_runtime": 140.0797, |
|
"eval_samples": 1487, |
|
"eval_samples_per_second": 10.615, |
|
"eval_steps_per_second": 0.336, |
|
"train_loss": 5218.799974524457, |
|
"train_runtime": 254.7215, |
|
"train_samples": 1487, |
|
"train_samples_per_second": 5.838, |
|
"train_steps_per_second": 0.181 |
|
} |