|
{ |
|
"epoch": 0.9994944388270981, |
|
"eval_chosen_logps": -94.18656158447266, |
|
"eval_chosen_rewards": 0.024786589667201042, |
|
"eval_log_diff_policy": 2.3620760440826416, |
|
"eval_logits": -2.1694083213806152, |
|
"eval_logp_accuracy": 0.5450223684310913, |
|
"eval_loss": 8.375486373901367, |
|
"eval_objective": 8.485334396362305, |
|
"eval_rejected_logps": -96.54862976074219, |
|
"eval_rejected_rewards": -0.07420787215232849, |
|
"eval_reward_accuracy": 0.6261186003684998, |
|
"eval_runtime": 1121.9363, |
|
"eval_samples": 10722, |
|
"eval_samples_per_second": 9.557, |
|
"eval_steps_per_second": 1.062, |
|
"total_flos": 0.0, |
|
"train_loss": 8.350798614108328, |
|
"train_runtime": 15801.7176, |
|
"train_samples": 71206, |
|
"train_samples_per_second": 4.506, |
|
"train_steps_per_second": 0.042 |
|
} |