mistral-7b-expo-7b-IPO-25-2 / all_results.json
hZzy's picture
End of training
5105481 verified
{
"epoch": 0.9994944388270981,
"eval_chosen_logps": -94.18656158447266,
"eval_chosen_rewards": 0.024786589667201042,
"eval_log_diff_policy": 2.3620760440826416,
"eval_logits": -2.1694083213806152,
"eval_logp_accuracy": 0.5450223684310913,
"eval_loss": 8.375486373901367,
"eval_objective": 8.485334396362305,
"eval_rejected_logps": -96.54862976074219,
"eval_rejected_rewards": -0.07420787215232849,
"eval_reward_accuracy": 0.6261186003684998,
"eval_runtime": 1121.9363,
"eval_samples": 10722,
"eval_samples_per_second": 9.557,
"eval_steps_per_second": 1.062,
"total_flos": 0.0,
"train_loss": 8.350798614108328,
"train_runtime": 15801.7176,
"train_samples": 71206,
"train_samples_per_second": 4.506,
"train_steps_per_second": 0.042
}