|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 48, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.021052631578947368, |
|
"grad_norm": 156.80313847716513, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -3.19140625, |
|
"logits/rejected": -3.1171875, |
|
"logps/chosen": -284.0, |
|
"logps/rejected": -347.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 140.79071517526302, |
|
"learning_rate": 3.8023508512198257e-07, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -294.0526428222656, |
|
"logps/rejected": -305.3552551269531, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.007361412048339844, |
|
"rewards/margins": 0.3393391966819763, |
|
"rewards/rejected": -0.34671854972839355, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"eval_logits/chosen": NaN, |
|
"eval_logits/rejected": NaN, |
|
"eval_logps/chosen": -293.04254150390625, |
|
"eval_logps/rejected": -301.6383056640625, |
|
"eval_loss": 0.5011141896247864, |
|
"eval_rewards/accuracies": 0.4976728856563568, |
|
"eval_rewards/chosen": -0.0284495260566473, |
|
"eval_rewards/margins": 0.8294028639793396, |
|
"eval_rewards/rejected": -0.8580036759376526, |
|
"eval_runtime": 292.0408, |
|
"eval_samples_per_second": 15.368, |
|
"eval_steps_per_second": 0.243, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 108.6076169236696, |
|
"learning_rate": 5.212587789268649e-08, |
|
"logits/chosen": NaN, |
|
"logits/rejected": NaN, |
|
"logps/chosen": -298.98748779296875, |
|
"logps/rejected": -308.0874938964844, |
|
"loss": 0.4221, |
|
"rewards/accuracies": 0.5316406488418579, |
|
"rewards/chosen": 0.30016860365867615, |
|
"rewards/margins": 1.107550024986267, |
|
"rewards/rejected": -0.8070526123046875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"eval_logits/chosen": NaN, |
|
"eval_logits/rejected": NaN, |
|
"eval_logps/chosen": -294.68084716796875, |
|
"eval_logps/rejected": -303.2127685546875, |
|
"eval_loss": 0.43298768997192383, |
|
"eval_rewards/accuracies": 0.5176196694374084, |
|
"eval_rewards/chosen": 0.4743392765522003, |
|
"eval_rewards/margins": 1.0208220481872559, |
|
"eval_rewards/rejected": -0.5463438630104065, |
|
"eval_runtime": 291.3347, |
|
"eval_samples_per_second": 15.405, |
|
"eval_steps_per_second": 0.244, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 48, |
|
"total_flos": 0.0, |
|
"train_loss": 0.48351796468098956, |
|
"train_runtime": 4731.3673, |
|
"train_samples_per_second": 3.793, |
|
"train_steps_per_second": 0.01 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 48, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|