|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9981785063752276, |
|
"eval_steps": 500, |
|
"global_step": 274, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18214936247723132, |
|
"grad_norm": 2.3424386978149414, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": 0.3313373327255249, |
|
"logits/rejected": 0.51214599609375, |
|
"logps/chosen": -133.43138122558594, |
|
"logps/rejected": -192.70355224609375, |
|
"loss": 0.4056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.41640931367874146, |
|
"rewards/margins": 0.7543150186538696, |
|
"rewards/rejected": -0.33790573477745056, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36429872495446264, |
|
"grad_norm": 2.2886452674865723, |
|
"learning_rate": 4.897914773180057e-06, |
|
"logits/chosen": 0.03562241047620773, |
|
"logits/rejected": 0.31711575388908386, |
|
"logps/chosen": -147.89747619628906, |
|
"logps/rejected": -268.35821533203125, |
|
"loss": 0.2839, |
|
"rewards/accuracies": 0.9950000047683716, |
|
"rewards/chosen": 0.28251099586486816, |
|
"rewards/margins": 1.3375725746154785, |
|
"rewards/rejected": -1.0550616979599, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 3.107114791870117, |
|
"learning_rate": 4.5557177668257975e-06, |
|
"logits/chosen": -0.2743435502052307, |
|
"logits/rejected": -0.10878603160381317, |
|
"logps/chosen": -208.27593994140625, |
|
"logps/rejected": -473.1844787597656, |
|
"loss": 0.157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3748534321784973, |
|
"rewards/margins": 2.790405511856079, |
|
"rewards/rejected": -3.1652591228485107, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7285974499089253, |
|
"grad_norm": 6.934810638427734, |
|
"learning_rate": 4.006586590948141e-06, |
|
"logits/chosen": -0.08417636901140213, |
|
"logits/rejected": -0.03048125095665455, |
|
"logps/chosen": -362.09002685546875, |
|
"logps/rejected": -824.4659423828125, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.9850000143051147, |
|
"rewards/chosen": -1.878377079963684, |
|
"rewards/margins": 4.862216949462891, |
|
"rewards/rejected": -6.740594387054443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9107468123861566, |
|
"grad_norm": 7.77178430557251, |
|
"learning_rate": 3.3057982907103e-06, |
|
"logits/chosen": 0.220669686794281, |
|
"logits/rejected": 0.25975489616394043, |
|
"logps/chosen": -471.4226989746094, |
|
"logps/rejected": -1041.406005859375, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 0.9850000143051147, |
|
"rewards/chosen": -2.964336633682251, |
|
"rewards/margins": 5.914289474487305, |
|
"rewards/rejected": -8.878626823425293, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 548, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|