|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.7272727272727275, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 463.46875, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 4.7122392654418945, |
|
"kl": 0.0023938784142956138, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 1.3335974533110857, |
|
"reward_std": 0.9099930226802826, |
|
"rewards/concensus_correctness_reward_func": 0.11125000193715096, |
|
"rewards/consensus_reward_func": 0.3125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5996600110083818, |
|
"rewards/soft_format_reward_func": 0.015625, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.27893750881776214, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 473.5357142857143, |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.35275641083717346, |
|
"kl": 0.00162335585004517, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 1.2401375004223414, |
|
"reward_std": 1.1636551831449782, |
|
"rewards/concensus_correctness_reward_func": 0.03235714137554169, |
|
"rewards/consensus_reward_func": 0.35714285714285715, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.07142857142857142, |
|
"rewards/question_recreation_reward_func": 0.6997446715831757, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.07946428337267467, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 548.2142857142857, |
|
"epoch": 2.0, |
|
"grad_norm": 0.29174864292144775, |
|
"kl": 0.001679773342662624, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 1.5835289103644234, |
|
"reward_std": 1.2641019523143768, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5714285714285714, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.07142857142857142, |
|
"rewards/question_recreation_reward_func": 0.5858860335179737, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.35478571483067106, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 506.09375, |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.43735402822494507, |
|
"kl": 0.0018278197894687764, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 1.307539526373148, |
|
"reward_std": 0.752528958953917, |
|
"rewards/concensus_correctness_reward_func": 0.0260624997317791, |
|
"rewards/consensus_reward_func": 0.4375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.6156957671046257, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.22828123718500137, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 529.3571428571429, |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 0.46579766273498535, |
|
"kl": 0.0015770103948722994, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 1.6101772274289812, |
|
"reward_std": 1.2770880801337106, |
|
"rewards/concensus_correctness_reward_func": 0.07957142804350172, |
|
"rewards/consensus_reward_func": 0.6428571428571429, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.7367129921913147, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.13317857363394328, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 488.75, |
|
"epoch": 4.0, |
|
"grad_norm": 0.43027985095977783, |
|
"kl": 0.0018851734598034195, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 1.1790043881961279, |
|
"reward_std": 0.9463666294302259, |
|
"rewards/concensus_correctness_reward_func": 0.021571427583694458, |
|
"rewards/consensus_reward_func": 0.2857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5378972675119128, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03571428571428571, |
|
"rewards/xmlcount_reward_func": 0.29810712380068644, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 443.28125, |
|
"epoch": 4.7272727272727275, |
|
"grad_norm": 0.35241565108299255, |
|
"kl": 0.0019940045895054936, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 1.4534906595945358, |
|
"reward_std": 1.537332609295845, |
|
"rewards/concensus_correctness_reward_func": 0.0293125007301569, |
|
"rewards/consensus_reward_func": 0.375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.6077094078063965, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.42584373615682125, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 404.2857142857143, |
|
"epoch": 5.363636363636363, |
|
"grad_norm": 0.41596055030822754, |
|
"kl": 0.0019185203293870603, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 1.3030461839267187, |
|
"reward_std": 1.104127287864685, |
|
"rewards/concensus_correctness_reward_func": 0.04614285698958805, |
|
"rewards/consensus_reward_func": 0.35714285714285715, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5750104912689754, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.3068928521658693, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 512.0714285714286, |
|
"epoch": 6.0, |
|
"grad_norm": 0.3919805586338043, |
|
"kl": 0.0017016392750000314, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 1.4165151970727103, |
|
"reward_std": 1.0552355008465903, |
|
"rewards/concensus_correctness_reward_func": 0.04207142761775425, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.71719377381461, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.13939285171883448, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 558.65625, |
|
"epoch": 6.7272727272727275, |
|
"grad_norm": 0.35119664669036865, |
|
"kl": 0.0012935696577187628, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 1.1207781359553337, |
|
"reward_std": 1.0906498096883297, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.7737468928098679, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.21546875592321157, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.7272727272727275, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 1.6483349213558541e-06, |
|
"train_runtime": 611.4563, |
|
"train_samples_per_second": 0.523, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|