|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.7272727272727275, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 482.3125, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 0.6279861927032471, |
|
"kl": 0.0009842957915680017, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 0.6330392956733704, |
|
"reward_std": 1.1256224997341633, |
|
"rewards/concensus_correctness_reward_func": 0.015124999918043613, |
|
"rewards/consensus_reward_func": 0.1875, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5232893172651529, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": -0.10850000288337469, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 422.89285714285717, |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.6565085053443909, |
|
"kl": 0.0014388576881693943, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 0.8594981346811567, |
|
"reward_std": 1.1113629596573966, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.2857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5231409924370902, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.03278571793011257, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 533.6785714285714, |
|
"epoch": 2.0, |
|
"grad_norm": 0.32601848244667053, |
|
"kl": 0.0011336713297558682, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 1.016930375780378, |
|
"reward_std": 0.86872969354902, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.2857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5773232643093381, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.15389286247747286, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 513.0, |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.4944967031478882, |
|
"kl": 0.0013540829895646311, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 0.9314682334661484, |
|
"reward_std": 1.133194461464882, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.1875, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.598999485373497, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03125, |
|
"rewards/xmlcount_reward_func": 0.11371875554323196, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 617.1428571428571, |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 0.38184112310409546, |
|
"kl": 0.0010740802307347102, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 1.5167566197259086, |
|
"reward_std": 1.177451640367508, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.7142857142857143, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5730780448232379, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.22939286274569376, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 527.3571428571429, |
|
"epoch": 4.0, |
|
"grad_norm": 0.28677794337272644, |
|
"kl": 0.001259099051821977, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 0.703001109617097, |
|
"reward_std": 0.8764347093445914, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.07142857142857142, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5151082554033825, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.09860714418547493, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 502.65625, |
|
"epoch": 4.7272727272727275, |
|
"grad_norm": 7.51918363571167, |
|
"kl": 0.0011182346497662365, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 1.0275223618373275, |
|
"reward_std": 0.9978118315339088, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.3125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5700535625219345, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.1449687611311674, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 450.17857142857144, |
|
"epoch": 5.363636363636363, |
|
"grad_norm": 2.624408721923828, |
|
"kl": 0.0018549877318686672, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 0.9029674737581185, |
|
"reward_std": 0.8381789156368801, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.2857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.07142857142857142, |
|
"rewards/question_recreation_reward_func": 0.5387888678482601, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": -0.010821425489016942, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 378.7142857142857, |
|
"epoch": 6.0, |
|
"grad_norm": 29.219697952270508, |
|
"kl": 0.0020587185031867455, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 1.33051621062415, |
|
"reward_std": 1.2022017240524292, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.627409074987684, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03571428571428571, |
|
"rewards/xmlcount_reward_func": 0.16739285843712942, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 499.46875, |
|
"epoch": 6.7272727272727275, |
|
"grad_norm": 2.2562317848205566, |
|
"kl": 0.0015057353666634299, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 1.1141281686723232, |
|
"reward_std": 1.3231780752539635, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.6001906581223011, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.12331249937415123, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.7272727272727275, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2697431884589604e-06, |
|
"train_runtime": 659.566, |
|
"train_samples_per_second": 0.485, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|