|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.7272727272727275, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 376.34375, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 2.123795509338379, |
|
"kl": 0.0008059284700721037, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 0.8194242492318153, |
|
"reward_std": 0.8725666627287865, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.3125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.4507679883390665, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.040531257865950465, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 463.57142857142856, |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.444406121969223, |
|
"kl": 0.0006558234370978815, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 1.0296695147241866, |
|
"reward_std": 0.8633963976587568, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.21428571428571427, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.23541951019849097, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03571428571428571, |
|
"rewards/xmlcount_reward_func": 0.5442500114440918, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 598.5, |
|
"epoch": 2.0, |
|
"grad_norm": 0.40745630860328674, |
|
"kl": 0.0007307843105601412, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 0.7694277209894997, |
|
"reward_std": 1.1632391554968697, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.2857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5034991694348199, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": -0.037642872759274075, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 560.1875, |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.2870396077632904, |
|
"kl": 0.0006468838182627223, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 0.7364773657172918, |
|
"reward_std": 1.0139315500855446, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.25, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.3468836098909378, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.1239687567576766, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 543.0, |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 0.4253978729248047, |
|
"kl": 0.0007500549545511603, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 0.5847084107143539, |
|
"reward_std": 1.0092563756874628, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.21428571428571427, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.42106556040900095, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": -0.06849999725818634, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 533.6428571428571, |
|
"epoch": 4.0, |
|
"grad_norm": 0.30162712931632996, |
|
"kl": 0.0008266470395028591, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 0.4913223215511867, |
|
"reward_std": 0.9674114840371268, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.07142857142857142, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.40028661489486694, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.0017500030142920358, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 566.34375, |
|
"epoch": 4.7272727272727275, |
|
"grad_norm": 0.6862844824790955, |
|
"kl": 0.0007115659973351285, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 0.9112295396625996, |
|
"reward_std": 1.0470195040106773, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.1875, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.4396982938051224, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.26840626634657383, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 535.8571428571429, |
|
"epoch": 5.363636363636363, |
|
"grad_norm": 0.2605418860912323, |
|
"kl": 0.0005985684318667543, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 0.9383800370352608, |
|
"reward_std": 0.9764512436730521, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.21428571428571427, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.42352288003478733, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.28271428868174553, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 533.7857142857143, |
|
"epoch": 6.0, |
|
"grad_norm": 3.9433951377868652, |
|
"kl": 0.0008690485265105963, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 1.395365025315966, |
|
"reward_std": 1.3007023589951652, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.3816864735313824, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.07142857142857142, |
|
"rewards/xmlcount_reward_func": 0.4422500016433852, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 471.875, |
|
"epoch": 6.7272727272727275, |
|
"grad_norm": 0.6051961183547974, |
|
"kl": 0.0007186525617726147, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 0.9024831224232912, |
|
"reward_std": 1.044289842247963, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.25, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.4804831203073263, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.17199999652802944, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.7272727272727275, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 6.874654843613825e-07, |
|
"train_runtime": 1539.8882, |
|
"train_samples_per_second": 0.208, |
|
"train_steps_per_second": 0.013 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|