|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 283.8125, |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.1532014608383179, |
|
"kl": 0.013067460153251886, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 5.535428494215012, |
|
"reward_std": 6.1332239508628845, |
|
"rewards/concensus_correctness_reward_func": 2.964875027537346, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.8125, |
|
"rewards/question_recreation_reward_func": 0.5587721578776836, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.046875, |
|
"rewards/xmlcount_reward_func": 0.6524062640964985, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 243.58333333333334, |
|
"epoch": 1.0, |
|
"grad_norm": 0.8902457356452942, |
|
"kl": 0.014269542104254166, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 4.376810510953267, |
|
"reward_std": 3.2900354166825614, |
|
"rewards/concensus_correctness_reward_func": 1.6458333333333333, |
|
"rewards/consensus_reward_func": 0.08333333333333333, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.5833333333333333, |
|
"rewards/question_recreation_reward_func": 0.5328105284521977, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.020833333333333332, |
|
"rewards/xmlcount_reward_func": 0.5106666708985964, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 273.125, |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.4976244568824768, |
|
"kl": 0.011306149302981794, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 7.522334858775139, |
|
"reward_std": 7.5043724700808525, |
|
"rewards/concensus_correctness_reward_func": 4.937187507748604, |
|
"rewards/consensus_reward_func": 0.625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.875, |
|
"rewards/question_recreation_reward_func": 0.5563972145318985, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.5287500089034438, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 244.66666666666666, |
|
"epoch": 2.0, |
|
"grad_norm": 0.44919872283935547, |
|
"kl": 0.014499408192932606, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 5.386006702979405, |
|
"reward_std": 6.149055267373721, |
|
"rewards/concensus_correctness_reward_func": 2.773583302895228, |
|
"rewards/consensus_reward_func": 0.25, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.3333333333333333, |
|
"rewards/question_recreation_reward_func": 0.5773818517724673, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.4517083341876666, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 261.0625, |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.5340657234191895, |
|
"kl": 0.012978739512618631, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 7.555992126464844, |
|
"reward_std": 8.598402701318264, |
|
"rewards/concensus_correctness_reward_func": 5.023000039160252, |
|
"rewards/consensus_reward_func": 0.4375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0, |
|
"rewards/question_recreation_reward_func": 0.5316797159612179, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.563812492415309, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 258.4166666666667, |
|
"epoch": 3.0, |
|
"grad_norm": 0.4479375183582306, |
|
"kl": 0.012453029553095499, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 7.487985849380493, |
|
"reward_std": 7.63406256834666, |
|
"rewards/concensus_correctness_reward_func": 4.7774166117111845, |
|
"rewards/consensus_reward_func": 0.6666666666666666, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0, |
|
"rewards/question_recreation_reward_func": 0.5623606691757838, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.020833333333333332, |
|
"rewards/xmlcount_reward_func": 0.4607083350419998, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 276.59375, |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 1.0146440267562866, |
|
"kl": 0.013750494923442602, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 7.103241473436356, |
|
"reward_std": 6.38451437279582, |
|
"rewards/concensus_correctness_reward_func": 4.362937547266483, |
|
"rewards/consensus_reward_func": 0.375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.1875, |
|
"rewards/question_recreation_reward_func": 0.6148039405234158, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.547375014051795, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 268.5, |
|
"epoch": 4.0, |
|
"grad_norm": 0.4467558264732361, |
|
"kl": 0.013235996011644602, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 6.3883426288763685, |
|
"reward_std": 6.176097631454468, |
|
"rewards/concensus_correctness_reward_func": 3.920333390434583, |
|
"rewards/consensus_reward_func": 0.4166666666666667, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0, |
|
"rewards/question_recreation_reward_func": 0.31771781543890637, |
|
"rewards/soft_format_reward_func": 0.020833333333333332, |
|
"rewards/strict_format_reward_func": 0.0625, |
|
"rewards/xmlcount_reward_func": 0.6502916713555654, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 230.25, |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.7665261626243591, |
|
"kl": 0.014356082305312157, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 8.594983696937561, |
|
"reward_std": 6.361941149458289, |
|
"rewards/concensus_correctness_reward_func": 5.92093750089407, |
|
"rewards/consensus_reward_func": 0.75, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.8125, |
|
"rewards/question_recreation_reward_func": 0.49420230463147163, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.6173437759280205, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 246.75, |
|
"epoch": 5.0, |
|
"grad_norm": 0.45120424032211304, |
|
"kl": 0.014536370212833086, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 3.8146777749061584, |
|
"reward_std": 3.0375414018829665, |
|
"rewards/concensus_correctness_reward_func": 1.2678333421548207, |
|
"rewards/consensus_reward_func": 0.16666666666666666, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.25, |
|
"rewards/question_recreation_reward_func": 0.42113616565863293, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.7090416798988978, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 1.1729459947673604e-05, |
|
"train_runtime": 312.4193, |
|
"train_samples_per_second": 1.024, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|