|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 264.8125, |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.5896669626235962, |
|
"kl": 0.008757207309827209, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 3.556011885404587, |
|
"reward_std": 3.830547973513603, |
|
"rewards/concensus_correctness_reward_func": 1.936812499538064, |
|
"rewards/consensus_reward_func": 0.375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.375, |
|
"rewards/question_recreation_reward_func": 0.4231681600213051, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.44603125285357237, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 230.5, |
|
"epoch": 1.0, |
|
"grad_norm": 0.4409516453742981, |
|
"kl": 0.008206194887558619, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 9.294309139251709, |
|
"reward_std": 8.018560727437338, |
|
"rewards/concensus_correctness_reward_func": 6.433000023166339, |
|
"rewards/consensus_reward_func": 0.6666666666666666, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0833333333333333, |
|
"rewards/question_recreation_reward_func": 0.4835590223471324, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.041666666666666664, |
|
"rewards/xmlcount_reward_func": 0.586083342631658, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 252.96875, |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.809605062007904, |
|
"kl": 0.00894307589624077, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 5.214481353759766, |
|
"reward_std": 5.1277751959860325, |
|
"rewards/concensus_correctness_reward_func": 3.1976874992251396, |
|
"rewards/consensus_reward_func": 0.4375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.625, |
|
"rewards/question_recreation_reward_func": 0.4166688732802868, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.5376249849796295, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 289.0833333333333, |
|
"epoch": 2.0, |
|
"grad_norm": 0.4980953633785248, |
|
"kl": 0.007897172511244813, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 5.711512823899587, |
|
"reward_std": 6.444633464018504, |
|
"rewards/concensus_correctness_reward_func": 3.68374993900458, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.75, |
|
"rewards/question_recreation_reward_func": 0.45838790635267895, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.31937499592701596, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 252.1875, |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.6080841422080994, |
|
"kl": 0.010039961838629097, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 4.326304629445076, |
|
"reward_std": 4.895156145095825, |
|
"rewards/concensus_correctness_reward_func": 2.0929999724030495, |
|
"rewards/consensus_reward_func": 0.25, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0, |
|
"rewards/question_recreation_reward_func": 0.4917107969522476, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03125, |
|
"rewards/xmlcount_reward_func": 0.4603437501937151, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 265.25, |
|
"epoch": 3.0, |
|
"grad_norm": 0.42046964168548584, |
|
"kl": 0.007893032704790434, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 5.077903230985005, |
|
"reward_std": 4.715359782179196, |
|
"rewards/concensus_correctness_reward_func": 2.6067499990264573, |
|
"rewards/consensus_reward_func": 0.5833333333333334, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.6666666666666666, |
|
"rewards/question_recreation_reward_func": 0.5395283401012421, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0625, |
|
"rewards/xmlcount_reward_func": 0.6191250036160151, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 260.625, |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.7956599593162537, |
|
"kl": 0.007486780465114862, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 5.287028789520264, |
|
"reward_std": 6.096508968621492, |
|
"rewards/concensus_correctness_reward_func": 2.754937469959259, |
|
"rewards/consensus_reward_func": 0.375, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 1.0625, |
|
"rewards/question_recreation_reward_func": 0.6092475522309542, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.4697187477722764, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 251.04166666666666, |
|
"epoch": 4.0, |
|
"grad_norm": 0.6040255427360535, |
|
"kl": 0.011015481315553188, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 4.457539727290471, |
|
"reward_std": 2.4467248568932214, |
|
"rewards/concensus_correctness_reward_func": 2.63541666418314, |
|
"rewards/consensus_reward_func": 0.5833333333333334, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.3333333333333333, |
|
"rewards/question_recreation_reward_func": 0.22862321510910988, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.041666666666666664, |
|
"rewards/xmlcount_reward_func": 0.6351666748523712, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 275.34375, |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.697487473487854, |
|
"kl": 0.006818667257903144, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 5.823377624154091, |
|
"reward_std": 4.7229442447423935, |
|
"rewards/concensus_correctness_reward_func": 3.429312475025654, |
|
"rewards/consensus_reward_func": 0.5625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.75, |
|
"rewards/question_recreation_reward_func": 0.5220653265714645, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.03125, |
|
"rewards/xmlcount_reward_func": 0.5282500144094229, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 201.45833333333334, |
|
"epoch": 5.0, |
|
"grad_norm": 0.4893139898777008, |
|
"kl": 0.013442914622525374, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 5.362700551748276, |
|
"reward_std": 4.852403928836186, |
|
"rewards/concensus_correctness_reward_func": 3.37349999944369, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.4166666666666667, |
|
"rewards/question_recreation_reward_func": 0.38161713629961014, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.041666666666666664, |
|
"rewards/xmlcount_reward_func": 0.6492500106493632, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 7.851024111005245e-06, |
|
"train_runtime": 560.7474, |
|
"train_samples_per_second": 0.571, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|