haedahae's picture
End of training
06c8106 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 264.8125,
"epoch": 0.5714285714285714,
"grad_norm": 0.5896669626235962,
"kl": 0.008757207309827209,
"learning_rate": 5e-07,
"loss": 0.0,
"reward": 3.556011885404587,
"reward_std": 3.830547973513603,
"rewards/concensus_correctness_reward_func": 1.936812499538064,
"rewards/consensus_reward_func": 0.375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.375,
"rewards/question_recreation_reward_func": 0.4231681600213051,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.44603125285357237,
"step": 2
},
{
"completion_length": 230.5,
"epoch": 1.0,
"grad_norm": 0.4409516453742981,
"kl": 0.008206194887558619,
"learning_rate": 4.864543104251586e-07,
"loss": 0.0,
"reward": 9.294309139251709,
"reward_std": 8.018560727437338,
"rewards/concensus_correctness_reward_func": 6.433000023166339,
"rewards/consensus_reward_func": 0.6666666666666666,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 1.0833333333333333,
"rewards/question_recreation_reward_func": 0.4835590223471324,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.041666666666666664,
"rewards/xmlcount_reward_func": 0.586083342631658,
"step": 4
},
{
"completion_length": 252.96875,
"epoch": 1.5714285714285714,
"grad_norm": 0.809605062007904,
"kl": 0.00894307589624077,
"learning_rate": 4.472851273490984e-07,
"loss": 0.0,
"reward": 5.214481353759766,
"reward_std": 5.1277751959860325,
"rewards/concensus_correctness_reward_func": 3.1976874992251396,
"rewards/consensus_reward_func": 0.4375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.625,
"rewards/question_recreation_reward_func": 0.4166688732802868,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.5376249849796295,
"step": 6
},
{
"completion_length": 289.0833333333333,
"epoch": 2.0,
"grad_norm": 0.4980953633785248,
"kl": 0.007897172511244813,
"learning_rate": 3.867370395306068e-07,
"loss": 0.0,
"reward": 5.711512823899587,
"reward_std": 6.444633464018504,
"rewards/concensus_correctness_reward_func": 3.68374993900458,
"rewards/consensus_reward_func": 0.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.75,
"rewards/question_recreation_reward_func": 0.45838790635267895,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.31937499592701596,
"step": 8
},
{
"completion_length": 252.1875,
"epoch": 2.571428571428571,
"grad_norm": 0.6080841422080994,
"kl": 0.010039961838629097,
"learning_rate": 3.1137137178519977e-07,
"loss": 0.0,
"reward": 4.326304629445076,
"reward_std": 4.895156145095825,
"rewards/concensus_correctness_reward_func": 2.0929999724030495,
"rewards/consensus_reward_func": 0.25,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 1.0,
"rewards/question_recreation_reward_func": 0.4917107969522476,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.03125,
"rewards/xmlcount_reward_func": 0.4603437501937151,
"step": 10
},
{
"completion_length": 265.25,
"epoch": 3.0,
"grad_norm": 0.42046964168548584,
"kl": 0.007893032704790434,
"learning_rate": 2.2935516363191693e-07,
"loss": 0.0,
"reward": 5.077903230985005,
"reward_std": 4.715359782179196,
"rewards/concensus_correctness_reward_func": 2.6067499990264573,
"rewards/consensus_reward_func": 0.5833333333333334,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.6666666666666666,
"rewards/question_recreation_reward_func": 0.5395283401012421,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0625,
"rewards/xmlcount_reward_func": 0.6191250036160151,
"step": 12
},
{
"completion_length": 260.625,
"epoch": 3.571428571428571,
"grad_norm": 0.7956599593162537,
"kl": 0.007486780465114862,
"learning_rate": 1.4957614383675767e-07,
"loss": 0.0,
"reward": 5.287028789520264,
"reward_std": 6.096508968621492,
"rewards/concensus_correctness_reward_func": 2.754937469959259,
"rewards/consensus_reward_func": 0.375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 1.0625,
"rewards/question_recreation_reward_func": 0.6092475522309542,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.015625,
"rewards/xmlcount_reward_func": 0.4697187477722764,
"step": 14
},
{
"completion_length": 251.04166666666666,
"epoch": 4.0,
"grad_norm": 0.6040255427360535,
"kl": 0.011015481315553188,
"learning_rate": 8.067960709356478e-08,
"loss": 0.0,
"reward": 4.457539727290471,
"reward_std": 2.4467248568932214,
"rewards/concensus_correctness_reward_func": 2.63541666418314,
"rewards/consensus_reward_func": 0.5833333333333334,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.3333333333333333,
"rewards/question_recreation_reward_func": 0.22862321510910988,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.041666666666666664,
"rewards/xmlcount_reward_func": 0.6351666748523712,
"step": 16
},
{
"completion_length": 275.34375,
"epoch": 4.571428571428571,
"grad_norm": 0.697487473487854,
"kl": 0.006818667257903144,
"learning_rate": 3.013156219837776e-08,
"loss": 0.0,
"reward": 5.823377624154091,
"reward_std": 4.7229442447423935,
"rewards/concensus_correctness_reward_func": 3.429312475025654,
"rewards/consensus_reward_func": 0.5625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.75,
"rewards/question_recreation_reward_func": 0.5220653265714645,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.03125,
"rewards/xmlcount_reward_func": 0.5282500144094229,
"step": 18
},
{
"completion_length": 201.45833333333334,
"epoch": 5.0,
"grad_norm": 0.4893139898777008,
"kl": 0.013442914622525374,
"learning_rate": 3.4096741493194193e-09,
"loss": 0.0,
"reward": 5.362700551748276,
"reward_std": 4.852403928836186,
"rewards/concensus_correctness_reward_func": 3.37349999944369,
"rewards/consensus_reward_func": 0.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.4166666666666667,
"rewards/question_recreation_reward_func": 0.38161713629961014,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.041666666666666664,
"rewards/xmlcount_reward_func": 0.6492500106493632,
"step": 20
},
{
"epoch": 5.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 7.851024111005245e-06,
"train_runtime": 560.7474,
"train_samples_per_second": 0.571,
"train_steps_per_second": 0.036
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}