wyceee's picture
End of training
9861ea3 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 421.84375,
"epoch": 0.1,
"grad_norm": 7.744514465332031,
"kl": 0.0,
"learning_rate": 4.965903258506806e-07,
"loss": -0.0,
"reward": 0.3874333486892283,
"reward_std": 0.8486921527073719,
"rewards/concensus_correctness_reward_func": 0.03393750078976154,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.3380895941518247,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.04709374811500311,
"step": 2
},
{
"completion_length": 391.84375,
"epoch": 0.2,
"grad_norm": 5.300667762756348,
"kl": 0.0019505722484609578,
"learning_rate": 4.698684378016222e-07,
"loss": 0.0,
"reward": 1.3110849247314036,
"reward_std": 1.6469321683980525,
"rewards/concensus_correctness_reward_func": 0.625,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.3452412204351276,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.21584375109523535,
"step": 4
},
{
"completion_length": 394.46875,
"epoch": 0.3,
"grad_norm": 46.67064666748047,
"kl": 0.0019083435508946422,
"learning_rate": 4.193203929064353e-07,
"loss": 0.0,
"reward": 0.3914839383214712,
"reward_std": 1.3497825153172016,
"rewards/concensus_correctness_reward_func": 0.05831250175833702,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.31582766558858566,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.10765624674968421,
"step": 6
},
{
"completion_length": 411.25,
"epoch": 0.4,
"grad_norm": 16.974180221557617,
"kl": 0.00413768243015511,
"learning_rate": 3.5042385616324236e-07,
"loss": 0.0,
"reward": 0.5314379204064608,
"reward_std": 0.5463541564531624,
"rewards/concensus_correctness_reward_func": 0.0018749999580904841,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.2779379215789959,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0641250021290034,
"step": 8
},
{
"completion_length": 385.9375,
"epoch": 0.5,
"grad_norm": 11.258064270019531,
"kl": 0.002582737106422428,
"learning_rate": 2.706448363680831e-07,
"loss": 0.0,
"reward": 2.051718756556511,
"reward_std": 0.7069938564673066,
"rewards/concensus_correctness_reward_func": 1.3228750005364418,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.4470312101766467,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0318124967161566,
"step": 10
},
{
"completion_length": 446.59375,
"epoch": 0.6,
"grad_norm": 4.753056526184082,
"kl": 0.004790044869878329,
"learning_rate": 1.886286282148002e-07,
"loss": 0.0,
"reward": 0.8615806391462684,
"reward_std": 0.8717811293900013,
"rewards/concensus_correctness_reward_func": 0.08181250095367432,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.474549381993711,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.0072812524158507586,
"step": 12
},
{
"completion_length": 401.125,
"epoch": 0.7,
"grad_norm": 6.981433868408203,
"kl": 0.0025986589171225205,
"learning_rate": 1.1326296046939333e-07,
"loss": 0.0,
"reward": 2.256255905609578,
"reward_std": 2.391559364972636,
"rewards/concensus_correctness_reward_func": 1.361625000834465,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.35309968960064,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.22903124894946814,
"step": 14
},
{
"completion_length": 395.125,
"epoch": 0.8,
"grad_norm": 7.406398773193359,
"kl": 0.008011827812879346,
"learning_rate": 5.271487265090163e-08,
"loss": 0.0,
"reward": 0.8543199766427279,
"reward_std": 1.0620909905992448,
"rewards/concensus_correctness_reward_func": 0.13006250001490116,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.5446637291461229,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.00790624599903822,
"step": 16
},
{
"completion_length": 370.03125,
"epoch": 0.9,
"grad_norm": 56.691341400146484,
"kl": 0.011987929798124242,
"learning_rate": 1.3545689574841341e-08,
"loss": 0.0,
"reward": 1.4162302482873201,
"reward_std": 1.8340731484349817,
"rewards/concensus_correctness_reward_func": 0.6328125,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.4869490059791133,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.23396874405443668,
"step": 18
},
{
"completion_length": 429.40625,
"epoch": 1.0,
"grad_norm": 5.524323463439941,
"kl": 0.00813784722413402,
"learning_rate": 0.0,
"loss": 0.0,
"reward": 1.216457948088646,
"reward_std": 0.9376593017950654,
"rewards/concensus_correctness_reward_func": 0.2383749932050705,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.44552044360898435,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.2825624970719218,
"step": 20
},
{
"epoch": 1.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 4.647485911846161e-06,
"train_runtime": 435.1003,
"train_samples_per_second": 0.735,
"train_steps_per_second": 0.046
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}