|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.7272727272727275, |
|
"eval_steps": 500, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 510.96875, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 1.453367829322815, |
|
"kl": 0.0006581695124623366, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"reward": 0.7741625420749187, |
|
"reward_std": 0.9262352548539639, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.3125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.6100688222795725, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": -0.16403124295175076, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 421.10714285714283, |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.7830181121826172, |
|
"kl": 0.000890114635694772, |
|
"learning_rate": 4.864543104251586e-07, |
|
"loss": 0.0, |
|
"reward": 0.9196308766092572, |
|
"reward_std": 0.9299162881714957, |
|
"rewards/concensus_correctness_reward_func": 0.008928571428571428, |
|
"rewards/consensus_reward_func": 0.5714285714285714, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.4515237451664039, |
|
"rewards/soft_format_reward_func": 0.017857142857142856, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.13010714309556143, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 456.5, |
|
"epoch": 2.0, |
|
"grad_norm": 4.7355804443359375, |
|
"kl": 0.0007553862607372659, |
|
"learning_rate": 4.472851273490984e-07, |
|
"loss": 0.0, |
|
"reward": 0.9059551358222961, |
|
"reward_std": 0.9832929755960192, |
|
"rewards/concensus_correctness_reward_func": 0.018785715103149414, |
|
"rewards/consensus_reward_func": 0.42857142857142855, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5872765609196254, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": -0.1286785728963358, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 484.59375, |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 1.9618520736694336, |
|
"kl": 0.0008095701414276846, |
|
"learning_rate": 3.867370395306068e-07, |
|
"loss": 0.0, |
|
"reward": 1.1015731655061245, |
|
"reward_std": 1.1763431653380394, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5625, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5851044002920389, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": -0.061656259931623936, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 410.0357142857143, |
|
"epoch": 3.3636363636363638, |
|
"grad_norm": 0.5187426805496216, |
|
"kl": 0.0008327829002934907, |
|
"learning_rate": 3.1137137178519977e-07, |
|
"loss": 0.0, |
|
"reward": 1.1646665313414164, |
|
"reward_std": 1.260940168585096, |
|
"rewards/concensus_correctness_reward_func": 0.03757143020629883, |
|
"rewards/consensus_reward_func": 0.35714285714285715, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.4700236969760486, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.28207142438207355, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 464.9642857142857, |
|
"epoch": 4.0, |
|
"grad_norm": 0.388344943523407, |
|
"kl": 0.0007031915218768907, |
|
"learning_rate": 2.2935516363191693e-07, |
|
"loss": 0.0, |
|
"reward": 0.9917694830468723, |
|
"reward_std": 0.8420192471572331, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.35714285714285715, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.48387659234660013, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.1507500016263553, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 443.6875, |
|
"epoch": 4.7272727272727275, |
|
"grad_norm": 0.33633482456207275, |
|
"kl": 0.0007299767530639656, |
|
"learning_rate": 1.4957614383675767e-07, |
|
"loss": 0.0, |
|
"reward": 1.0675941202789545, |
|
"reward_std": 1.0126370824873447, |
|
"rewards/concensus_correctness_reward_func": 0.016437500715255737, |
|
"rewards/consensus_reward_func": 0.3125, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.5887503828853369, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.0, |
|
"rewards/xmlcount_reward_func": 0.14990626275539398, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 520.8214285714286, |
|
"epoch": 5.363636363636363, |
|
"grad_norm": 0.3216298222541809, |
|
"kl": 0.0007666656913767968, |
|
"learning_rate": 8.067960709356478e-08, |
|
"loss": 0.0, |
|
"reward": 1.5415238652910506, |
|
"reward_std": 1.5021810361317225, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.7857142857142857, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.6235595515796116, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.017857142857142856, |
|
"rewards/xmlcount_reward_func": 0.11439285959516253, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 468.7857142857143, |
|
"epoch": 6.0, |
|
"grad_norm": 0.36451953649520874, |
|
"kl": 0.0007882461733450847, |
|
"learning_rate": 3.013156219837776e-08, |
|
"loss": 0.0, |
|
"reward": 1.0603240898677282, |
|
"reward_std": 0.9460750988551548, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.35714285714285715, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.639431217951434, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.05357142857142857, |
|
"rewards/xmlcount_reward_func": 0.01017856172152928, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 503.0, |
|
"epoch": 6.7272727272727275, |
|
"grad_norm": 0.4202514886856079, |
|
"kl": 0.0007476642495021224, |
|
"learning_rate": 3.4096741493194193e-09, |
|
"loss": 0.0, |
|
"reward": 1.4001894146203995, |
|
"reward_std": 1.0581434704363346, |
|
"rewards/concensus_correctness_reward_func": 0.0, |
|
"rewards/consensus_reward_func": 0.5, |
|
"rewards/cumulative_reward_2": 0.0, |
|
"rewards/final_correctness_reward_func": 0.0, |
|
"rewards/question_recreation_reward_func": 0.7372518964111805, |
|
"rewards/soft_format_reward_func": 0.0, |
|
"rewards/strict_format_reward_func": 0.015625, |
|
"rewards/xmlcount_reward_func": 0.1473125098273158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.7272727272727275, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 7.141880530525668e-07, |
|
"train_runtime": 1692.5748, |
|
"train_samples_per_second": 0.189, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|