Jarrodbarnes's picture
End of training
54247c3 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6956521739130435,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 167.71875,
"epoch": 0.06956521739130435,
"grad_norm": 46.50515365600586,
"kl": 0.0,
"learning_rate": 4.965903258506806e-07,
"loss": 0.0,
"reward": 0.23886918861535378,
"reward_std": 0.04501778557471425,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.11086919068475254,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.003000000026077032,
"step": 2
},
{
"completion_length": 213.9375,
"epoch": 0.1391304347826087,
"grad_norm": 199004.953125,
"kl": 0.004053308793572796,
"learning_rate": 4.698684378016222e-07,
"loss": 0.0,
"reward": 0.08304696858976968,
"reward_std": 0.027729042252758518,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.08317196861025877,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.0001250000059371814,
"step": 4
},
{
"completion_length": 92.65625,
"epoch": 0.20869565217391303,
"grad_norm": 10.352762222290039,
"kl": 0.011977128890293898,
"learning_rate": 4.193203929064353e-07,
"loss": 0.0,
"reward": 0.29487171000801027,
"reward_std": 0.11191323117236607,
"rewards/concensus_correctness_reward_func": 0.014999999664723873,
"rewards/consensus_reward_func": 0.125,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.08846545231062919,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.00390625,
"step": 6
},
{
"completion_length": 138.34375,
"epoch": 0.2782608695652174,
"grad_norm": 44472596.0,
"kl": 0.007592094505525893,
"learning_rate": 3.5042385616324236e-07,
"loss": 0.0,
"reward": 0.15320231160148978,
"reward_std": 0.131714646180626,
"rewards/concensus_correctness_reward_func": 0.006562499795109034,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.0841398109914735,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 8
},
{
"completion_length": 111.40625,
"epoch": 0.34782608695652173,
"grad_norm": 36.51017379760742,
"kl": 0.2421079762170848,
"learning_rate": 2.706448363680831e-07,
"loss": 0.0002,
"reward": 0.16755636170273647,
"reward_std": 0.14275544960651132,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.10265011055162176,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.002406249986961484,
"step": 10
},
{
"completion_length": 112.75,
"epoch": 0.41739130434782606,
"grad_norm": 22.138507843017578,
"kl": 0.003069994276302168,
"learning_rate": 1.886286282148002e-07,
"loss": 0.0,
"reward": 0.13111776718869805,
"reward_std": 0.03130402306851465,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.13111776718869805,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 12
},
{
"completion_length": 230.28125,
"epoch": 0.48695652173913045,
"grad_norm": 16.621665954589844,
"kl": 0.0028904032737955276,
"learning_rate": 1.1326296046939333e-07,
"loss": 0.0,
"reward": 0.07638290524482727,
"reward_std": 0.04352241018023051,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.07244540535612032,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.003937500063329935,
"step": 14
},
{
"completion_length": 212.46875,
"epoch": 0.5565217391304348,
"grad_norm": 31.35605812072754,
"kl": 0.019228663304602378,
"learning_rate": 5.271487265090163e-08,
"loss": 0.0,
"reward": 0.22756990580819547,
"reward_std": 0.21891088442862383,
"rewards/concensus_correctness_reward_func": 0.0078125,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.0947574048768729,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 16
},
{
"completion_length": 216.84375,
"epoch": 0.6260869565217392,
"grad_norm": 11.983558654785156,
"kl": 0.038972208321865764,
"learning_rate": 1.3545689574841341e-08,
"loss": 0.0,
"reward": 0.0798949726158753,
"reward_std": 0.03412955232488457,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.0798949726158753,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 18
},
{
"completion_length": 193.875,
"epoch": 0.6956521739130435,
"grad_norm": 49818.66796875,
"kl": 0.03839717447408475,
"learning_rate": 0.0,
"loss": 0.0,
"reward": 0.07137113180942833,
"reward_std": 0.025341270804347005,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.07137113180942833,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.0,
"step": 20
},
{
"epoch": 0.6956521739130435,
"step": 20,
"total_flos": 0.0,
"train_loss": 3.689589379973768e-05,
"train_runtime": 3788.7825,
"train_samples_per_second": 0.084,
"train_steps_per_second": 0.005
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}