| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 20, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 326.59375, | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 4.934439182281494, | |
| "kl": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0, | |
| "reward": 0.9843985685729422, | |
| "reward_std": 0.7370969671756029, | |
| "rewards/concensus_correctness_reward_func": -0.02437499910593033, | |
| "rewards/consensus_reward_func": 0.25, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.39177357318112627, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.3670000056736171, | |
| "step": 2 | |
| }, | |
| { | |
| "completion_length": 371.0, | |
| "epoch": 1.0, | |
| "grad_norm": 3.6815781593322754, | |
| "kl": 0.0011949266287653397, | |
| "learning_rate": 4.864543104251586e-07, | |
| "loss": 0.0, | |
| "reward": 1.5001778639853, | |
| "reward_std": 0.7827915487190088, | |
| "rewards/concensus_correctness_reward_func": 0.03458333263794581, | |
| "rewards/consensus_reward_func": 0.25, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.25, | |
| "rewards/question_recreation_reward_func": 0.4009695214529832, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.5646250148614248, | |
| "step": 4 | |
| }, | |
| { | |
| "completion_length": 359.96875, | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 4.556028842926025, | |
| "kl": 0.0010278360459778924, | |
| "learning_rate": 4.472851273490984e-07, | |
| "loss": 0.0, | |
| "reward": 1.1807272167643532, | |
| "reward_std": 0.653216159567819, | |
| "rewards/concensus_correctness_reward_func": 0.06068750098347664, | |
| "rewards/consensus_reward_func": 0.125, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.47382097283843905, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.45871875062584877, | |
| "step": 6 | |
| }, | |
| { | |
| "completion_length": 439.2916666666667, | |
| "epoch": 2.0, | |
| "grad_norm": 2.2755231857299805, | |
| "kl": 0.0013096223750229303, | |
| "learning_rate": 3.867370395306068e-07, | |
| "loss": 0.0, | |
| "reward": 0.7530886133511862, | |
| "reward_std": 0.9865690590813756, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.08333333333333333, | |
| "rewards/question_recreation_reward_func": 0.37442193475241464, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.295333335796992, | |
| "step": 8 | |
| }, | |
| { | |
| "completion_length": 370.40625, | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 6.62834358215332, | |
| "kl": 0.0012265997065696865, | |
| "learning_rate": 3.1137137178519977e-07, | |
| "loss": 0.0, | |
| "reward": 0.8175993217155337, | |
| "reward_std": 1.0472558625042439, | |
| "rewards/concensus_correctness_reward_func": 0.05756250023841858, | |
| "rewards/consensus_reward_func": 0.125, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.4674430672894232, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.16759375017136335, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 356.4583333333333, | |
| "epoch": 3.0, | |
| "grad_norm": 3.410081148147583, | |
| "kl": 0.001667177789689352, | |
| "learning_rate": 2.2935516363191693e-07, | |
| "loss": 0.0, | |
| "reward": 1.0313266267379124, | |
| "reward_std": 0.6612533178801338, | |
| "rewards/concensus_correctness_reward_func": 0.0790833334128062, | |
| "rewards/consensus_reward_func": 0.0, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.572909953383108, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.37933334335684776, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 401.03125, | |
| "epoch": 3.571428571428571, | |
| "grad_norm": 6.091869354248047, | |
| "kl": 0.0018705126858549193, | |
| "learning_rate": 1.4957614383675767e-07, | |
| "loss": 0.0, | |
| "reward": 0.7606599755818024, | |
| "reward_std": 0.9554330813698471, | |
| "rewards/concensus_correctness_reward_func": 0.06806249916553497, | |
| "rewards/consensus_reward_func": 0.0625, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.5348162406007759, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.09528125822544098, | |
| "step": 14 | |
| }, | |
| { | |
| "completion_length": 407.9166666666667, | |
| "epoch": 4.0, | |
| "grad_norm": 2.649594306945801, | |
| "kl": 0.001604035545218115, | |
| "learning_rate": 8.067960709356478e-08, | |
| "loss": 0.0, | |
| "reward": 0.6916571507851282, | |
| "reward_std": 1.01990426153255, | |
| "rewards/concensus_correctness_reward_func": 0.024666666984558105, | |
| "rewards/consensus_reward_func": 0.08333333333333333, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0, | |
| "rewards/question_recreation_reward_func": 0.4062821501865983, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.17737499624490738, | |
| "step": 16 | |
| }, | |
| { | |
| "completion_length": 412.125, | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 119.07859802246094, | |
| "kl": 0.0031047585580381565, | |
| "learning_rate": 3.013156219837776e-08, | |
| "loss": 0.0, | |
| "reward": 0.8601471782312728, | |
| "reward_std": 0.6142369294539094, | |
| "rewards/concensus_correctness_reward_func": 0.0, | |
| "rewards/consensus_reward_func": 0.0625, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.0625, | |
| "rewards/question_recreation_reward_func": 0.464647185173817, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.2705000061541796, | |
| "step": 18 | |
| }, | |
| { | |
| "completion_length": 346.3333333333333, | |
| "epoch": 5.0, | |
| "grad_norm": 4.018592834472656, | |
| "kl": 0.0016965343481084953, | |
| "learning_rate": 3.4096741493194193e-09, | |
| "loss": 0.0, | |
| "reward": 1.0170780109862487, | |
| "reward_std": 0.9925470755745968, | |
| "rewards/concensus_correctness_reward_func": -0.0019999990860621133, | |
| "rewards/consensus_reward_func": 0.25, | |
| "rewards/cumulative_reward_2": 0.0, | |
| "rewards/final_correctness_reward_func": 0.08333333333333333, | |
| "rewards/question_recreation_reward_func": 0.32816133989642066, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.35758334149916965, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 20, | |
| "total_flos": 0.0, | |
| "train_loss": 1.2790784239768983e-06, | |
| "train_runtime": 291.7059, | |
| "train_samples_per_second": 1.097, | |
| "train_steps_per_second": 0.069 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 20, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |