|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4878048780487805, |
|
"eval_steps": 500, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 127.0, |
|
"completions/max_terminated_length": 127.0, |
|
"completions/mean_length": 59.25, |
|
"completions/mean_terminated_length": 59.25, |
|
"completions/min_length": 13.0, |
|
"completions/min_terminated_length": 13.0, |
|
"epoch": 0.0975609756097561, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 25.766353607177734, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0186, |
|
"num_tokens": 1498.0, |
|
"reward": 0.049393012188374996, |
|
"reward_std": 0.048807840794324875, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.02114301174879074, |
|
"rewards/question_recreation_reward_func/std": 0.0122159318998456, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.02824999950826168, |
|
"rewards/xmlcount_reward_func/std": 0.05649999901652336, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 132.0, |
|
"completions/max_terminated_length": 132.0, |
|
"completions/mean_length": 53.75, |
|
"completions/mean_terminated_length": 53.75, |
|
"completions/min_length": 24.5, |
|
"completions/min_terminated_length": 24.5, |
|
"epoch": 0.1951219512195122, |
|
"frac_reward_zero_std": 0.25, |
|
"grad_norm": 19.915145874023438, |
|
"kl": 0.0023184213787317276, |
|
"learning_rate": 4.415111107797445e-07, |
|
"loss": 0.0505, |
|
"num_tokens": 2952.0, |
|
"reward": 0.04935022257268429, |
|
"reward_std": 0.05023687332868576, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.018100222572684288, |
|
"rewards/question_recreation_reward_func/std": 0.012556762900203466, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.03125, |
|
"rewards/xmlcount_reward_func/std": 0.0625, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 67.5, |
|
"completions/max_terminated_length": 67.5, |
|
"completions/mean_length": 28.25, |
|
"completions/mean_terminated_length": 28.25, |
|
"completions/min_length": 5.5, |
|
"completions/min_terminated_length": 5.5, |
|
"epoch": 0.2926829268292683, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 97.34249877929688, |
|
"kl": 0.01801438198890537, |
|
"learning_rate": 2.934120444167326e-07, |
|
"loss": -0.16, |
|
"num_tokens": 4202.0, |
|
"reward": 0.114079300314188, |
|
"reward_std": 0.08150303550064564, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.08282929984852672, |
|
"rewards/question_recreation_reward_func/std": 0.03306010598316789, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.03125, |
|
"rewards/xmlcount_reward_func/std": 0.0625, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 75.5, |
|
"completions/max_terminated_length": 75.5, |
|
"completions/mean_length": 36.875, |
|
"completions/mean_terminated_length": 36.875, |
|
"completions/min_length": 16.0, |
|
"completions/min_terminated_length": 16.0, |
|
"epoch": 0.3902439024390244, |
|
"frac_reward_zero_std": 0.25, |
|
"grad_norm": 50.915489196777344, |
|
"kl": 0.019150954321958125, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"loss": -0.0363, |
|
"num_tokens": 5521.0, |
|
"reward": 0.13781297951936722, |
|
"reward_std": 0.03136043483391404, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.028437979985028505, |
|
"rewards/question_recreation_reward_func/std": 0.010751228081062436, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.109375, |
|
"rewards/xmlcount_reward_func/std": 0.13200797885656357, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 120.5, |
|
"completions/max_terminated_length": 120.5, |
|
"completions/mean_length": 44.25, |
|
"completions/mean_terminated_length": 44.25, |
|
"completions/min_length": 12.5, |
|
"completions/min_terminated_length": 12.5, |
|
"epoch": 0.4878048780487805, |
|
"frac_reward_zero_std": 0.25, |
|
"grad_norm": 31.54133415222168, |
|
"kl": 0.02116560866124928, |
|
"learning_rate": 1.507684480352292e-08, |
|
"loss": 0.0382, |
|
"num_tokens": 6899.0, |
|
"reward": 0.08553153276443481, |
|
"reward_std": 0.019916290184482932, |
|
"rewards/concensus_correctness_reward_func/mean": 0.0, |
|
"rewards/concensus_correctness_reward_func/std": 0.0, |
|
"rewards/consensus_reward_func/mean": 0.0, |
|
"rewards/consensus_reward_func/std": 0.0, |
|
"rewards/cumulative_reward_2/mean": 0.0, |
|
"rewards/cumulative_reward_2/std": 0.0, |
|
"rewards/final_correctness_reward_func/mean": 0.0, |
|
"rewards/final_correctness_reward_func/std": 0.0, |
|
"rewards/question_recreation_reward_func/mean": 0.013406533282250166, |
|
"rewards/question_recreation_reward_func/std": 0.013068773550912738, |
|
"rewards/soft_format_reward_func/mean": 0.0, |
|
"rewards/soft_format_reward_func/std": 0.0, |
|
"rewards/strict_format_reward_func/mean": 0.0, |
|
"rewards/strict_format_reward_func/std": 0.0, |
|
"rewards/xmlcount_reward_func/mean": 0.07212499901652336, |
|
"rewards/xmlcount_reward_func/std": 0.09649057686328888, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"step": 10, |
|
"total_flos": 0.0, |
|
"train_loss": -0.01780639439821243, |
|
"train_runtime": 1859.9628, |
|
"train_samples_per_second": 0.022, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 6899, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|