Iedha's picture
End of training
1a9617c verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4878048780487805,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 127.0,
"completions/max_terminated_length": 127.0,
"completions/mean_length": 59.25,
"completions/mean_terminated_length": 59.25,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"epoch": 0.0975609756097561,
"frac_reward_zero_std": 0.0,
"grad_norm": 25.766353607177734,
"kl": 0.0,
"learning_rate": 5e-07,
"loss": 0.0186,
"num_tokens": 1498.0,
"reward": 0.049393012188374996,
"reward_std": 0.048807840794324875,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.02114301174879074,
"rewards/question_recreation_reward_func/std": 0.0122159318998456,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.02824999950826168,
"rewards/xmlcount_reward_func/std": 0.05649999901652336,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 132.0,
"completions/max_terminated_length": 132.0,
"completions/mean_length": 53.75,
"completions/mean_terminated_length": 53.75,
"completions/min_length": 24.5,
"completions/min_terminated_length": 24.5,
"epoch": 0.1951219512195122,
"frac_reward_zero_std": 0.25,
"grad_norm": 19.915145874023438,
"kl": 0.0023184213787317276,
"learning_rate": 4.415111107797445e-07,
"loss": 0.0505,
"num_tokens": 2952.0,
"reward": 0.04935022257268429,
"reward_std": 0.05023687332868576,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.018100222572684288,
"rewards/question_recreation_reward_func/std": 0.012556762900203466,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.03125,
"rewards/xmlcount_reward_func/std": 0.0625,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 67.5,
"completions/max_terminated_length": 67.5,
"completions/mean_length": 28.25,
"completions/mean_terminated_length": 28.25,
"completions/min_length": 5.5,
"completions/min_terminated_length": 5.5,
"epoch": 0.2926829268292683,
"frac_reward_zero_std": 0.0,
"grad_norm": 97.34249877929688,
"kl": 0.01801438198890537,
"learning_rate": 2.934120444167326e-07,
"loss": -0.16,
"num_tokens": 4202.0,
"reward": 0.114079300314188,
"reward_std": 0.08150303550064564,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.08282929984852672,
"rewards/question_recreation_reward_func/std": 0.03306010598316789,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.03125,
"rewards/xmlcount_reward_func/std": 0.0625,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.5,
"completions/max_terminated_length": 75.5,
"completions/mean_length": 36.875,
"completions/mean_terminated_length": 36.875,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"epoch": 0.3902439024390244,
"frac_reward_zero_std": 0.25,
"grad_norm": 50.915489196777344,
"kl": 0.019150954321958125,
"learning_rate": 1.2500000000000005e-07,
"loss": -0.0363,
"num_tokens": 5521.0,
"reward": 0.13781297951936722,
"reward_std": 0.03136043483391404,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.028437979985028505,
"rewards/question_recreation_reward_func/std": 0.010751228081062436,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.109375,
"rewards/xmlcount_reward_func/std": 0.13200797885656357,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 120.5,
"completions/max_terminated_length": 120.5,
"completions/mean_length": 44.25,
"completions/mean_terminated_length": 44.25,
"completions/min_length": 12.5,
"completions/min_terminated_length": 12.5,
"epoch": 0.4878048780487805,
"frac_reward_zero_std": 0.25,
"grad_norm": 31.54133415222168,
"kl": 0.02116560866124928,
"learning_rate": 1.507684480352292e-08,
"loss": 0.0382,
"num_tokens": 6899.0,
"reward": 0.08553153276443481,
"reward_std": 0.019916290184482932,
"rewards/concensus_correctness_reward_func/mean": 0.0,
"rewards/concensus_correctness_reward_func/std": 0.0,
"rewards/consensus_reward_func/mean": 0.0,
"rewards/consensus_reward_func/std": 0.0,
"rewards/cumulative_reward_2/mean": 0.0,
"rewards/cumulative_reward_2/std": 0.0,
"rewards/final_correctness_reward_func/mean": 0.0,
"rewards/final_correctness_reward_func/std": 0.0,
"rewards/question_recreation_reward_func/mean": 0.013406533282250166,
"rewards/question_recreation_reward_func/std": 0.013068773550912738,
"rewards/soft_format_reward_func/mean": 0.0,
"rewards/soft_format_reward_func/std": 0.0,
"rewards/strict_format_reward_func/mean": 0.0,
"rewards/strict_format_reward_func/std": 0.0,
"rewards/xmlcount_reward_func/mean": 0.07212499901652336,
"rewards/xmlcount_reward_func/std": 0.09649057686328888,
"step": 10
},
{
"epoch": 0.4878048780487805,
"step": 10,
"total_flos": 0.0,
"train_loss": -0.01780639439821243,
"train_runtime": 1859.9628,
"train_samples_per_second": 0.022,
"train_steps_per_second": 0.005
}
],
"logging_steps": 2,
"max_steps": 10,
"num_input_tokens_seen": 6899,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}