trainer_state.json · Iedha/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-lethal_tawny

Qwen2.5-0.5B-Instruct-Gensyn-Swarm-lethal_tawny_deer / trainer_state.json

End of training

1a9617c verified 30 days ago

9.58 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.4878048780487805,
	"eval_steps": 500,
	"global_step": 10,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 127.0,
	"completions/max_terminated_length": 127.0,
	"completions/mean_length": 59.25,
	"completions/mean_terminated_length": 59.25,
	"completions/min_length": 13.0,
	"completions/min_terminated_length": 13.0,
	"epoch": 0.0975609756097561,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 25.766353607177734,
	"kl": 0.0,
	"learning_rate": 5e-07,
	"loss": 0.0186,
	"num_tokens": 1498.0,
	"reward": 0.049393012188374996,
	"reward_std": 0.048807840794324875,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.02114301174879074,
	"rewards/question_recreation_reward_func/std": 0.0122159318998456,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.02824999950826168,
	"rewards/xmlcount_reward_func/std": 0.05649999901652336,
	"step": 2
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 132.0,
	"completions/max_terminated_length": 132.0,
	"completions/mean_length": 53.75,
	"completions/mean_terminated_length": 53.75,
	"completions/min_length": 24.5,
	"completions/min_terminated_length": 24.5,
	"epoch": 0.1951219512195122,
	"frac_reward_zero_std": 0.25,
	"grad_norm": 19.915145874023438,
	"kl": 0.0023184213787317276,
	"learning_rate": 4.415111107797445e-07,
	"loss": 0.0505,
	"num_tokens": 2952.0,
	"reward": 0.04935022257268429,
	"reward_std": 0.05023687332868576,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.018100222572684288,
	"rewards/question_recreation_reward_func/std": 0.012556762900203466,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.03125,
	"rewards/xmlcount_reward_func/std": 0.0625,
	"step": 4
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 67.5,
	"completions/max_terminated_length": 67.5,
	"completions/mean_length": 28.25,
	"completions/mean_terminated_length": 28.25,
	"completions/min_length": 5.5,
	"completions/min_terminated_length": 5.5,
	"epoch": 0.2926829268292683,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 97.34249877929688,
	"kl": 0.01801438198890537,
	"learning_rate": 2.934120444167326e-07,
	"loss": -0.16,
	"num_tokens": 4202.0,
	"reward": 0.114079300314188,
	"reward_std": 0.08150303550064564,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.08282929984852672,
	"rewards/question_recreation_reward_func/std": 0.03306010598316789,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.03125,
	"rewards/xmlcount_reward_func/std": 0.0625,
	"step": 6
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 75.5,
	"completions/max_terminated_length": 75.5,
	"completions/mean_length": 36.875,
	"completions/mean_terminated_length": 36.875,
	"completions/min_length": 16.0,
	"completions/min_terminated_length": 16.0,
	"epoch": 0.3902439024390244,
	"frac_reward_zero_std": 0.25,
	"grad_norm": 50.915489196777344,
	"kl": 0.019150954321958125,
	"learning_rate": 1.2500000000000005e-07,
	"loss": -0.0363,
	"num_tokens": 5521.0,
	"reward": 0.13781297951936722,
	"reward_std": 0.03136043483391404,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.028437979985028505,
	"rewards/question_recreation_reward_func/std": 0.010751228081062436,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.109375,
	"rewards/xmlcount_reward_func/std": 0.13200797885656357,
	"step": 8
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 120.5,
	"completions/max_terminated_length": 120.5,
	"completions/mean_length": 44.25,
	"completions/mean_terminated_length": 44.25,
	"completions/min_length": 12.5,
	"completions/min_terminated_length": 12.5,
	"epoch": 0.4878048780487805,
	"frac_reward_zero_std": 0.25,
	"grad_norm": 31.54133415222168,
	"kl": 0.02116560866124928,
	"learning_rate": 1.507684480352292e-08,
	"loss": 0.0382,
	"num_tokens": 6899.0,
	"reward": 0.08553153276443481,
	"reward_std": 0.019916290184482932,
	"rewards/concensus_correctness_reward_func/mean": 0.0,
	"rewards/concensus_correctness_reward_func/std": 0.0,
	"rewards/consensus_reward_func/mean": 0.0,
	"rewards/consensus_reward_func/std": 0.0,
	"rewards/cumulative_reward_2/mean": 0.0,
	"rewards/cumulative_reward_2/std": 0.0,
	"rewards/final_correctness_reward_func/mean": 0.0,
	"rewards/final_correctness_reward_func/std": 0.0,
	"rewards/question_recreation_reward_func/mean": 0.013406533282250166,
	"rewards/question_recreation_reward_func/std": 0.013068773550912738,
	"rewards/soft_format_reward_func/mean": 0.0,
	"rewards/soft_format_reward_func/std": 0.0,
	"rewards/strict_format_reward_func/mean": 0.0,
	"rewards/strict_format_reward_func/std": 0.0,
	"rewards/xmlcount_reward_func/mean": 0.07212499901652336,
	"rewards/xmlcount_reward_func/std": 0.09649057686328888,
	"step": 10
	},
	{
	"epoch": 0.4878048780487805,
	"step": 10,
	"total_flos": 0.0,
	"train_loss": -0.01780639439821243,
	"train_runtime": 1859.9628,
	"train_samples_per_second": 0.022,
	"train_steps_per_second": 0.005
	}
	],
	"logging_steps": 2,
	"max_steps": 10,
	"num_input_tokens_seen": 6899,
	"num_train_epochs": 1,
	"save_steps": 10,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}