|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.023075615147319584, |
|
"eval_steps": 500, |
|
"global_step": 1900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.75, |
|
"completions/max_length": 256.0, |
|
"completions/max_terminated_length": 215.0, |
|
"completions/mean_length": 226.375, |
|
"completions/mean_terminated_length": 137.5, |
|
"completions/min_length": 60.0, |
|
"completions/min_terminated_length": 60.0, |
|
"entropy": 1.7876319885253906, |
|
"epoch": 1.2145060603852413e-05, |
|
"frac_reward_zero_std": 0.0, |
|
"grad_norm": 0.45122280716896057, |
|
"kl": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": -0.0, |
|
"num_tokens": 3903.0, |
|
"reward": 7250.048828125, |
|
"reward_std": 2273.237548828125, |
|
"rewards/reward_long_completions/mean": 806.75, |
|
"rewards/reward_long_completions/std": 293.8428955078125, |
|
"rewards/reward_long_words/mean": 6.343996047973633, |
|
"rewards/reward_long_words/std": 2.0528388023376465, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.9934869739478958, |
|
"completions/max_length": 256.0, |
|
"completions/max_terminated_length": 3.8597194388777556, |
|
"completions/mean_length": 255.0433366733467, |
|
"completions/mean_terminated_length": 2.520774917755433, |
|
"completions/min_length": 251.1442885771543, |
|
"completions/min_terminated_length": 1.3006012024048097, |
|
"entropy": 0.46742610163357157, |
|
"epoch": 0.006072530301926207, |
|
"frac_reward_zero_std": 0.8136272545090181, |
|
"grad_norm": 1.1401318111836645e-08, |
|
"kl": 0.27695997151695956, |
|
"learning_rate": 0.00019183846457059546, |
|
"loss": 0.0111, |
|
"num_tokens": 1761588.0, |
|
"reward": 26948.982748700528, |
|
"reward_std": 173.13352758659866, |
|
"rewards/reward_long_completions/mean": 3006.4551603206414, |
|
"rewards/reward_long_completions/std": 26.415839506175093, |
|
"rewards/reward_long_words/mean": 11.271554429927665, |
|
"rewards/reward_long_words/std": 0.46356010581049434, |
|
"step": 500 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 1.0, |
|
"completions/max_length": 256.0, |
|
"completions/max_terminated_length": 0.0, |
|
"completions/mean_length": 256.0, |
|
"completions/mean_terminated_length": 0.0, |
|
"completions/min_length": 256.0, |
|
"completions/min_terminated_length": 0.0, |
|
"entropy": 8.449839063473518e-08, |
|
"epoch": 0.012145060603852413, |
|
"frac_reward_zero_std": 1.0, |
|
"grad_norm": 1.2536369276006099e-08, |
|
"kl": 0.30570593059062956, |
|
"learning_rate": 0.00014850305016500775, |
|
"loss": 0.0122, |
|
"num_tokens": 3432260.0, |
|
"reward": 29819.603515625, |
|
"reward_std": 0.0, |
|
"rewards/reward_long_completions/mean": 3327.0, |
|
"rewards/reward_long_completions/std": 0.0, |
|
"rewards/reward_long_words/mean": 12.0, |
|
"rewards/reward_long_words/std": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 1.0, |
|
"completions/max_length": 256.0, |
|
"completions/max_terminated_length": 0.0, |
|
"completions/mean_length": 256.0, |
|
"completions/mean_terminated_length": 0.0, |
|
"completions/min_length": 256.0, |
|
"completions/min_terminated_length": 0.0, |
|
"entropy": 8.525463378106223e-08, |
|
"epoch": 0.01821759090577862, |
|
"frac_reward_zero_std": 1.0, |
|
"grad_norm": 8.738510359762586e-09, |
|
"kl": 0.2979220716804266, |
|
"learning_rate": 9.352737661482471e-05, |
|
"loss": 0.0119, |
|
"num_tokens": 5134968.0, |
|
"reward": 29819.603515625, |
|
"reward_std": 0.0, |
|
"rewards/reward_long_completions/mean": 3327.0, |
|
"rewards/reward_long_completions/std": 0.0, |
|
"rewards/reward_long_words/mean": 12.0, |
|
"rewards/reward_long_words/std": 0.0, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2126, |
|
"num_input_tokens_seen": 6451696, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|