{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9952755905511811, "eval_steps": 10000000000, "global_step": 79, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completion_length": 797.146240234375, "epoch": 0.012598425196850394, "grad_norm": 0.22265445878992737, "learning_rate": 1.25e-07, "loss": 0.201, "num_tokens": 871723.0, "reward": 0.5814732387661934, "reward_std": 0.4103100262582302, "rewards/accuracy_reward": 0.5747767761349678, "rewards/format_reward": 0.01339285762514919, "step": 1 }, { "clip_ratio": 0.0, "completion_length": 804.7539443969727, "epoch": 0.06299212598425197, "grad_norm": 0.2417152167110338, "learning_rate": 6.249999999999999e-07, "loss": 0.1642, "num_tokens": 4403713.0, "reward": 0.5676618544384837, "reward_std": 0.4300461960956454, "rewards/accuracy_reward": 0.563895090483129, "rewards/format_reward": 0.007533482450526208, "step": 5 }, { "clip_ratio": 0.0, "completion_length": 785.3382049560547, "epoch": 0.12598425196850394, "grad_norm": 0.3011761966028881, "learning_rate": 9.980434110374724e-07, "loss": 0.1628, "num_tokens": 8734420.0, "reward": 0.575000024586916, "reward_std": 0.40654933378100394, "rewards/accuracy_reward": 0.5716517880558968, "rewards/format_reward": 0.006696428824216128, "step": 10 }, { "clip_ratio": 0.0, "completion_length": 807.4542770385742, "epoch": 0.1889763779527559, "grad_norm": 0.1895048136482896, "learning_rate": 9.762072666790656e-07, "loss": 0.1844, "num_tokens": 13144543.0, "reward": 0.5949777036905288, "reward_std": 0.3915623873472214, "rewards/accuracy_reward": 0.5912946447730064, "rewards/format_reward": 0.007366071711294353, "step": 15 }, { "clip_ratio": 0.0, "completion_length": 804.9518280029297, "epoch": 0.25196850393700787, "grad_norm": 0.17095132727584114, "learning_rate": 9.311572862600138e-07, "loss": 0.1833, "num_tokens": 17551799.0, "reward": 0.6332589581608772, "reward_std": 0.37318109199404714, "rewards/accuracy_reward": 0.6314732164144516, "rewards/format_reward": 0.00357142873108387, "step": 20 }, { "clip_ratio": 0.0, "completion_length": 797.7531616210938, "epoch": 0.31496062992125984, "grad_norm": 0.18012116491915073, "learning_rate": 8.650895363529172e-07, "loss": 0.1815, "num_tokens": 21890173.0, "reward": 0.6541294917464257, "reward_std": 0.3551797144114971, "rewards/accuracy_reward": 0.6547733508050442, "rewards/format_reward": 0.0015625000698491931, "step": 25 }, { "clip_ratio": 0.0, "completion_length": 759.0582946777344, "epoch": 0.3779527559055118, "grad_norm": 0.22639137855339267, "learning_rate": 7.812246438203903e-07, "loss": 0.1692, "num_tokens": 26076066.0, "reward": 0.708482176065445, "reward_std": 0.30925857946276664, "rewards/accuracy_reward": 0.7082589268684387, "rewards/format_reward": 0.00044642859138548373, "step": 30 }, { "clip_ratio": 0.0, "completion_length": 739.7466842651368, "epoch": 0.4409448818897638, "grad_norm": 0.37314514493140266, "learning_rate": 6.836507988323784e-07, "loss": 0.1384, "num_tokens": 30202131.0, "reward": 0.7066964626312255, "reward_std": 0.2901428207755089, "rewards/accuracy_reward": 0.7066964276134968, "rewards/format_reward": 0.0, "step": 35 }, { "clip_ratio": 0.0, "completion_length": 772.0323989868164, "epoch": 0.5039370078740157, "grad_norm": 0.2614993046463775, "learning_rate": 5.771244664826511e-07, "loss": 0.1437, "num_tokens": 34466708.0, "reward": 0.6880580708384514, "reward_std": 0.3077801916748285, "rewards/accuracy_reward": 0.6879464246332645, "rewards/format_reward": 0.00022321429569274187, "step": 40 }, { "clip_ratio": 0.0, "completion_length": 713.0567276000977, "epoch": 0.5669291338582677, "grad_norm": 0.1454705351797755, "learning_rate": 4.6683852178244817e-07, "loss": 0.0997, "num_tokens": 38522522.0, "reward": 0.7095982447266579, "reward_std": 0.2782834365963936, "rewards/accuracy_reward": 0.7095982141792774, "rewards/format_reward": 0.0, "step": 45 }, { "clip_ratio": 0.0, "completion_length": 742.1245864868164, "epoch": 0.6299212598425197, "grad_norm": 0.14001943959476107, "learning_rate": 3.5816911083285164e-07, "loss": 0.0861, "num_tokens": 42626344.0, "reward": 0.7125000342726707, "reward_std": 0.26790192127227785, "rewards/accuracy_reward": 0.7125, "rewards/format_reward": 0.0, "step": 50 }, { "clip_ratio": 0.0, "completion_length": 725.4094085693359, "epoch": 0.6929133858267716, "grad_norm": 0.11596792832277024, "learning_rate": 2.5641357801960184e-07, "loss": 0.0765, "num_tokens": 46657290.0, "reward": 0.712500037252903, "reward_std": 0.2700365446507931, "rewards/accuracy_reward": 0.7125000022351742, "rewards/format_reward": 0.0, "step": 55 }, { "clip_ratio": 0.0, "completion_length": 716.4462371826172, "epoch": 0.7559055118110236, "grad_norm": 0.22234439564713598, "learning_rate": 1.665322345816746e-07, "loss": 0.0849, "num_tokens": 50648721.0, "reward": 0.7366071820259095, "reward_std": 0.24836960211396217, "rewards/accuracy_reward": 0.7366071425378322, "rewards/format_reward": 0.0, "step": 60 }, { "clip_ratio": 0.0, "completion_length": 717.4730224609375, "epoch": 0.8188976377952756, "grad_norm": 0.182600444186133, "learning_rate": 9.290655664821296e-08, "loss": 0.0909, "num_tokens": 54634528.0, "reward": 0.7285714581608772, "reward_std": 0.25840977653861047, "rewards/accuracy_reward": 0.7285714313387871, "rewards/format_reward": 0.0, "step": 65 }, { "clip_ratio": 0.0, "completion_length": 712.1507034301758, "epoch": 0.8818897637795275, "grad_norm": 0.17072451326668445, "learning_rate": 3.912559994556086e-08, "loss": 0.0883, "num_tokens": 58570507.0, "reward": 0.7503348544239998, "reward_std": 0.25088600218296053, "rewards/accuracy_reward": 0.7515796698629856, "rewards/format_reward": 0.00022321429569274187, "step": 70 }, { "clip_ratio": 0.0, "completion_length": 719.5852981567383, "epoch": 0.9448818897637795, "grad_norm": 0.2933895566333781, "learning_rate": 7.811042888637209e-09, "loss": 0.0763, "num_tokens": 62600049.0, "reward": 0.7198661029338836, "reward_std": 0.2631711885333061, "rewards/accuracy_reward": 0.7198660731315613, "rewards/format_reward": 0.0, "step": 75 }, { "clip_ratio": 0.0, "completion_length": 755.7718505859375, "epoch": 0.9952755905511811, "num_tokens": 65965046.0, "reward": 0.7045201249420643, "reward_std": 0.28055303543806076, "rewards/accuracy_reward": 0.7042410708963871, "rewards/format_reward": 0.0005580357392318547, "step": 79, "total_flos": 0.0, "train_loss": 0.12618592532375192, "train_runtime": 35348.2691, "train_samples_per_second": 0.251, "train_steps_per_second": 0.002 } ], "logging_steps": 5, "max_steps": 79, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }