{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 4120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24271844660194175, "grad_norm": 0.29302138090133667, "learning_rate": 9.393203883495146e-05, "loss": 2.8318, "step": 250 }, { "epoch": 0.4854368932038835, "grad_norm": 0.24823348224163055, "learning_rate": 8.786407766990292e-05, "loss": 2.4178, "step": 500 }, { "epoch": 0.7281553398058253, "grad_norm": 0.2736216187477112, "learning_rate": 8.179611650485438e-05, "loss": 2.3486, "step": 750 }, { "epoch": 0.970873786407767, "grad_norm": 0.23155324161052704, "learning_rate": 7.572815533980583e-05, "loss": 2.321, "step": 1000 }, { "epoch": 1.2135922330097086, "grad_norm": 0.3087535500526428, "learning_rate": 6.966019417475728e-05, "loss": 2.3124, "step": 1250 }, { "epoch": 1.4563106796116505, "grad_norm": 0.2022610604763031, "learning_rate": 6.359223300970875e-05, "loss": 2.307, "step": 1500 }, { "epoch": 1.6990291262135924, "grad_norm": 0.1987154483795166, "learning_rate": 5.752427184466019e-05, "loss": 2.3019, "step": 1750 }, { "epoch": 1.941747572815534, "grad_norm": 0.16502627730369568, "learning_rate": 5.145631067961165e-05, "loss": 2.3073, "step": 2000 }, { "epoch": 2.1844660194174756, "grad_norm": 0.19471481442451477, "learning_rate": 4.538834951456311e-05, "loss": 2.2879, "step": 2250 }, { "epoch": 2.4271844660194173, "grad_norm": 0.20921674370765686, "learning_rate": 3.9320388349514564e-05, "loss": 2.2888, "step": 2500 }, { "epoch": 2.6699029126213594, "grad_norm": 0.2216738611459732, "learning_rate": 3.325242718446602e-05, "loss": 2.2997, "step": 2750 }, { "epoch": 2.912621359223301, "grad_norm": 0.2476879507303238, "learning_rate": 2.7184466019417475e-05, "loss": 2.2748, "step": 3000 }, { "epoch": 3.1553398058252426, "grad_norm": 0.2942667603492737, "learning_rate": 2.111650485436893e-05, "loss": 2.2749, "step": 3250 }, { "epoch": 3.3980582524271843, "grad_norm": 0.2994736135005951, "learning_rate": 1.5048543689320387e-05, "loss": 2.2738, "step": 3500 }, { "epoch": 3.6407766990291264, "grad_norm": 0.2753419876098633, "learning_rate": 8.980582524271845e-06, "loss": 2.2557, "step": 3750 }, { "epoch": 3.883495145631068, "grad_norm": 0.277048796415329, "learning_rate": 2.912621359223301e-06, "loss": 2.2512, "step": 4000 }, { "epoch": 4.0, "step": 4120, "total_flos": 7.617568236584731e+18, "train_loss": 2.3324776880949445, "train_runtime": 61881.1241, "train_samples_per_second": 8.522, "train_steps_per_second": 0.067 } ], "logging_steps": 250, "max_steps": 4120, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.617568236584731e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }