{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.946236559139785, "eval_steps": 500, "global_step": 11, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08602150537634409, "grad_norm": 0.979141891002655, "learning_rate": 4.898732434036244e-05, "loss": 0.9567, "num_input_tokens_seen": 2097152, "step": 1 }, { "epoch": 0.17204301075268819, "grad_norm": 0.6088763475418091, "learning_rate": 4.6031338320779534e-05, "loss": 0.9146, "num_input_tokens_seen": 4194304, "step": 2 }, { "epoch": 0.25806451612903225, "grad_norm": 1.0809179544448853, "learning_rate": 4.137151834863213e-05, "loss": 0.8763, "num_input_tokens_seen": 6291456, "step": 3 }, { "epoch": 0.34408602150537637, "grad_norm": 1.0754276514053345, "learning_rate": 3.5385375325047166e-05, "loss": 0.9113, "num_input_tokens_seen": 8388608, "step": 4 }, { "epoch": 0.43010752688172044, "grad_norm": 0.4839036166667938, "learning_rate": 2.8557870956832132e-05, "loss": 0.8786, "num_input_tokens_seen": 10485760, "step": 5 }, { "epoch": 0.5161290322580645, "grad_norm": 0.6119584441184998, "learning_rate": 2.1442129043167874e-05, "loss": 0.8796, "num_input_tokens_seen": 12582912, "step": 6 }, { "epoch": 0.6021505376344086, "grad_norm": 0.33435240387916565, "learning_rate": 1.4614624674952842e-05, "loss": 0.8462, "num_input_tokens_seen": 14680064, "step": 7 }, { "epoch": 0.6881720430107527, "grad_norm": 0.32259252667427063, "learning_rate": 8.628481651367876e-06, "loss": 0.8707, "num_input_tokens_seen": 16777216, "step": 8 }, { "epoch": 0.7741935483870968, "grad_norm": 0.1886083334684372, "learning_rate": 3.968661679220468e-06, "loss": 0.8411, "num_input_tokens_seen": 18874368, "step": 9 }, { "epoch": 0.8602150537634409, "grad_norm": 0.16534186899662018, "learning_rate": 1.0126756596375686e-06, "loss": 0.8267, "num_input_tokens_seen": 20971520, "step": 10 }, { "epoch": 0.946236559139785, "grad_norm": 0.1587287038564682, "learning_rate": 0.0, "loss": 0.8827, "num_input_tokens_seen": 23068672, "step": 11 }, { "epoch": 0.946236559139785, "num_input_tokens_seen": 23068672, "step": 11, "total_flos": 1.0387718738584535e+18, "train_loss": 0.8804038058627736, "train_runtime": 1203.6155, "train_samples_per_second": 4.928, "train_steps_per_second": 0.009 } ], "logging_steps": 1, "max_steps": 11, "num_input_tokens_seen": 23068672, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0387718738584535e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }