{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08683853459972862, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004341926729986432, "grad_norm": 3.3317363262176514, "learning_rate": 1e-05, "loss": 3.8736, "step": 1 }, { "epoch": 0.004341926729986432, "eval_loss": 4.2282562255859375, "eval_runtime": 69.4265, "eval_samples_per_second": 5.589, "eval_steps_per_second": 2.794, "step": 1 }, { "epoch": 0.008683853459972864, "grad_norm": 3.3094608783721924, "learning_rate": 2e-05, "loss": 3.9504, "step": 2 }, { "epoch": 0.013025780189959294, "grad_norm": 3.510456085205078, "learning_rate": 3e-05, "loss": 4.1357, "step": 3 }, { "epoch": 0.017367706919945727, "grad_norm": 3.0727248191833496, "learning_rate": 4e-05, "loss": 3.6872, "step": 4 }, { "epoch": 0.021709633649932156, "grad_norm": 3.095470428466797, "learning_rate": 5e-05, "loss": 3.7915, "step": 5 }, { "epoch": 0.021709633649932156, "eval_loss": 4.091055870056152, "eval_runtime": 3.6207, "eval_samples_per_second": 107.163, "eval_steps_per_second": 53.581, "step": 5 }, { "epoch": 0.026051560379918588, "grad_norm": 2.981228828430176, "learning_rate": 6e-05, "loss": 3.6453, "step": 6 }, { "epoch": 0.03039348710990502, "grad_norm": 3.3638360500335693, "learning_rate": 7e-05, "loss": 3.9298, "step": 7 }, { "epoch": 0.034735413839891455, "grad_norm": 3.848174810409546, "learning_rate": 8e-05, "loss": 4.1513, "step": 8 }, { "epoch": 0.03907734056987788, "grad_norm": 2.8415369987487793, "learning_rate": 9e-05, "loss": 4.1745, "step": 9 }, { "epoch": 0.04341926729986431, "grad_norm": 2.2789957523345947, "learning_rate": 0.0001, "loss": 3.2271, "step": 10 }, { "epoch": 0.04341926729986431, "eval_loss": 3.6719045639038086, "eval_runtime": 3.6766, "eval_samples_per_second": 105.531, "eval_steps_per_second": 52.766, "step": 10 }, { "epoch": 0.04776119402985075, "grad_norm": 2.937727212905884, "learning_rate": 9.755282581475769e-05, "loss": 3.8382, "step": 11 }, { "epoch": 0.052103120759837175, "grad_norm": 2.4996044635772705, "learning_rate": 9.045084971874738e-05, "loss": 3.0171, "step": 12 }, { "epoch": 0.05644504748982361, "grad_norm": 2.593256711959839, "learning_rate": 7.938926261462366e-05, "loss": 3.3513, "step": 13 }, { "epoch": 0.06078697421981004, "grad_norm": 2.700037956237793, "learning_rate": 6.545084971874738e-05, "loss": 3.4148, "step": 14 }, { "epoch": 0.06512890094979647, "grad_norm": 2.4664251804351807, "learning_rate": 5e-05, "loss": 3.2387, "step": 15 }, { "epoch": 0.06512890094979647, "eval_loss": 3.2880284786224365, "eval_runtime": 3.6181, "eval_samples_per_second": 107.239, "eval_steps_per_second": 53.619, "step": 15 }, { "epoch": 0.06947082767978291, "grad_norm": 2.7127010822296143, "learning_rate": 3.4549150281252636e-05, "loss": 3.009, "step": 16 }, { "epoch": 0.07381275440976934, "grad_norm": 2.4328489303588867, "learning_rate": 2.061073738537635e-05, "loss": 2.683, "step": 17 }, { "epoch": 0.07815468113975577, "grad_norm": 3.2602317333221436, "learning_rate": 9.549150281252633e-06, "loss": 3.215, "step": 18 }, { "epoch": 0.0824966078697422, "grad_norm": 3.8587803840637207, "learning_rate": 2.4471741852423237e-06, "loss": 3.2797, "step": 19 }, { "epoch": 0.08683853459972862, "grad_norm": 3.2040271759033203, "learning_rate": 0.0, "loss": 3.0642, "step": 20 }, { "epoch": 0.08683853459972862, "eval_loss": 3.188815116882324, "eval_runtime": 3.6622, "eval_samples_per_second": 105.948, 
"eval_steps_per_second": 52.974, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 977193847750656.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }