{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02294630564479119, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011473152822395595, "eval_loss": 0.37808579206466675, "eval_runtime": 130.6054, "eval_samples_per_second": 28.1, "eval_steps_per_second": 14.05, "step": 1 }, { "epoch": 0.0011473152822395595, "grad_norm": 0.5192480087280273, "learning_rate": 0.0002, "loss": 0.1734, "step": 10 }, { "epoch": 0.002294630564479119, "grad_norm": 0.0413350947201252, "learning_rate": 0.0002, "loss": 0.0051, "step": 20 }, { "epoch": 0.0034419458467186783, "grad_norm": 0.002669590525329113, "learning_rate": 0.0002, "loss": 0.0099, "step": 30 }, { "epoch": 0.004589261128958238, "grad_norm": 0.06390757113695145, "learning_rate": 0.0002, "loss": 0.01, "step": 40 }, { "epoch": 0.005736576411197797, "grad_norm": 0.0011162913870066404, "learning_rate": 0.0002, "loss": 0.0076, "step": 50 }, { "epoch": 0.005736576411197797, "eval_loss": 0.004289441276341677, "eval_runtime": 130.6395, "eval_samples_per_second": 28.093, "eval_steps_per_second": 14.046, "step": 50 }, { "epoch": 0.006883891693437357, "grad_norm": 0.23345448076725006, "learning_rate": 0.0002, "loss": 0.0076, "step": 60 }, { "epoch": 0.008031206975676917, "grad_norm": 0.24305281043052673, "learning_rate": 0.0002, "loss": 0.0081, "step": 70 }, { "epoch": 0.009178522257916476, "grad_norm": 0.05415942892432213, "learning_rate": 0.0002, "loss": 0.0032, "step": 80 }, { "epoch": 0.010325837540156035, "grad_norm": 0.14089266955852509, "learning_rate": 0.0002, "loss": 0.0028, "step": 90 }, { "epoch": 0.011473152822395595, "grad_norm": 0.4434452950954437, "learning_rate": 0.0002, "loss": 0.003, "step": 100 }, { "epoch": 0.011473152822395595, "eval_loss": 0.005989062134176493, "eval_runtime": 130.6565, "eval_samples_per_second": 28.089, "eval_steps_per_second": 14.044, "step": 100 }, { "epoch": 0.012620468104635154, "grad_norm": 0.07541525363922119, "learning_rate": 0.0002, "loss": 0.0107, "step": 110 }, { "epoch": 0.013767783386874713, "grad_norm": 0.1701805144548416, "learning_rate": 0.0002, "loss": 0.0046, "step": 120 }, { "epoch": 0.014915098669114273, "grad_norm": 0.07125061750411987, "learning_rate": 0.0002, "loss": 0.0031, "step": 130 }, { "epoch": 0.016062413951353834, "grad_norm": 0.020574383437633514, "learning_rate": 0.0002, "loss": 0.004, "step": 140 }, { "epoch": 0.01720972923359339, "grad_norm": 0.0034719579853117466, "learning_rate": 0.0002, "loss": 0.0117, "step": 150 }, { "epoch": 0.01720972923359339, "eval_loss": 0.003394440980628133, "eval_runtime": 130.6011, "eval_samples_per_second": 28.101, "eval_steps_per_second": 14.05, "step": 150 }, { "epoch": 0.018357044515832952, "grad_norm": 1.1995950937271118, "learning_rate": 0.0002, "loss": 0.0103, "step": 160 }, { "epoch": 0.01950435979807251, "grad_norm": 0.2900412380695343, "learning_rate": 0.0002, "loss": 0.001, "step": 170 }, { "epoch": 0.02065167508031207, "grad_norm": 0.0037594300229102373, "learning_rate": 0.0002, "loss": 0.0103, "step": 180 }, { "epoch": 0.02179899036255163, "grad_norm": 0.0467398539185524, "learning_rate": 0.0002, "loss": 0.0065, "step": 190 }, { "epoch": 0.02294630564479119, "grad_norm": 0.06587290018796921, "learning_rate": 0.0002, "loss": 0.007, "step": 200 }, { "epoch": 0.02294630564479119, "eval_loss": 0.010427200235426426, "eval_runtime": 130.676, "eval_samples_per_second": 28.085, "eval_steps_per_second": 14.042, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.84607147524096e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }