{ "best_metric": 1.7957185506820679, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.050777530942557915, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002538876547127896, "eval_loss": 7.499389171600342, "eval_runtime": 684.9692, "eval_samples_per_second": 2.422, "eval_steps_per_second": 1.212, "step": 1 }, { "epoch": 0.0025388765471278957, "grad_norm": 20.01273536682129, "learning_rate": 1e-05, "loss": 7.0184, "step": 10 }, { "epoch": 0.0050777530942557915, "grad_norm": 14.389914512634277, "learning_rate": 2e-05, "loss": 4.4882, "step": 20 }, { "epoch": 0.007616629641383688, "grad_norm": 9.800585746765137, "learning_rate": 3e-05, "loss": 2.4237, "step": 30 }, { "epoch": 0.010155506188511583, "grad_norm": 19.282072067260742, "learning_rate": 4e-05, "loss": 1.9135, "step": 40 }, { "epoch": 0.012694382735639479, "grad_norm": 8.14541244506836, "learning_rate": 5e-05, "loss": 1.8893, "step": 50 }, { "epoch": 0.015233259282767376, "grad_norm": 6.925289154052734, "learning_rate": 6e-05, "loss": 1.9096, "step": 60 }, { "epoch": 0.017772135829895272, "grad_norm": 7.16434907913208, "learning_rate": 7e-05, "loss": 1.8435, "step": 70 }, { "epoch": 0.020311012377023166, "grad_norm": 7.456660270690918, "learning_rate": 8e-05, "loss": 1.9227, "step": 80 }, { "epoch": 0.022849888924151063, "grad_norm": 6.74895715713501, "learning_rate": 9e-05, "loss": 1.8224, "step": 90 }, { "epoch": 0.025388765471278957, "grad_norm": 6.2767510414123535, "learning_rate": 0.0001, "loss": 1.7577, "step": 100 }, { "epoch": 0.025388765471278957, "eval_loss": 1.8877848386764526, "eval_runtime": 684.2226, "eval_samples_per_second": 2.425, "eval_steps_per_second": 1.213, "step": 100 }, { "epoch": 0.027927642018406855, "grad_norm": 5.097551345825195, "learning_rate": 9.755282581475769e-05, "loss": 2.0565, "step": 110 }, { "epoch": 0.030466518565534752, "grad_norm": 5.463619709014893, "learning_rate": 9.045084971874738e-05, "loss": 1.8281, "step": 120 }, { "epoch": 0.03300539511266265, "grad_norm": 6.433900356292725, "learning_rate": 7.938926261462366e-05, "loss": 1.8498, "step": 130 }, { "epoch": 0.035544271659790544, "grad_norm": 5.998843193054199, "learning_rate": 6.545084971874738e-05, "loss": 2.0344, "step": 140 }, { "epoch": 0.03808314820691844, "grad_norm": 5.395852088928223, "learning_rate": 5e-05, "loss": 1.9115, "step": 150 }, { "epoch": 0.04062202475404633, "grad_norm": 4.1980390548706055, "learning_rate": 3.4549150281252636e-05, "loss": 1.9682, "step": 160 }, { "epoch": 0.04316090130117423, "grad_norm": 3.98956561088562, "learning_rate": 2.061073738537635e-05, "loss": 1.8356, "step": 170 }, { "epoch": 0.04569977784830213, "grad_norm": 4.076719284057617, "learning_rate": 9.549150281252633e-06, "loss": 1.8401, "step": 180 }, { "epoch": 0.04823865439543002, "grad_norm": 4.793858051300049, "learning_rate": 2.4471741852423237e-06, "loss": 1.7489, "step": 190 }, { "epoch": 0.050777530942557915, "grad_norm": 4.629513740539551, "learning_rate": 0.0, "loss": 1.6943, "step": 200 }, { "epoch": 0.050777530942557915, "eval_loss": 1.7957185506820679, "eval_runtime": 684.0192, "eval_samples_per_second": 2.425, "eval_steps_per_second": 1.213, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.541499732885504e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }