{ "best_metric": 0.40518489480018616, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.006744225257123588, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013488450514247176, "grad_norm": 0.7375573515892029, "learning_rate": 1e-05, "loss": 0.619, "step": 1 }, { "epoch": 0.00013488450514247176, "eval_loss": 0.5535552501678467, "eval_runtime": 262.1617, "eval_samples_per_second": 47.631, "eval_steps_per_second": 11.909, "step": 1 }, { "epoch": 0.0002697690102849435, "grad_norm": 0.7183876633644104, "learning_rate": 2e-05, "loss": 0.6297, "step": 2 }, { "epoch": 0.00040465351542741526, "grad_norm": 0.6348766088485718, "learning_rate": 3e-05, "loss": 0.5755, "step": 3 }, { "epoch": 0.000539538020569887, "grad_norm": 0.5318180322647095, "learning_rate": 4e-05, "loss": 0.5212, "step": 4 }, { "epoch": 0.0006744225257123588, "grad_norm": 0.4897218644618988, "learning_rate": 5e-05, "loss": 0.4963, "step": 5 }, { "epoch": 0.0008093070308548305, "grad_norm": 0.4619118273258209, "learning_rate": 6e-05, "loss": 0.4524, "step": 6 }, { "epoch": 0.0009441915359973024, "grad_norm": 0.4772312641143799, "learning_rate": 7e-05, "loss": 0.4057, "step": 7 }, { "epoch": 0.001079076041139774, "grad_norm": 0.49830466508865356, "learning_rate": 8e-05, "loss": 0.4654, "step": 8 }, { "epoch": 0.0012139605462822458, "grad_norm": 0.5150027275085449, "learning_rate": 9e-05, "loss": 0.4528, "step": 9 }, { "epoch": 0.0013488450514247176, "grad_norm": 0.4883197247982025, "learning_rate": 0.0001, "loss": 0.4683, "step": 10 }, { "epoch": 0.0014837295565671893, "grad_norm": 0.5665213465690613, "learning_rate": 9.999316524962345e-05, "loss": 0.5624, "step": 11 }, { "epoch": 0.001618614061709661, "grad_norm": 0.4944498836994171, "learning_rate": 9.997266286704631e-05, "loss": 0.5727, "step": 12 }, { "epoch": 0.0017534985668521328, "grad_norm": 0.457690566778183, "learning_rate": 9.993849845741524e-05, "loss": 0.4831, "step": 13 }, { "epoch": 0.0018883830719946047, "grad_norm": 0.4723391830921173, "learning_rate": 9.989068136093873e-05, "loss": 0.4807, "step": 14 }, { "epoch": 0.0020232675771370764, "grad_norm": 0.46450331807136536, "learning_rate": 9.98292246503335e-05, "loss": 0.5013, "step": 15 }, { "epoch": 0.002158152082279548, "grad_norm": 0.4473508596420288, "learning_rate": 9.975414512725057e-05, "loss": 0.4734, "step": 16 }, { "epoch": 0.00229303658742202, "grad_norm": 0.44036874175071716, "learning_rate": 9.966546331768191e-05, "loss": 0.4924, "step": 17 }, { "epoch": 0.0024279210925644916, "grad_norm": 0.43315592408180237, "learning_rate": 9.956320346634876e-05, "loss": 0.4552, "step": 18 }, { "epoch": 0.0025628055977069634, "grad_norm": 0.42718714475631714, "learning_rate": 9.944739353007344e-05, "loss": 0.4542, "step": 19 }, { "epoch": 0.002697690102849435, "grad_norm": 0.42780622839927673, "learning_rate": 9.931806517013612e-05, "loss": 0.4212, "step": 20 }, { "epoch": 0.002832574607991907, "grad_norm": 0.48322251439094543, "learning_rate": 9.917525374361912e-05, "loss": 0.4701, "step": 21 }, { "epoch": 0.0029674591131343786, "grad_norm": 0.4247865080833435, "learning_rate": 9.901899829374047e-05, "loss": 0.4139, "step": 22 }, { "epoch": 0.0031023436182768503, "grad_norm": 0.46833139657974243, "learning_rate": 9.884934153917997e-05, "loss": 0.4707, "step": 23 }, { "epoch": 0.003237228123419322, "grad_norm": 0.4637615382671356, "learning_rate": 9.86663298624003e-05, "loss": 0.4475, "step": 24 }, { "epoch": 0.003372112628561794, "grad_norm": 0.47445815801620483, "learning_rate": 9.847001329696653e-05, "loss": 0.427, "step": 25 }, { "epoch": 0.0035069971337042655, "grad_norm": 0.4429916441440582, "learning_rate": 9.826044551386744e-05, "loss": 0.3974, "step": 26 }, { "epoch": 0.0036418816388467377, "grad_norm": 0.4321354031562805, "learning_rate": 9.803768380684242e-05, "loss": 0.4111, "step": 27 }, { "epoch": 0.0037767661439892094, "grad_norm": 0.42871013283729553, "learning_rate": 9.780178907671789e-05, "loss": 0.3995, "step": 28 }, { "epoch": 0.003911650649131681, "grad_norm": 0.4513198435306549, "learning_rate": 9.755282581475769e-05, "loss": 0.3615, "step": 29 }, { "epoch": 0.004046535154274153, "grad_norm": 0.4309406280517578, "learning_rate": 9.729086208503174e-05, "loss": 0.3835, "step": 30 }, { "epoch": 0.004181419659416624, "grad_norm": 0.4923899173736572, "learning_rate": 9.701596950580806e-05, "loss": 0.4012, "step": 31 }, { "epoch": 0.004316304164559096, "grad_norm": 0.4715842008590698, "learning_rate": 9.672822322997305e-05, "loss": 0.344, "step": 32 }, { "epoch": 0.004451188669701568, "grad_norm": 0.4990752041339874, "learning_rate": 9.642770192448536e-05, "loss": 0.414, "step": 33 }, { "epoch": 0.00458607317484404, "grad_norm": 0.5114120841026306, "learning_rate": 9.611448774886924e-05, "loss": 0.4346, "step": 34 }, { "epoch": 0.004720957679986511, "grad_norm": 0.5033982396125793, "learning_rate": 9.578866633275288e-05, "loss": 0.3961, "step": 35 }, { "epoch": 0.004855842185128983, "grad_norm": 0.5176622271537781, "learning_rate": 9.545032675245813e-05, "loss": 0.3634, "step": 36 }, { "epoch": 0.0049907266902714555, "grad_norm": 0.4883003234863281, "learning_rate": 9.509956150664796e-05, "loss": 0.3363, "step": 37 }, { "epoch": 0.005125611195413927, "grad_norm": 0.49539199471473694, "learning_rate": 9.473646649103818e-05, "loss": 0.3966, "step": 38 }, { "epoch": 0.005260495700556399, "grad_norm": 0.5086225271224976, "learning_rate": 9.43611409721806e-05, "loss": 0.3377, "step": 39 }, { "epoch": 0.00539538020569887, "grad_norm": 0.47139525413513184, "learning_rate": 9.397368756032445e-05, "loss": 0.325, "step": 40 }, { "epoch": 0.005530264710841342, "grad_norm": 0.5271187424659729, "learning_rate": 9.357421218136386e-05, "loss": 0.3476, "step": 41 }, { "epoch": 0.005665149215983814, "grad_norm": 0.5539581179618835, "learning_rate": 9.316282404787871e-05, "loss": 0.3642, "step": 42 }, { "epoch": 0.005800033721126286, "grad_norm": 0.5593342781066895, "learning_rate": 9.273963562927695e-05, "loss": 0.3203, "step": 43 }, { "epoch": 0.005934918226268757, "grad_norm": 0.563308835029602, "learning_rate": 9.230476262104677e-05, "loss": 0.325, "step": 44 }, { "epoch": 0.006069802731411229, "grad_norm": 0.5519070625305176, "learning_rate": 9.185832391312644e-05, "loss": 0.3488, "step": 45 }, { "epoch": 0.006204687236553701, "grad_norm": 0.5657252669334412, "learning_rate": 9.140044155740101e-05, "loss": 0.3344, "step": 46 }, { "epoch": 0.006339571741696173, "grad_norm": 0.5870583057403564, "learning_rate": 9.093124073433463e-05, "loss": 0.3087, "step": 47 }, { "epoch": 0.006474456246838644, "grad_norm": 0.6925362944602966, "learning_rate": 9.045084971874738e-05, "loss": 0.361, "step": 48 }, { "epoch": 0.006609340751981116, "grad_norm": 0.6307148337364197, "learning_rate": 8.995939984474624e-05, "loss": 0.3025, "step": 49 }, { "epoch": 0.006744225257123588, "grad_norm": 0.7876100540161133, "learning_rate": 8.945702546981969e-05, "loss": 0.2675, "step": 50 }, { "epoch": 0.006744225257123588, "eval_loss": 0.40518489480018616, "eval_runtime": 262.0707, "eval_samples_per_second": 47.647, "eval_steps_per_second": 11.913, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4791659107713024.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }