{
  "best_metric": 0.40518489480018616,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.006744225257123588,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00013488450514247176,
      "grad_norm": 0.7375573515892029,
      "learning_rate": 1e-05,
      "loss": 0.619,
      "step": 1
    },
    {
      "epoch": 0.00013488450514247176,
      "eval_loss": 0.5535552501678467,
      "eval_runtime": 262.1617,
      "eval_samples_per_second": 47.631,
      "eval_steps_per_second": 11.909,
      "step": 1
    },
    {
      "epoch": 0.0002697690102849435,
      "grad_norm": 0.7183876633644104,
      "learning_rate": 2e-05,
      "loss": 0.6297,
      "step": 2
    },
    {
      "epoch": 0.00040465351542741526,
      "grad_norm": 0.6348766088485718,
      "learning_rate": 3e-05,
      "loss": 0.5755,
      "step": 3
    },
    {
      "epoch": 0.000539538020569887,
      "grad_norm": 0.5318180322647095,
      "learning_rate": 4e-05,
      "loss": 0.5212,
      "step": 4
    },
    {
      "epoch": 0.0006744225257123588,
      "grad_norm": 0.4897218644618988,
      "learning_rate": 5e-05,
      "loss": 0.4963,
      "step": 5
    },
    {
      "epoch": 0.0008093070308548305,
      "grad_norm": 0.4619118273258209,
      "learning_rate": 6e-05,
      "loss": 0.4524,
      "step": 6
    },
    {
      "epoch": 0.0009441915359973024,
      "grad_norm": 0.4772312641143799,
      "learning_rate": 7e-05,
      "loss": 0.4057,
      "step": 7
    },
    {
      "epoch": 0.001079076041139774,
      "grad_norm": 0.49830466508865356,
      "learning_rate": 8e-05,
      "loss": 0.4654,
      "step": 8
    },
    {
      "epoch": 0.0012139605462822458,
      "grad_norm": 0.5150027275085449,
      "learning_rate": 9e-05,
      "loss": 0.4528,
      "step": 9
    },
    {
      "epoch": 0.0013488450514247176,
      "grad_norm": 0.4883197247982025,
      "learning_rate": 0.0001,
      "loss": 0.4683,
      "step": 10
    },
    {
      "epoch": 0.0014837295565671893,
      "grad_norm": 0.5665213465690613,
      "learning_rate": 9.999316524962345e-05,
      "loss": 0.5624,
      "step": 11
    },
    {
      "epoch": 0.001618614061709661,
      "grad_norm": 0.4944498836994171,
      "learning_rate": 9.997266286704631e-05,
      "loss": 0.5727,
      "step": 12
    },
    {
      "epoch": 0.0017534985668521328,
      "grad_norm": 0.457690566778183,
      "learning_rate": 9.993849845741524e-05,
      "loss": 0.4831,
      "step": 13
    },
    {
      "epoch": 0.0018883830719946047,
      "grad_norm": 0.4723391830921173,
      "learning_rate": 9.989068136093873e-05,
      "loss": 0.4807,
      "step": 14
    },
    {
      "epoch": 0.0020232675771370764,
      "grad_norm": 0.46450331807136536,
      "learning_rate": 9.98292246503335e-05,
      "loss": 0.5013,
      "step": 15
    },
    {
      "epoch": 0.002158152082279548,
      "grad_norm": 0.4473508596420288,
      "learning_rate": 9.975414512725057e-05,
      "loss": 0.4734,
      "step": 16
    },
    {
      "epoch": 0.00229303658742202,
      "grad_norm": 0.44036874175071716,
      "learning_rate": 9.966546331768191e-05,
      "loss": 0.4924,
      "step": 17
    },
    {
      "epoch": 0.0024279210925644916,
      "grad_norm": 0.43315592408180237,
      "learning_rate": 9.956320346634876e-05,
      "loss": 0.4552,
      "step": 18
    },
    {
      "epoch": 0.0025628055977069634,
      "grad_norm": 0.42718714475631714,
      "learning_rate": 9.944739353007344e-05,
      "loss": 0.4542,
      "step": 19
    },
    {
      "epoch": 0.002697690102849435,
      "grad_norm": 0.42780622839927673,
      "learning_rate": 9.931806517013612e-05,
      "loss": 0.4212,
      "step": 20
    },
    {
      "epoch": 0.002832574607991907,
      "grad_norm": 0.48322251439094543,
      "learning_rate": 9.917525374361912e-05,
      "loss": 0.4701,
      "step": 21
    },
    {
      "epoch": 0.0029674591131343786,
      "grad_norm": 0.4247865080833435,
      "learning_rate": 9.901899829374047e-05,
      "loss": 0.4139,
      "step": 22
    },
    {
      "epoch": 0.0031023436182768503,
      "grad_norm": 0.46833139657974243,
      "learning_rate": 9.884934153917997e-05,
      "loss": 0.4707,
      "step": 23
    },
    {
      "epoch": 0.003237228123419322,
      "grad_norm": 0.4637615382671356,
      "learning_rate": 9.86663298624003e-05,
      "loss": 0.4475,
      "step": 24
    },
    {
      "epoch": 0.003372112628561794,
      "grad_norm": 0.47445815801620483,
      "learning_rate": 9.847001329696653e-05,
      "loss": 0.427,
      "step": 25
    },
    {
      "epoch": 0.0035069971337042655,
      "grad_norm": 0.4429916441440582,
      "learning_rate": 9.826044551386744e-05,
      "loss": 0.3974,
      "step": 26
    },
    {
      "epoch": 0.0036418816388467377,
      "grad_norm": 0.4321354031562805,
      "learning_rate": 9.803768380684242e-05,
      "loss": 0.4111,
      "step": 27
    },
    {
      "epoch": 0.0037767661439892094,
      "grad_norm": 0.42871013283729553,
      "learning_rate": 9.780178907671789e-05,
      "loss": 0.3995,
      "step": 28
    },
    {
      "epoch": 0.003911650649131681,
      "grad_norm": 0.4513198435306549,
      "learning_rate": 9.755282581475769e-05,
      "loss": 0.3615,
      "step": 29
    },
    {
      "epoch": 0.004046535154274153,
      "grad_norm": 0.4309406280517578,
      "learning_rate": 9.729086208503174e-05,
      "loss": 0.3835,
      "step": 30
    },
    {
      "epoch": 0.004181419659416624,
      "grad_norm": 0.4923899173736572,
      "learning_rate": 9.701596950580806e-05,
      "loss": 0.4012,
      "step": 31
    },
    {
      "epoch": 0.004316304164559096,
      "grad_norm": 0.4715842008590698,
      "learning_rate": 9.672822322997305e-05,
      "loss": 0.344,
      "step": 32
    },
    {
      "epoch": 0.004451188669701568,
      "grad_norm": 0.4990752041339874,
      "learning_rate": 9.642770192448536e-05,
      "loss": 0.414,
      "step": 33
    },
    {
      "epoch": 0.00458607317484404,
      "grad_norm": 0.5114120841026306,
      "learning_rate": 9.611448774886924e-05,
      "loss": 0.4346,
      "step": 34
    },
    {
      "epoch": 0.004720957679986511,
      "grad_norm": 0.5033982396125793,
      "learning_rate": 9.578866633275288e-05,
      "loss": 0.3961,
      "step": 35
    },
    {
      "epoch": 0.004855842185128983,
      "grad_norm": 0.5176622271537781,
      "learning_rate": 9.545032675245813e-05,
      "loss": 0.3634,
      "step": 36
    },
    {
      "epoch": 0.0049907266902714555,
      "grad_norm": 0.4883003234863281,
      "learning_rate": 9.509956150664796e-05,
      "loss": 0.3363,
      "step": 37
    },
    {
      "epoch": 0.005125611195413927,
      "grad_norm": 0.49539199471473694,
      "learning_rate": 9.473646649103818e-05,
      "loss": 0.3966,
      "step": 38
    },
    {
      "epoch": 0.005260495700556399,
      "grad_norm": 0.5086225271224976,
      "learning_rate": 9.43611409721806e-05,
      "loss": 0.3377,
      "step": 39
    },
    {
      "epoch": 0.00539538020569887,
      "grad_norm": 0.47139525413513184,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.325,
      "step": 40
    },
    {
      "epoch": 0.005530264710841342,
      "grad_norm": 0.5271187424659729,
      "learning_rate": 9.357421218136386e-05,
      "loss": 0.3476,
      "step": 41
    },
    {
      "epoch": 0.005665149215983814,
      "grad_norm": 0.5539581179618835,
      "learning_rate": 9.316282404787871e-05,
      "loss": 0.3642,
      "step": 42
    },
    {
      "epoch": 0.005800033721126286,
      "grad_norm": 0.5593342781066895,
      "learning_rate": 9.273963562927695e-05,
      "loss": 0.3203,
      "step": 43
    },
    {
      "epoch": 0.005934918226268757,
      "grad_norm": 0.563308835029602,
      "learning_rate": 9.230476262104677e-05,
      "loss": 0.325,
      "step": 44
    },
    {
      "epoch": 0.006069802731411229,
      "grad_norm": 0.5519070625305176,
      "learning_rate": 9.185832391312644e-05,
      "loss": 0.3488,
      "step": 45
    },
    {
      "epoch": 0.006204687236553701,
      "grad_norm": 0.5657252669334412,
      "learning_rate": 9.140044155740101e-05,
      "loss": 0.3344,
      "step": 46
    },
    {
      "epoch": 0.006339571741696173,
      "grad_norm": 0.5870583057403564,
      "learning_rate": 9.093124073433463e-05,
      "loss": 0.3087,
      "step": 47
    },
    {
      "epoch": 0.006474456246838644,
      "grad_norm": 0.6925362944602966,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.361,
      "step": 48
    },
    {
      "epoch": 0.006609340751981116,
      "grad_norm": 0.6307148337364197,
      "learning_rate": 8.995939984474624e-05,
      "loss": 0.3025,
      "step": 49
    },
    {
      "epoch": 0.006744225257123588,
      "grad_norm": 0.7876100540161133,
      "learning_rate": 8.945702546981969e-05,
      "loss": 0.2675,
      "step": 50
    },
    {
      "epoch": 0.006744225257123588,
      "eval_loss": 0.40518489480018616,
      "eval_runtime": 262.0707,
      "eval_samples_per_second": 47.647,
      "eval_steps_per_second": 11.913,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4791659107713024.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}