{ "best_metric": 1.0053794384002686, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.18930430667297682, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003786086133459536, "grad_norm": 2.95168399810791, "learning_rate": 5e-05, "loss": 1.5794, "step": 1 }, { "epoch": 0.003786086133459536, "eval_loss": 2.123389959335327, "eval_runtime": 2.0352, "eval_samples_per_second": 24.568, "eval_steps_per_second": 6.388, "step": 1 }, { "epoch": 0.007572172266919072, "grad_norm": 3.173819065093994, "learning_rate": 0.0001, "loss": 1.6652, "step": 2 }, { "epoch": 0.011358258400378608, "grad_norm": 2.73962664604187, "learning_rate": 9.990365154573717e-05, "loss": 1.5925, "step": 3 }, { "epoch": 0.015144344533838144, "grad_norm": 1.5520139932632446, "learning_rate": 9.961501876182148e-05, "loss": 1.2602, "step": 4 }, { "epoch": 0.01893043066729768, "grad_norm": 1.4195196628570557, "learning_rate": 9.913533761814537e-05, "loss": 1.2748, "step": 5 }, { "epoch": 0.022716516800757217, "grad_norm": 2.6151468753814697, "learning_rate": 9.846666218300807e-05, "loss": 1.4098, "step": 6 }, { "epoch": 0.026502602934216753, "grad_norm": 2.7280309200286865, "learning_rate": 9.761185582727977e-05, "loss": 1.2418, "step": 7 }, { "epoch": 0.03028868906767629, "grad_norm": 1.9285898208618164, "learning_rate": 9.657457896300791e-05, "loss": 1.3466, "step": 8 }, { "epoch": 0.034074775201135825, "grad_norm": 1.6772706508636475, "learning_rate": 9.535927336897098e-05, "loss": 1.2299, "step": 9 }, { "epoch": 0.03786086133459536, "grad_norm": 1.4668546915054321, "learning_rate": 9.397114317029975e-05, "loss": 1.1112, "step": 10 }, { "epoch": 0.0416469474680549, "grad_norm": 1.4179223775863647, "learning_rate": 9.241613255361455e-05, "loss": 1.1843, "step": 11 }, { "epoch": 0.04543303360151443, "grad_norm": 1.7838054895401, "learning_rate": 9.070090031310558e-05, "loss": 1.1332, "step": 12 }, { "epoch": 0.04921911973497397, "grad_norm": 2.4858312606811523, "learning_rate": 8.883279133655399e-05, "loss": 1.2983, "step": 13 }, { "epoch": 0.053005205868433505, "grad_norm": 2.607907772064209, "learning_rate": 8.681980515339464e-05, "loss": 1.3057, "step": 14 }, { "epoch": 0.05679129200189304, "grad_norm": 2.128920316696167, "learning_rate": 8.467056167950311e-05, "loss": 1.2775, "step": 15 }, { "epoch": 0.06057737813535258, "grad_norm": 1.7699257135391235, "learning_rate": 8.239426430539243e-05, "loss": 1.1653, "step": 16 }, { "epoch": 0.06436346426881212, "grad_norm": 1.3187021017074585, "learning_rate": 8.000066048588211e-05, "loss": 1.1756, "step": 17 }, { "epoch": 0.06814955040227165, "grad_norm": 1.0856595039367676, "learning_rate": 7.75e-05, "loss": 1.2513, "step": 18 }, { "epoch": 0.07193563653573119, "grad_norm": 2.1683356761932373, "learning_rate": 7.490299105985507e-05, "loss": 1.2837, "step": 19 }, { "epoch": 0.07572172266919072, "grad_norm": 2.570289134979248, "learning_rate": 7.222075445642904e-05, "loss": 1.3552, "step": 20 }, { "epoch": 0.07950780880265026, "grad_norm": 2.68222975730896, "learning_rate": 6.946477593864228e-05, "loss": 1.3637, "step": 21 }, { "epoch": 0.0832938949361098, "grad_norm": 2.4270741939544678, "learning_rate": 6.664685702961344e-05, "loss": 1.1851, "step": 22 }, { "epoch": 0.08707998106956934, "grad_norm": 2.465186357498169, "learning_rate": 6.377906449072578e-05, "loss": 1.2125, "step": 23 }, { "epoch": 0.09086606720302887, "grad_norm": 2.1026418209075928, "learning_rate": 6.087367864990233e-05, "loss": 1.0742, "step": 24 }, { "epoch": 0.09465215333648841, "grad_norm": 2.22967791557312, "learning_rate": 5.794314081535644e-05, "loss": 1.0804, "step": 25 }, { "epoch": 0.09465215333648841, "eval_loss": 1.0526127815246582, "eval_runtime": 2.0278, "eval_samples_per_second": 24.657, "eval_steps_per_second": 6.411, "step": 25 }, { "epoch": 0.09843823946994794, "grad_norm": 0.7997608184814453, "learning_rate": 5.500000000000001e-05, "loss": 1.0595, "step": 26 }, { "epoch": 0.10222432560340748, "grad_norm": 0.9007599353790283, "learning_rate": 5.205685918464356e-05, "loss": 1.1109, "step": 27 }, { "epoch": 0.10601041173686701, "grad_norm": 0.9283623099327087, "learning_rate": 4.912632135009769e-05, "loss": 1.1029, "step": 28 }, { "epoch": 0.10979649787032655, "grad_norm": 0.9904077053070068, "learning_rate": 4.6220935509274235e-05, "loss": 1.1447, "step": 29 }, { "epoch": 0.11358258400378608, "grad_norm": 0.9736183285713196, "learning_rate": 4.3353142970386564e-05, "loss": 1.1642, "step": 30 }, { "epoch": 0.11736867013724563, "grad_norm": 0.8450034856796265, "learning_rate": 4.053522406135775e-05, "loss": 1.2258, "step": 31 }, { "epoch": 0.12115475627070516, "grad_norm": 0.9699908494949341, "learning_rate": 3.777924554357096e-05, "loss": 1.112, "step": 32 }, { "epoch": 0.1249408424041647, "grad_norm": 1.067191243171692, "learning_rate": 3.509700894014496e-05, "loss": 1.0924, "step": 33 }, { "epoch": 0.12872692853762424, "grad_norm": 1.109052062034607, "learning_rate": 3.250000000000001e-05, "loss": 1.1621, "step": 34 }, { "epoch": 0.13251301467108376, "grad_norm": 1.1744805574417114, "learning_rate": 2.9999339514117912e-05, "loss": 1.0986, "step": 35 }, { "epoch": 0.1362991008045433, "grad_norm": 1.281468391418457, "learning_rate": 2.760573569460757e-05, "loss": 1.022, "step": 36 }, { "epoch": 0.14008518693800284, "grad_norm": 1.494238257408142, "learning_rate": 2.53294383204969e-05, "loss": 1.0764, "step": 37 }, { "epoch": 0.14387127307146239, "grad_norm": 0.6449403166770935, "learning_rate": 2.3180194846605367e-05, "loss": 0.9728, "step": 38 }, { "epoch": 0.1476573592049219, "grad_norm": 0.5640941262245178, "learning_rate": 2.1167208663446025e-05, "loss": 1.0272, "step": 39 }, { "epoch": 0.15144344533838144, "grad_norm": 0.5843005776405334, "learning_rate": 1.9299099686894423e-05, "loss": 1.0219, "step": 40 }, { "epoch": 0.155229531471841, "grad_norm": 0.591860294342041, "learning_rate": 1.758386744638546e-05, "loss": 1.0287, "step": 41 }, { "epoch": 0.15901561760530053, "grad_norm": 0.6270464658737183, "learning_rate": 1.602885682970026e-05, "loss": 1.1001, "step": 42 }, { "epoch": 0.16280170373876005, "grad_norm": 0.6936808824539185, "learning_rate": 1.464072663102903e-05, "loss": 1.1502, "step": 43 }, { "epoch": 0.1665877898722196, "grad_norm": 1.0310381650924683, "learning_rate": 1.3425421036992098e-05, "loss": 1.0836, "step": 44 }, { "epoch": 0.17037387600567913, "grad_norm": 1.016648769378662, "learning_rate": 1.2388144172720251e-05, "loss": 1.0123, "step": 45 }, { "epoch": 0.17415996213913867, "grad_norm": 1.1334302425384521, "learning_rate": 1.1533337816991932e-05, "loss": 1.0984, "step": 46 }, { "epoch": 0.1779460482725982, "grad_norm": 1.206661343574524, "learning_rate": 1.0864662381854632e-05, "loss": 1.0054, "step": 47 }, { "epoch": 0.18173213440605773, "grad_norm": 1.2735713720321655, "learning_rate": 1.0384981238178534e-05, "loss": 0.9726, "step": 48 }, { "epoch": 0.18551822053951728, "grad_norm": 1.4444059133529663, "learning_rate": 1.0096348454262845e-05, "loss": 1.0457, "step": 49 }, { "epoch": 0.18930430667297682, "grad_norm": 1.7697523832321167, "learning_rate": 1e-05, "loss": 1.0625, "step": 50 }, { "epoch": 0.18930430667297682, "eval_loss": 1.0053794384002686, "eval_runtime": 2.0429, "eval_samples_per_second": 24.475, "eval_steps_per_second": 6.364, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.589463780163584e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }