{ "best_metric": 0.32453402876853943, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.12849341471249598, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00128493414712496, "grad_norm": 0.2153976410627365, "learning_rate": 1.6666666666666668e-07, "loss": 0.2219, "step": 1 }, { "epoch": 0.00128493414712496, "eval_loss": 0.5323071479797363, "eval_runtime": 104.1803, "eval_samples_per_second": 12.584, "eval_steps_per_second": 1.574, "step": 1 }, { "epoch": 0.00256986829424992, "grad_norm": 0.30113792419433594, "learning_rate": 3.3333333333333335e-07, "loss": 0.2754, "step": 2 }, { "epoch": 0.0038548024413748794, "grad_norm": 0.34443554282188416, "learning_rate": 5.000000000000001e-07, "loss": 0.2138, "step": 3 }, { "epoch": 0.00513973658849984, "grad_norm": 0.39076605439186096, "learning_rate": 6.666666666666667e-07, "loss": 0.2594, "step": 4 }, { "epoch": 0.006424670735624799, "grad_norm": 0.4116816818714142, "learning_rate": 8.333333333333333e-07, "loss": 0.2973, "step": 5 }, { "epoch": 0.007709604882749759, "grad_norm": 0.47579216957092285, "learning_rate": 1.0000000000000002e-06, "loss": 0.3334, "step": 6 }, { "epoch": 0.008994539029874718, "grad_norm": 0.4625336229801178, "learning_rate": 1.1666666666666668e-06, "loss": 0.378, "step": 7 }, { "epoch": 0.01027947317699968, "grad_norm": 0.49837028980255127, "learning_rate": 1.3333333333333334e-06, "loss": 0.3781, "step": 8 }, { "epoch": 0.011564407324124639, "grad_norm": 0.6171607971191406, "learning_rate": 1.5e-06, "loss": 0.4556, "step": 9 }, { "epoch": 0.012849341471249599, "grad_norm": 0.5609787106513977, "learning_rate": 1.6666666666666667e-06, "loss": 0.3851, "step": 10 }, { "epoch": 0.014134275618374558, "grad_norm": 0.5493508577346802, "learning_rate": 1.8333333333333333e-06, "loss": 0.361, "step": 11 }, { "epoch": 0.015419209765499518, "grad_norm": 0.6691854000091553, "learning_rate": 2.0000000000000003e-06, "loss": 0.3836, "step": 12 }, { "epoch": 0.016704143912624477, "grad_norm": 0.6240688562393188, "learning_rate": 2.166666666666667e-06, "loss": 0.4312, "step": 13 }, { "epoch": 0.017989078059749437, "grad_norm": 0.7265613675117493, "learning_rate": 2.3333333333333336e-06, "loss": 0.402, "step": 14 }, { "epoch": 0.019274012206874396, "grad_norm": 0.7323911190032959, "learning_rate": 2.5e-06, "loss": 0.4389, "step": 15 }, { "epoch": 0.02055894635399936, "grad_norm": 0.7426925301551819, "learning_rate": 2.666666666666667e-06, "loss": 0.391, "step": 16 }, { "epoch": 0.02184388050112432, "grad_norm": 0.8133726716041565, "learning_rate": 2.8333333333333335e-06, "loss": 0.4516, "step": 17 }, { "epoch": 0.023128814648249278, "grad_norm": 0.7418370842933655, "learning_rate": 3e-06, "loss": 0.394, "step": 18 }, { "epoch": 0.024413748795374238, "grad_norm": 0.7176808714866638, "learning_rate": 3.1666666666666667e-06, "loss": 0.3914, "step": 19 }, { "epoch": 0.025698682942499197, "grad_norm": 0.7384213805198669, "learning_rate": 3.3333333333333333e-06, "loss": 0.492, "step": 20 }, { "epoch": 0.026983617089624157, "grad_norm": 0.8748512268066406, "learning_rate": 3.5e-06, "loss": 0.4822, "step": 21 }, { "epoch": 0.028268551236749116, "grad_norm": 0.9340311884880066, "learning_rate": 3.6666666666666666e-06, "loss": 0.4356, "step": 22 }, { "epoch": 0.029553485383874076, "grad_norm": 0.760717511177063, "learning_rate": 3.833333333333334e-06, "loss": 0.3861, "step": 23 }, { "epoch": 0.030838419530999035, "grad_norm": 0.8021165728569031, "learning_rate": 4.000000000000001e-06, "loss": 0.3977, "step": 24 }, { "epoch": 0.032123353678123995, "grad_norm": 0.8144139051437378, "learning_rate": 4.166666666666667e-06, "loss": 0.4304, "step": 25 }, { "epoch": 0.033408287825248954, "grad_norm": 0.7808437347412109, "learning_rate": 4.333333333333334e-06, "loss": 0.4361, "step": 26 }, { "epoch": 0.034693221972373914, "grad_norm": 1.1297154426574707, "learning_rate": 4.5e-06, "loss": 0.5637, "step": 27 }, { "epoch": 0.03597815611949887, "grad_norm": 0.9760102033615112, "learning_rate": 4.666666666666667e-06, "loss": 0.4492, "step": 28 }, { "epoch": 0.03726309026662383, "grad_norm": 0.9299658536911011, "learning_rate": 4.833333333333333e-06, "loss": 0.3935, "step": 29 }, { "epoch": 0.03854802441374879, "grad_norm": 0.998177170753479, "learning_rate": 5e-06, "loss": 0.4626, "step": 30 }, { "epoch": 0.03983295856087375, "grad_norm": 1.0855931043624878, "learning_rate": 4.997482666353287e-06, "loss": 0.3724, "step": 31 }, { "epoch": 0.04111789270799872, "grad_norm": 1.125369668006897, "learning_rate": 4.989935734988098e-06, "loss": 0.4277, "step": 32 }, { "epoch": 0.04240282685512368, "grad_norm": 1.185484528541565, "learning_rate": 4.977374404419838e-06, "loss": 0.5637, "step": 33 }, { "epoch": 0.04368776100224864, "grad_norm": 1.4676731824874878, "learning_rate": 4.959823971496575e-06, "loss": 0.4642, "step": 34 }, { "epoch": 0.0449726951493736, "grad_norm": 1.6404002904891968, "learning_rate": 4.937319780454559e-06, "loss": 0.5588, "step": 35 }, { "epoch": 0.046257629296498556, "grad_norm": 1.335082769393921, "learning_rate": 4.909907151739634e-06, "loss": 0.4545, "step": 36 }, { "epoch": 0.047542563443623516, "grad_norm": 1.3332980871200562, "learning_rate": 4.8776412907378845e-06, "loss": 0.4349, "step": 37 }, { "epoch": 0.048827497590748475, "grad_norm": 1.369439959526062, "learning_rate": 4.8405871765993435e-06, "loss": 0.4278, "step": 38 }, { "epoch": 0.050112431737873435, "grad_norm": 1.502086877822876, "learning_rate": 4.7988194313786275e-06, "loss": 0.5379, "step": 39 }, { "epoch": 0.051397365884998394, "grad_norm": 1.5149955749511719, "learning_rate": 4.752422169756048e-06, "loss": 0.4262, "step": 40 }, { "epoch": 0.052682300032123354, "grad_norm": 1.4554944038391113, "learning_rate": 4.701488829641845e-06, "loss": 0.5276, "step": 41 }, { "epoch": 0.05396723417924831, "grad_norm": 1.277449369430542, "learning_rate": 4.646121984004666e-06, "loss": 0.4139, "step": 42 }, { "epoch": 0.05525216832637327, "grad_norm": 1.4300509691238403, "learning_rate": 4.586433134303257e-06, "loss": 0.5761, "step": 43 }, { "epoch": 0.05653710247349823, "grad_norm": 1.1660631895065308, "learning_rate": 4.522542485937369e-06, "loss": 0.4325, "step": 44 }, { "epoch": 0.05782203662062319, "grad_norm": 1.6382452249526978, "learning_rate": 4.454578706170075e-06, "loss": 0.4775, "step": 45 }, { "epoch": 0.05910697076774815, "grad_norm": 1.5593774318695068, "learning_rate": 4.382678665009028e-06, "loss": 0.373, "step": 46 }, { "epoch": 0.06039190491487311, "grad_norm": 1.3182231187820435, "learning_rate": 4.3069871595684795e-06, "loss": 0.3875, "step": 47 }, { "epoch": 0.06167683906199807, "grad_norm": 1.720353364944458, "learning_rate": 4.227656622467162e-06, "loss": 0.5628, "step": 48 }, { "epoch": 0.06296177320912304, "grad_norm": 1.7283390760421753, "learning_rate": 4.144846814849282e-06, "loss": 0.4885, "step": 49 }, { "epoch": 0.06424670735624799, "grad_norm": 2.8064615726470947, "learning_rate": 4.058724504646834e-06, "loss": 0.7336, "step": 50 }, { "epoch": 0.06424670735624799, "eval_loss": 0.34164541959762573, "eval_runtime": 105.4694, "eval_samples_per_second": 12.43, "eval_steps_per_second": 1.555, "step": 50 }, { "epoch": 0.06553164150337296, "grad_norm": 0.26072192192077637, "learning_rate": 3.969463130731183e-06, "loss": 0.1646, "step": 51 }, { "epoch": 0.06681657565049791, "grad_norm": 0.3280903398990631, "learning_rate": 3.8772424536302565e-06, "loss": 0.2271, "step": 52 }, { "epoch": 0.06810150979762288, "grad_norm": 0.34677988290786743, "learning_rate": 3.782248193514766e-06, "loss": 0.2508, "step": 53 }, { "epoch": 0.06938644394474783, "grad_norm": 0.4058302044868469, "learning_rate": 3.684671656182497e-06, "loss": 0.2262, "step": 54 }, { "epoch": 0.0706713780918728, "grad_norm": 0.38339221477508545, "learning_rate": 3.5847093477938955e-06, "loss": 0.2467, "step": 55 }, { "epoch": 0.07195631223899775, "grad_norm": 0.4589427411556244, "learning_rate": 3.4825625791348093e-06, "loss": 0.2348, "step": 56 }, { "epoch": 0.07324124638612271, "grad_norm": 0.5955134034156799, "learning_rate": 3.3784370602033572e-06, "loss": 0.3195, "step": 57 }, { "epoch": 0.07452618053324767, "grad_norm": 0.492436021566391, "learning_rate": 3.272542485937369e-06, "loss": 0.2931, "step": 58 }, { "epoch": 0.07581111468037263, "grad_norm": 0.5189871191978455, "learning_rate": 3.165092113916688e-06, "loss": 0.273, "step": 59 }, { "epoch": 0.07709604882749758, "grad_norm": 0.5466316938400269, "learning_rate": 3.056302334890786e-06, "loss": 0.3282, "step": 60 }, { "epoch": 0.07838098297462255, "grad_norm": 0.6036167144775391, "learning_rate": 2.946392236996592e-06, "loss": 0.2782, "step": 61 }, { "epoch": 0.0796659171217475, "grad_norm": 0.600953221321106, "learning_rate": 2.835583164544139e-06, "loss": 0.2846, "step": 62 }, { "epoch": 0.08095085126887247, "grad_norm": 0.6118208765983582, "learning_rate": 2.724098272258584e-06, "loss": 0.298, "step": 63 }, { "epoch": 0.08223578541599744, "grad_norm": 0.5902886986732483, "learning_rate": 2.6121620758762877e-06, "loss": 0.3588, "step": 64 }, { "epoch": 0.08352071956312239, "grad_norm": 0.5028055906295776, "learning_rate": 2.5e-06, "loss": 0.2523, "step": 65 }, { "epoch": 0.08480565371024736, "grad_norm": 0.6613095998764038, "learning_rate": 2.3878379241237136e-06, "loss": 0.2476, "step": 66 }, { "epoch": 0.08609058785737231, "grad_norm": 0.5534865260124207, "learning_rate": 2.2759017277414165e-06, "loss": 0.2462, "step": 67 }, { "epoch": 0.08737552200449727, "grad_norm": 0.7256202697753906, "learning_rate": 2.1644168354558623e-06, "loss": 0.3748, "step": 68 }, { "epoch": 0.08866045615162223, "grad_norm": 0.7539519667625427, "learning_rate": 2.053607763003409e-06, "loss": 0.3342, "step": 69 }, { "epoch": 0.0899453902987472, "grad_norm": 0.6876543760299683, "learning_rate": 1.9436976651092143e-06, "loss": 0.2865, "step": 70 }, { "epoch": 0.09123032444587215, "grad_norm": 0.7085744738578796, "learning_rate": 1.8349078860833125e-06, "loss": 0.2928, "step": 71 }, { "epoch": 0.09251525859299711, "grad_norm": 0.6572118997573853, "learning_rate": 1.7274575140626318e-06, "loss": 0.2248, "step": 72 }, { "epoch": 0.09380019274012207, "grad_norm": 0.6709907054901123, "learning_rate": 1.6215629397966432e-06, "loss": 0.3372, "step": 73 }, { "epoch": 0.09508512688724703, "grad_norm": 0.7788072824478149, "learning_rate": 1.5174374208651913e-06, "loss": 0.3332, "step": 74 }, { "epoch": 0.09637006103437198, "grad_norm": 0.7085012793540955, "learning_rate": 1.415290652206105e-06, "loss": 0.2757, "step": 75 }, { "epoch": 0.09765499518149695, "grad_norm": 0.8017731308937073, "learning_rate": 1.3153283438175036e-06, "loss": 0.3319, "step": 76 }, { "epoch": 0.0989399293286219, "grad_norm": 0.7940526604652405, "learning_rate": 1.217751806485235e-06, "loss": 0.3149, "step": 77 }, { "epoch": 0.10022486347574687, "grad_norm": 0.8877953290939331, "learning_rate": 1.122757546369744e-06, "loss": 0.3214, "step": 78 }, { "epoch": 0.10150979762287182, "grad_norm": 0.9085245728492737, "learning_rate": 1.0305368692688175e-06, "loss": 0.3384, "step": 79 }, { "epoch": 0.10279473176999679, "grad_norm": 0.7721331715583801, "learning_rate": 9.412754953531664e-07, "loss": 0.263, "step": 80 }, { "epoch": 0.10407966591712174, "grad_norm": 1.0416125059127808, "learning_rate": 8.551531851507186e-07, "loss": 0.4007, "step": 81 }, { "epoch": 0.10536460006424671, "grad_norm": 0.8873374462127686, "learning_rate": 7.723433775328385e-07, "loss": 0.3045, "step": 82 }, { "epoch": 0.10664953421137166, "grad_norm": 0.8218931555747986, "learning_rate": 6.930128404315214e-07, "loss": 0.2967, "step": 83 }, { "epoch": 0.10793446835849663, "grad_norm": 0.9357181191444397, "learning_rate": 6.17321334990973e-07, "loss": 0.3382, "step": 84 }, { "epoch": 0.1092194025056216, "grad_norm": 1.129144549369812, "learning_rate": 5.454212938299256e-07, "loss": 0.3723, "step": 85 }, { "epoch": 0.11050433665274655, "grad_norm": 0.9294447302818298, "learning_rate": 4.774575140626317e-07, "loss": 0.3518, "step": 86 }, { "epoch": 0.11178927079987151, "grad_norm": 1.224295735359192, "learning_rate": 4.1356686569674344e-07, "loss": 0.4988, "step": 87 }, { "epoch": 0.11307420494699646, "grad_norm": 1.0254179239273071, "learning_rate": 3.538780159953348e-07, "loss": 0.3184, "step": 88 }, { "epoch": 0.11435913909412143, "grad_norm": 1.141256332397461, "learning_rate": 2.98511170358155e-07, "loss": 0.3678, "step": 89 }, { "epoch": 0.11564407324124638, "grad_norm": 1.2963924407958984, "learning_rate": 2.4757783024395244e-07, "loss": 0.432, "step": 90 }, { "epoch": 0.11692900738837135, "grad_norm": 1.0066637992858887, "learning_rate": 2.0118056862137358e-07, "loss": 0.3393, "step": 91 }, { "epoch": 0.1182139415354963, "grad_norm": 1.2111726999282837, "learning_rate": 1.59412823400657e-07, "loss": 0.4263, "step": 92 }, { "epoch": 0.11949887568262127, "grad_norm": 1.2096489667892456, "learning_rate": 1.223587092621162e-07, "loss": 0.3961, "step": 93 }, { "epoch": 0.12078380982974622, "grad_norm": 1.258070468902588, "learning_rate": 9.00928482603669e-08, "loss": 0.4429, "step": 94 }, { "epoch": 0.12206874397687119, "grad_norm": 1.3796954154968262, "learning_rate": 6.268021954544095e-08, "loss": 0.3849, "step": 95 }, { "epoch": 0.12335367812399614, "grad_norm": 1.115661382675171, "learning_rate": 4.017602850342584e-08, "loss": 0.2734, "step": 96 }, { "epoch": 0.12463861227112111, "grad_norm": 1.1489228010177612, "learning_rate": 2.262559558016325e-08, "loss": 0.3193, "step": 97 }, { "epoch": 0.12592354641824607, "grad_norm": 1.5417214632034302, "learning_rate": 1.006426501190233e-08, "loss": 0.4079, "step": 98 }, { "epoch": 0.127208480565371, "grad_norm": 1.401361346244812, "learning_rate": 2.5173336467135266e-09, "loss": 0.3922, "step": 99 }, { "epoch": 0.12849341471249598, "grad_norm": 2.738529920578003, "learning_rate": 0.0, "loss": 0.7924, "step": 100 }, { "epoch": 0.12849341471249598, "eval_loss": 0.32453402876853943, "eval_runtime": 105.3757, "eval_samples_per_second": 12.441, "eval_steps_per_second": 1.556, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5273699984801792e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }