{ "best_metric": 3.1905009746551514, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.2674591381872214, "eval_steps": 150, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005943536404160475, "eval_loss": 6.33011531829834, "eval_runtime": 210.4742, "eval_samples_per_second": 13.465, "eval_steps_per_second": 3.369, "step": 1 }, { "epoch": 0.005943536404160475, "grad_norm": 3.636090040206909, "learning_rate": 2e-05, "loss": 4.2971, "step": 10 }, { "epoch": 0.01188707280832095, "grad_norm": 3.378282308578491, "learning_rate": 4e-05, "loss": 3.9504, "step": 20 }, { "epoch": 0.017830609212481426, "grad_norm": 3.461188554763794, "learning_rate": 6e-05, "loss": 3.3025, "step": 30 }, { "epoch": 0.0237741456166419, "grad_norm": 3.8042867183685303, "learning_rate": 8e-05, "loss": 3.0173, "step": 40 }, { "epoch": 0.029717682020802376, "grad_norm": 5.867399215698242, "learning_rate": 0.0001, "loss": 3.1382, "step": 50 }, { "epoch": 0.03566121842496285, "grad_norm": 2.080888271331787, "learning_rate": 9.999074760873505e-05, "loss": 3.1589, "step": 60 }, { "epoch": 0.041604754829123326, "grad_norm": 2.627453088760376, "learning_rate": 9.996299385920993e-05, "loss": 3.0502, "step": 70 }, { "epoch": 0.0475482912332838, "grad_norm": 2.772899627685547, "learning_rate": 9.991674902296665e-05, "loss": 3.0374, "step": 80 }, { "epoch": 0.05349182763744428, "grad_norm": 3.1544461250305176, "learning_rate": 9.985203021501797e-05, "loss": 2.9162, "step": 90 }, { "epoch": 0.05943536404160475, "grad_norm": 6.360644340515137, "learning_rate": 9.976886138751321e-05, "loss": 2.9194, "step": 100 }, { "epoch": 0.06537890044576523, "grad_norm": 2.4774982929229736, "learning_rate": 9.96672733208737e-05, "loss": 3.2118, "step": 110 }, { "epoch": 0.0713224368499257, "grad_norm": 2.2214813232421875, "learning_rate": 9.954730361240105e-05, "loss": 3.0673, "step": 120 }, { "epoch": 0.07726597325408618, "grad_norm": 2.6016104221343994, "learning_rate": 9.940899666236258e-05, "loss": 2.9907, "step": 130 }, { "epoch": 0.08320950965824665, "grad_norm": 3.2741599082946777, "learning_rate": 9.925240365755893e-05, "loss": 2.9132, "step": 140 }, { "epoch": 0.08915304606240713, "grad_norm": 4.4667534828186035, "learning_rate": 9.907758255238013e-05, "loss": 3.0036, "step": 150 }, { "epoch": 0.08915304606240713, "eval_loss": 3.1905009746551514, "eval_runtime": 212.6607, "eval_samples_per_second": 13.326, "eval_steps_per_second": 3.334, "step": 150 }, { "epoch": 0.0950965824665676, "grad_norm": 1.8121700286865234, "learning_rate": 9.888459804735679e-05, "loss": 3.1284, "step": 160 }, { "epoch": 0.10104011887072809, "grad_norm": 2.5314412117004395, "learning_rate": 9.867352156521488e-05, "loss": 3.1552, "step": 170 }, { "epoch": 0.10698365527488855, "grad_norm": 2.6681604385375977, "learning_rate": 9.844443122444238e-05, "loss": 2.9376, "step": 180 }, { "epoch": 0.11292719167904904, "grad_norm": 2.87355375289917, "learning_rate": 9.819741181037799e-05, "loss": 2.8799, "step": 190 }, { "epoch": 0.1188707280832095, "grad_norm": 4.919130325317383, "learning_rate": 9.793255474383249e-05, "loss": 2.8424, "step": 200 }, { "epoch": 0.12481426448736999, "grad_norm": 2.0036211013793945, "learning_rate": 9.764995804725424e-05, "loss": 3.2481, "step": 210 }, { "epoch": 0.13075780089153047, "grad_norm": 2.4777944087982178, "learning_rate": 9.734972630845151e-05, "loss": 3.0006, "step": 220 }, { "epoch": 0.13670133729569092, "grad_norm": 2.4493539333343506, "learning_rate": 9.703197064188498e-05, "loss": 2.9379, "step": 230 }, { "epoch": 0.1426448736998514, "grad_norm": 3.2666187286376953, "learning_rate": 9.669680864754484e-05, "loss": 2.8128, "step": 240 }, { "epoch": 0.1485884101040119, "grad_norm": 7.568844795227051, "learning_rate": 9.63443643674274e-05, "loss": 2.8483, "step": 250 }, { "epoch": 0.15453194650817237, "grad_norm": 1.986729383468628, "learning_rate": 9.597476823962784e-05, "loss": 3.2472, "step": 260 }, { "epoch": 0.16047548291233285, "grad_norm": 2.4101827144622803, "learning_rate": 9.558815705006555e-05, "loss": 3.0007, "step": 270 }, { "epoch": 0.1664190193164933, "grad_norm": 2.539492607116699, "learning_rate": 9.51846738818602e-05, "loss": 2.9353, "step": 280 }, { "epoch": 0.1723625557206538, "grad_norm": 2.762788772583008, "learning_rate": 9.476446806237749e-05, "loss": 2.8221, "step": 290 }, { "epoch": 0.17830609212481427, "grad_norm": 7.846682071685791, "learning_rate": 9.432769510796353e-05, "loss": 2.8917, "step": 300 }, { "epoch": 0.17830609212481427, "eval_loss": 3.3436031341552734, "eval_runtime": 212.4479, "eval_samples_per_second": 13.34, "eval_steps_per_second": 3.337, "step": 300 }, { "epoch": 0.18424962852897475, "grad_norm": 2.2458980083465576, "learning_rate": 9.387451666638906e-05, "loss": 3.2819, "step": 310 }, { "epoch": 0.1901931649331352, "grad_norm": 2.2853057384490967, "learning_rate": 9.340510045702427e-05, "loss": 3.0085, "step": 320 }, { "epoch": 0.1961367013372957, "grad_norm": 2.456028461456299, "learning_rate": 9.291962020876654e-05, "loss": 2.8806, "step": 330 }, { "epoch": 0.20208023774145617, "grad_norm": 2.9242990016937256, "learning_rate": 9.241825559574424e-05, "loss": 2.8353, "step": 340 }, { "epoch": 0.20802377414561665, "grad_norm": 8.866583824157715, "learning_rate": 9.190119217081996e-05, "loss": 2.8203, "step": 350 }, { "epoch": 0.2139673105497771, "grad_norm": 2.047957420349121, "learning_rate": 9.136862129691838e-05, "loss": 3.3765, "step": 360 }, { "epoch": 0.2199108469539376, "grad_norm": 2.7228925228118896, "learning_rate": 9.082074007620356e-05, "loss": 3.086, "step": 370 }, { "epoch": 0.22585438335809807, "grad_norm": 2.6000123023986816, "learning_rate": 9.025775127713232e-05, "loss": 2.8523, "step": 380 }, { "epoch": 0.23179791976225855, "grad_norm": 2.826296091079712, "learning_rate": 8.967986325941056e-05, "loss": 2.8088, "step": 390 }, { "epoch": 0.237741456166419, "grad_norm": 13.53122329711914, "learning_rate": 8.908728989688015e-05, "loss": 2.8237, "step": 400 }, { "epoch": 0.2436849925705795, "grad_norm": 2.1314334869384766, "learning_rate": 8.848025049836522e-05, "loss": 3.3274, "step": 410 }, { "epoch": 0.24962852897473997, "grad_norm": 2.417358875274658, "learning_rate": 8.785896972650694e-05, "loss": 3.0393, "step": 420 }, { "epoch": 0.2555720653789004, "grad_norm": 2.595625877380371, "learning_rate": 8.72236775146167e-05, "loss": 2.9731, "step": 430 }, { "epoch": 0.26151560178306094, "grad_norm": 2.7309069633483887, "learning_rate": 8.657460898157905e-05, "loss": 2.7837, "step": 440 }, { "epoch": 0.2674591381872214, "grad_norm": 6.042367458343506, "learning_rate": 8.5912004344835e-05, "loss": 2.7581, "step": 450 }, { "epoch": 0.2674591381872214, "eval_loss": 3.2376186847686768, "eval_runtime": 212.7494, "eval_samples_per_second": 13.321, "eval_steps_per_second": 3.333, "step": 450 } ], "logging_steps": 10, "max_steps": 1683, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 150, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.658201808189522e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }