{ "best_metric": 1.7377251386642456, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.18509949097639983, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009254974548819991, "eval_loss": 2.0847232341766357, "eval_runtime": 162.4017, "eval_samples_per_second": 11.207, "eval_steps_per_second": 2.802, "step": 1 }, { "epoch": 0.00925497454881999, "grad_norm": 0.5288553833961487, "learning_rate": 5e-05, "loss": 1.9788, "step": 10 }, { "epoch": 0.01850994909763998, "grad_norm": 0.2687051594257355, "learning_rate": 0.0001, "loss": 1.8967, "step": 20 }, { "epoch": 0.027764923646459973, "grad_norm": 0.285468190908432, "learning_rate": 9.924038765061042e-05, "loss": 1.8612, "step": 30 }, { "epoch": 0.03701989819527996, "grad_norm": 0.3615187406539917, "learning_rate": 9.698463103929542e-05, "loss": 1.8278, "step": 40 }, { "epoch": 0.04627487274409996, "grad_norm": 0.8302805423736572, "learning_rate": 9.330127018922194e-05, "loss": 1.7995, "step": 50 }, { "epoch": 0.04627487274409996, "eval_loss": 1.8223581314086914, "eval_runtime": 164.1133, "eval_samples_per_second": 11.09, "eval_steps_per_second": 2.772, "step": 50 }, { "epoch": 0.055529847292919945, "grad_norm": 0.2162926346063614, "learning_rate": 8.83022221559489e-05, "loss": 1.8218, "step": 60 }, { "epoch": 0.06478482184173993, "grad_norm": 0.22706399857997894, "learning_rate": 8.213938048432697e-05, "loss": 1.7809, "step": 70 }, { "epoch": 0.07403979639055992, "grad_norm": 0.26259857416152954, "learning_rate": 7.500000000000001e-05, "loss": 1.775, "step": 80 }, { "epoch": 0.08329477093937991, "grad_norm": 0.33791765570640564, "learning_rate": 6.710100716628344e-05, "loss": 1.7634, "step": 90 }, { "epoch": 0.09254974548819991, "grad_norm": 0.899003267288208, "learning_rate": 5.868240888334653e-05, "loss": 1.7645, "step": 100 }, { "epoch": 0.09254974548819991, "eval_loss": 1.7726473808288574, "eval_runtime": 164.1416, "eval_samples_per_second": 11.088, "eval_steps_per_second": 2.772, "step": 100 }, { "epoch": 0.1018047200370199, "grad_norm": 0.21113358438014984, "learning_rate": 5e-05, "loss": 1.7978, "step": 110 }, { "epoch": 0.11105969458583989, "grad_norm": 0.23512417078018188, "learning_rate": 4.131759111665349e-05, "loss": 1.7734, "step": 120 }, { "epoch": 0.12031466913465988, "grad_norm": 0.28493714332580566, "learning_rate": 3.289899283371657e-05, "loss": 1.7711, "step": 130 }, { "epoch": 0.12956964368347987, "grad_norm": 0.33335134387016296, "learning_rate": 2.500000000000001e-05, "loss": 1.7619, "step": 140 }, { "epoch": 0.13882461823229986, "grad_norm": 0.7424925565719604, "learning_rate": 1.7860619515673033e-05, "loss": 1.7731, "step": 150 }, { "epoch": 0.13882461823229986, "eval_loss": 1.7417677640914917, "eval_runtime": 164.0774, "eval_samples_per_second": 11.092, "eval_steps_per_second": 2.773, "step": 150 }, { "epoch": 0.14807959278111985, "grad_norm": 0.22447636723518372, "learning_rate": 1.1697777844051105e-05, "loss": 1.7912, "step": 160 }, { "epoch": 0.15733456732993983, "grad_norm": 0.23410490155220032, "learning_rate": 6.698729810778065e-06, "loss": 1.7734, "step": 170 }, { "epoch": 0.16658954187875982, "grad_norm": 0.2778407037258148, "learning_rate": 3.0153689607045845e-06, "loss": 1.7592, "step": 180 }, { "epoch": 0.1758445164275798, "grad_norm": 0.3438330888748169, "learning_rate": 7.596123493895991e-07, "loss": 1.7603, "step": 190 }, { "epoch": 0.18509949097639983, "grad_norm": 0.7146674394607544, "learning_rate": 0.0, "loss": 1.7467, "step": 200 }, { "epoch": 0.18509949097639983, "eval_loss": 1.7377251386642456, "eval_runtime": 163.7691, "eval_samples_per_second": 11.113, "eval_steps_per_second": 2.778, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.5796047653502976e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }