{ "best_metric": 1.0038142204284668, "best_model_checkpoint": "miner_id_24/checkpoint-800", "epoch": 0.6780379277465833, "eval_steps": 100, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008475474096832291, "eval_loss": 1.7017370462417603, "eval_runtime": 592.1509, "eval_samples_per_second": 3.357, "eval_steps_per_second": 0.839, "step": 1 }, { "epoch": 0.04237737048416146, "grad_norm": 180.50335693359375, "learning_rate": 0.0001085, "loss": 9.6737, "step": 50 }, { "epoch": 0.08475474096832292, "grad_norm": 35.1514778137207, "learning_rate": 0.000217, "loss": 9.1546, "step": 100 }, { "epoch": 0.08475474096832292, "eval_loss": 1.4327971935272217, "eval_runtime": 591.9001, "eval_samples_per_second": 3.359, "eval_steps_per_second": 0.84, "step": 100 }, { "epoch": 0.12713211145248438, "grad_norm": 33.78520584106445, "learning_rate": 0.00021427967847172783, "loss": 9.1724, "step": 150 }, { "epoch": 0.16950948193664583, "grad_norm": 30.819656372070312, "learning_rate": 0.00020625512216741247, "loss": 9.0184, "step": 200 }, { "epoch": 0.16950948193664583, "eval_loss": 1.431421160697937, "eval_runtime": 593.9151, "eval_samples_per_second": 3.347, "eval_steps_per_second": 0.837, "step": 200 }, { "epoch": 0.2118868524208073, "grad_norm": 31.55472183227539, "learning_rate": 0.00019332871584778124, "loss": 9.0235, "step": 250 }, { "epoch": 0.25426422290496875, "grad_norm": 39.07125473022461, "learning_rate": 0.0001761486435016726, "loss": 9.0832, "step": 300 }, { "epoch": 0.25426422290496875, "eval_loss": 1.3427064418792725, "eval_runtime": 594.1739, "eval_samples_per_second": 3.346, "eval_steps_per_second": 0.836, "step": 300 }, { "epoch": 0.2966415933891302, "grad_norm": 29.832670211791992, "learning_rate": 0.00015557638569425503, "loss": 8.9316, "step": 350 }, { "epoch": 0.33901896387329167, "grad_norm": 31.09575653076172, "learning_rate": 0.0001326435213342601, "loss": 8.8142, "step": 400 }, { "epoch": 0.33901896387329167, "eval_loss": 1.2957122325897217, "eval_runtime": 592.8915, "eval_samples_per_second": 3.353, "eval_steps_per_second": 0.838, "step": 400 }, { "epoch": 0.3813963343574531, "grad_norm": 23.03346061706543, "learning_rate": 0.0001085, "loss": 8.8109, "step": 450 }, { "epoch": 0.4237737048416146, "grad_norm": 32.9949951171875, "learning_rate": 8.435647866573989e-05, "loss": 8.6298, "step": 500 }, { "epoch": 0.4237737048416146, "eval_loss": 1.1177974939346313, "eval_runtime": 592.3944, "eval_samples_per_second": 3.356, "eval_steps_per_second": 0.839, "step": 500 }, { "epoch": 0.46615107532577604, "grad_norm": 25.86236000061035, "learning_rate": 6.142361430574494e-05, "loss": 8.3883, "step": 550 }, { "epoch": 0.5085284458099375, "grad_norm": 25.12655258178711, "learning_rate": 4.085135649832741e-05, "loss": 8.4129, "step": 600 }, { "epoch": 0.5085284458099375, "eval_loss": 1.041265606880188, "eval_runtime": 593.3051, "eval_samples_per_second": 3.351, "eval_steps_per_second": 0.838, "step": 600 }, { "epoch": 0.550905816294099, "grad_norm": 45.71076965332031, "learning_rate": 2.3671284152218764e-05, "loss": 8.1111, "step": 650 }, { "epoch": 0.5932831867782604, "grad_norm": 22.455829620361328, "learning_rate": 1.0744877832587534e-05, "loss": 8.1041, "step": 700 }, { "epoch": 0.5932831867782604, "eval_loss": 1.0079442262649536, "eval_runtime": 591.8525, "eval_samples_per_second": 3.359, "eval_steps_per_second": 0.84, "step": 700 }, { "epoch": 0.6356605572624219, "grad_norm": 25.23514747619629, "learning_rate": 2.720321528272137e-06, "loss": 8.1489, "step": 750 }, { "epoch": 0.6780379277465833, "grad_norm": 25.03769302368164, "learning_rate": 0.0, "loss": 8.0241, "step": 800 }, { "epoch": 0.6780379277465833, "eval_loss": 1.0038142204284668, "eval_runtime": 594.1032, "eval_samples_per_second": 3.346, "eval_steps_per_second": 0.837, "step": 800 } ], "logging_steps": 50, "max_steps": 800, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.413778734304461e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }