lesso17's picture
Training in progress, step 800, checkpoint
3779b93 verified
{
"best_metric": 1.0038142204284668,
"best_model_checkpoint": "miner_id_24/checkpoint-800",
"epoch": 0.6780379277465833,
"eval_steps": 100,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008475474096832291,
"eval_loss": 1.7017370462417603,
"eval_runtime": 592.1509,
"eval_samples_per_second": 3.357,
"eval_steps_per_second": 0.839,
"step": 1
},
{
"epoch": 0.04237737048416146,
"grad_norm": 180.50335693359375,
"learning_rate": 0.0001085,
"loss": 9.6737,
"step": 50
},
{
"epoch": 0.08475474096832292,
"grad_norm": 35.1514778137207,
"learning_rate": 0.000217,
"loss": 9.1546,
"step": 100
},
{
"epoch": 0.08475474096832292,
"eval_loss": 1.4327971935272217,
"eval_runtime": 591.9001,
"eval_samples_per_second": 3.359,
"eval_steps_per_second": 0.84,
"step": 100
},
{
"epoch": 0.12713211145248438,
"grad_norm": 33.78520584106445,
"learning_rate": 0.00021427967847172783,
"loss": 9.1724,
"step": 150
},
{
"epoch": 0.16950948193664583,
"grad_norm": 30.819656372070312,
"learning_rate": 0.00020625512216741247,
"loss": 9.0184,
"step": 200
},
{
"epoch": 0.16950948193664583,
"eval_loss": 1.431421160697937,
"eval_runtime": 593.9151,
"eval_samples_per_second": 3.347,
"eval_steps_per_second": 0.837,
"step": 200
},
{
"epoch": 0.2118868524208073,
"grad_norm": 31.55472183227539,
"learning_rate": 0.00019332871584778124,
"loss": 9.0235,
"step": 250
},
{
"epoch": 0.25426422290496875,
"grad_norm": 39.07125473022461,
"learning_rate": 0.0001761486435016726,
"loss": 9.0832,
"step": 300
},
{
"epoch": 0.25426422290496875,
"eval_loss": 1.3427064418792725,
"eval_runtime": 594.1739,
"eval_samples_per_second": 3.346,
"eval_steps_per_second": 0.836,
"step": 300
},
{
"epoch": 0.2966415933891302,
"grad_norm": 29.832670211791992,
"learning_rate": 0.00015557638569425503,
"loss": 8.9316,
"step": 350
},
{
"epoch": 0.33901896387329167,
"grad_norm": 31.09575653076172,
"learning_rate": 0.0001326435213342601,
"loss": 8.8142,
"step": 400
},
{
"epoch": 0.33901896387329167,
"eval_loss": 1.2957122325897217,
"eval_runtime": 592.8915,
"eval_samples_per_second": 3.353,
"eval_steps_per_second": 0.838,
"step": 400
},
{
"epoch": 0.3813963343574531,
"grad_norm": 23.03346061706543,
"learning_rate": 0.0001085,
"loss": 8.8109,
"step": 450
},
{
"epoch": 0.4237737048416146,
"grad_norm": 32.9949951171875,
"learning_rate": 8.435647866573989e-05,
"loss": 8.6298,
"step": 500
},
{
"epoch": 0.4237737048416146,
"eval_loss": 1.1177974939346313,
"eval_runtime": 592.3944,
"eval_samples_per_second": 3.356,
"eval_steps_per_second": 0.839,
"step": 500
},
{
"epoch": 0.46615107532577604,
"grad_norm": 25.86236000061035,
"learning_rate": 6.142361430574494e-05,
"loss": 8.3883,
"step": 550
},
{
"epoch": 0.5085284458099375,
"grad_norm": 25.12655258178711,
"learning_rate": 4.085135649832741e-05,
"loss": 8.4129,
"step": 600
},
{
"epoch": 0.5085284458099375,
"eval_loss": 1.041265606880188,
"eval_runtime": 593.3051,
"eval_samples_per_second": 3.351,
"eval_steps_per_second": 0.838,
"step": 600
},
{
"epoch": 0.550905816294099,
"grad_norm": 45.71076965332031,
"learning_rate": 2.3671284152218764e-05,
"loss": 8.1111,
"step": 650
},
{
"epoch": 0.5932831867782604,
"grad_norm": 22.455829620361328,
"learning_rate": 1.0744877832587534e-05,
"loss": 8.1041,
"step": 700
},
{
"epoch": 0.5932831867782604,
"eval_loss": 1.0079442262649536,
"eval_runtime": 591.8525,
"eval_samples_per_second": 3.359,
"eval_steps_per_second": 0.84,
"step": 700
},
{
"epoch": 0.6356605572624219,
"grad_norm": 25.23514747619629,
"learning_rate": 2.720321528272137e-06,
"loss": 8.1489,
"step": 750
},
{
"epoch": 0.6780379277465833,
"grad_norm": 25.03769302368164,
"learning_rate": 0.0,
"loss": 8.0241,
"step": 800
},
{
"epoch": 0.6780379277465833,
"eval_loss": 1.0038142204284668,
"eval_runtime": 594.1032,
"eval_samples_per_second": 3.346,
"eval_steps_per_second": 0.837,
"step": 800
}
],
"logging_steps": 50,
"max_steps": 800,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.413778734304461e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}