File size: 1,903 Bytes
e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 9c67982 e5d5534 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.7272727272727275,
"eval_steps": 500,
"global_step": 15,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"grad_norm": 8.112652061034474,
"learning_rate": 1e-05,
"loss": 1.4798,
"step": 1
},
{
"epoch": 0.91,
"grad_norm": 2.848398983756101,
"learning_rate": 1.7485107481711014e-05,
"loss": 1.2659,
"step": 5
},
{
"epoch": 0.91,
"eval_loss": 1.2053821086883545,
"eval_runtime": 1.8264,
"eval_samples_per_second": 3.833,
"eval_steps_per_second": 0.548,
"step": 5
},
{
"epoch": 1.82,
"grad_norm": 2.6874796106077996,
"learning_rate": 6.453951129574644e-06,
"loss": 1.0094,
"step": 10
},
{
"epoch": 2.0,
"eval_loss": 1.1690208911895752,
"eval_runtime": 1.8589,
"eval_samples_per_second": 3.766,
"eval_steps_per_second": 0.538,
"step": 11
},
{
"epoch": 2.73,
"grad_norm": 1.7499037267304558,
"learning_rate": 0.0,
"loss": 0.8249,
"step": 15
},
{
"epoch": 2.73,
"eval_loss": 1.1581600904464722,
"eval_runtime": 1.8666,
"eval_samples_per_second": 3.75,
"eval_steps_per_second": 0.536,
"step": 15
},
{
"epoch": 2.73,
"step": 15,
"total_flos": 3088349921280.0,
"train_loss": 1.0476287603378296,
"train_runtime": 318.5273,
"train_samples_per_second": 3.306,
"train_steps_per_second": 0.047
}
],
"logging_steps": 5,
"max_steps": 15,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 3088349921280.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|