File size: 2,364 Bytes
0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 f2fbbd5 0234e87 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 42,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.023809523809523808,
"grad_norm": 0.35546875,
"learning_rate": 4e-05,
"loss": 0.9948,
"step": 1
},
{
"epoch": 0.11904761904761904,
"grad_norm": 0.31640625,
"learning_rate": 0.0002,
"loss": 1.1099,
"step": 5
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.31640625,
"learning_rate": 0.0001911228490388136,
"loss": 1.048,
"step": 10
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.267578125,
"learning_rate": 0.00016606747233900815,
"loss": 0.9912,
"step": 15
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.2578125,
"learning_rate": 0.00012928227712765504,
"loss": 0.9396,
"step": 20
},
{
"epoch": 0.5952380952380952,
"grad_norm": 0.2294921875,
"learning_rate": 8.729821802531212e-05,
"loss": 0.997,
"step": 25
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.2421875,
"learning_rate": 4.756927164427685e-05,
"loss": 1.0419,
"step": 30
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.248046875,
"learning_rate": 1.7149035075615794e-05,
"loss": 1.0231,
"step": 35
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.208984375,
"learning_rate": 1.4384089652291543e-06,
"loss": 0.9758,
"step": 40
},
{
"epoch": 1.0,
"eval_loss": 1.0181750059127808,
"eval_runtime": 88.1603,
"eval_samples_per_second": 3.755,
"eval_steps_per_second": 0.476,
"step": 42
},
{
"epoch": 1.0,
"step": 42,
"total_flos": 2.935565656994611e+16,
"train_loss": 1.0143198739914667,
"train_runtime": 408.5529,
"train_samples_per_second": 0.818,
"train_steps_per_second": 0.103
}
],
"logging_steps": 5,
"max_steps": 42,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 2.935565656994611e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|