File size: 4,488 Bytes
61bced2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
{
"best_global_step": 800,
"best_metric": 0.10116679221391678,
"best_model_checkpoint": "/tmp/results/checkpoint-800",
"epoch": 0.8812999173781327,
"eval_steps": 200,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.055081244836133296,
"grad_norm": 0.15732981264591217,
"learning_rate": 0.00019640234948604994,
"loss": 1.2703,
"step": 50
},
{
"epoch": 0.11016248967226659,
"grad_norm": 0.13457506895065308,
"learning_rate": 0.00019273127753303965,
"loss": 0.2101,
"step": 100
},
{
"epoch": 0.16524373450839988,
"grad_norm": 0.11798923462629318,
"learning_rate": 0.00018906020558002938,
"loss": 0.1801,
"step": 150
},
{
"epoch": 0.22032497934453318,
"grad_norm": 0.10562872886657715,
"learning_rate": 0.00018538913362701911,
"loss": 0.1621,
"step": 200
},
{
"epoch": 0.22032497934453318,
"eval_loss": 0.15831586718559265,
"eval_runtime": 95.9669,
"eval_samples_per_second": 33.626,
"eval_steps_per_second": 4.21,
"step": 200
},
{
"epoch": 0.27540622418066646,
"grad_norm": 0.11607076972723007,
"learning_rate": 0.00018171806167400882,
"loss": 0.1526,
"step": 250
},
{
"epoch": 0.33048746901679976,
"grad_norm": 0.20103491842746735,
"learning_rate": 0.00017804698972099853,
"loss": 0.1498,
"step": 300
},
{
"epoch": 0.38556871385293306,
"grad_norm": 0.20092593133449554,
"learning_rate": 0.00017437591776798826,
"loss": 0.1481,
"step": 350
},
{
"epoch": 0.44064995868906637,
"grad_norm": 0.15526321530342102,
"learning_rate": 0.000170704845814978,
"loss": 0.1533,
"step": 400
},
{
"epoch": 0.44064995868906637,
"eval_loss": 0.13016295433044434,
"eval_runtime": 96.1932,
"eval_samples_per_second": 33.547,
"eval_steps_per_second": 4.2,
"step": 400
},
{
"epoch": 0.49573120352519967,
"grad_norm": 0.23773688077926636,
"learning_rate": 0.0001670337738619677,
"loss": 0.1434,
"step": 450
},
{
"epoch": 0.5508124483613329,
"grad_norm": 0.14756543934345245,
"learning_rate": 0.00016336270190895743,
"loss": 0.1316,
"step": 500
},
{
"epoch": 0.6058936931974662,
"grad_norm": 0.19601435959339142,
"learning_rate": 0.00015969162995594716,
"loss": 0.1301,
"step": 550
},
{
"epoch": 0.6609749380335995,
"grad_norm": 0.1771874874830246,
"learning_rate": 0.00015602055800293687,
"loss": 0.1215,
"step": 600
},
{
"epoch": 0.6609749380335995,
"eval_loss": 0.11715047806501389,
"eval_runtime": 95.8683,
"eval_samples_per_second": 33.661,
"eval_steps_per_second": 4.214,
"step": 600
},
{
"epoch": 0.7160561828697328,
"grad_norm": 0.16557306051254272,
"learning_rate": 0.00015234948604992657,
"loss": 0.1139,
"step": 650
},
{
"epoch": 0.7711374277058661,
"grad_norm": 0.2927381992340088,
"learning_rate": 0.0001486784140969163,
"loss": 0.1214,
"step": 700
},
{
"epoch": 0.8262186725419994,
"grad_norm": 0.3338780701160431,
"learning_rate": 0.00014500734214390604,
"loss": 0.1285,
"step": 750
},
{
"epoch": 0.8812999173781327,
"grad_norm": 0.2900579273700714,
"learning_rate": 0.00014133627019089574,
"loss": 0.1274,
"step": 800
},
{
"epoch": 0.8812999173781327,
"eval_loss": 0.10116679221391678,
"eval_runtime": 94.9307,
"eval_samples_per_second": 33.993,
"eval_steps_per_second": 4.256,
"step": 800
}
],
"logging_steps": 50,
"max_steps": 2724,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 164707172352000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
|