Training in progress, step 300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 194563400
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6de441aaa8485c64b502422cd0a7f66e324dc19f216e281253db3e91c440d3a
|
3 |
size 194563400
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 99236212
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e36b77114a8d774d669476f30b11995ffb7598179f55e979f3316e32cb2b971
|
3 |
size 99236212
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b674501ac52ce92a0082a78aa20edda450504487603ac5f2a1dcf66f24ee75e
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d93420319c4318ff13366855f16b6ec61d99b866bdf2a20293a1621b040b36f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2087,6 +2087,154 @@
|
|
2087 |
"eval_samples_per_second": 7.953,
|
2088 |
"eval_steps_per_second": 7.953,
|
2089 |
"step": 280
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2090 |
}
|
2091 |
],
|
2092 |
"logging_steps": 1,
|
@@ -2101,12 +2249,12 @@
|
|
2101 |
"should_evaluate": false,
|
2102 |
"should_log": false,
|
2103 |
"should_save": true,
|
2104 |
-
"should_training_stop":
|
2105 |
},
|
2106 |
"attributes": {}
|
2107 |
}
|
2108 |
},
|
2109 |
-
"total_flos":
|
2110 |
"train_batch_size": 1,
|
2111 |
"trial_name": null,
|
2112 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.25763512425527346,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2087 |
"eval_samples_per_second": 7.953,
|
2088 |
"eval_steps_per_second": 7.953,
|
2089 |
"step": 280
|
2090 |
+
},
|
2091 |
+
{
|
2092 |
+
"epoch": 0.2413182330524395,
|
2093 |
+
"grad_norm": 2.2641775608062744,
|
2094 |
+
"learning_rate": 3.166200546578718e-06,
|
2095 |
+
"loss": 1.4545,
|
2096 |
+
"step": 281
|
2097 |
+
},
|
2098 |
+
{
|
2099 |
+
"epoch": 0.24217701679995707,
|
2100 |
+
"grad_norm": 1.9874449968338013,
|
2101 |
+
"learning_rate": 2.8427160470641253e-06,
|
2102 |
+
"loss": 1.5352,
|
2103 |
+
"step": 282
|
2104 |
+
},
|
2105 |
+
{
|
2106 |
+
"epoch": 0.24303580054747465,
|
2107 |
+
"grad_norm": 1.7880980968475342,
|
2108 |
+
"learning_rate": 2.5365011072835117e-06,
|
2109 |
+
"loss": 1.8229,
|
2110 |
+
"step": 283
|
2111 |
+
},
|
2112 |
+
{
|
2113 |
+
"epoch": 0.24389458429499222,
|
2114 |
+
"grad_norm": 1.6125706434249878,
|
2115 |
+
"learning_rate": 2.2475916629177415e-06,
|
2116 |
+
"loss": 1.3356,
|
2117 |
+
"step": 284
|
2118 |
+
},
|
2119 |
+
{
|
2120 |
+
"epoch": 0.2447533680425098,
|
2121 |
+
"grad_norm": 2.2507503032684326,
|
2122 |
+
"learning_rate": 1.9760216187710787e-06,
|
2123 |
+
"loss": 1.6077,
|
2124 |
+
"step": 285
|
2125 |
+
},
|
2126 |
+
{
|
2127 |
+
"epoch": 0.24561215179002738,
|
2128 |
+
"grad_norm": 1.8500958681106567,
|
2129 |
+
"learning_rate": 1.7218228447922867e-06,
|
2130 |
+
"loss": 1.742,
|
2131 |
+
"step": 286
|
2132 |
+
},
|
2133 |
+
{
|
2134 |
+
"epoch": 0.24647093553754496,
|
2135 |
+
"grad_norm": 2.065246105194092,
|
2136 |
+
"learning_rate": 1.4850251723345196e-06,
|
2137 |
+
"loss": 1.575,
|
2138 |
+
"step": 287
|
2139 |
+
},
|
2140 |
+
{
|
2141 |
+
"epoch": 0.24732971928506253,
|
2142 |
+
"grad_norm": 1.8860384225845337,
|
2143 |
+
"learning_rate": 1.2656563906545902e-06,
|
2144 |
+
"loss": 1.6296,
|
2145 |
+
"step": 288
|
2146 |
+
},
|
2147 |
+
{
|
2148 |
+
"epoch": 0.2481885030325801,
|
2149 |
+
"grad_norm": 1.577683448791504,
|
2150 |
+
"learning_rate": 1.0637422436516274e-06,
|
2151 |
+
"loss": 1.5374,
|
2152 |
+
"step": 289
|
2153 |
+
},
|
2154 |
+
{
|
2155 |
+
"epoch": 0.2490472867800977,
|
2156 |
+
"grad_norm": 1.9685717821121216,
|
2157 |
+
"learning_rate": 8.793064268460604e-07,
|
2158 |
+
"loss": 1.6663,
|
2159 |
+
"step": 290
|
2160 |
+
},
|
2161 |
+
{
|
2162 |
+
"epoch": 0.24990607052761526,
|
2163 |
+
"grad_norm": 2.094910144805908,
|
2164 |
+
"learning_rate": 7.123705845987093e-07,
|
2165 |
+
"loss": 1.7775,
|
2166 |
+
"step": 291
|
2167 |
+
},
|
2168 |
+
{
|
2169 |
+
"epoch": 0.25076485427513284,
|
2170 |
+
"grad_norm": 1.6965657472610474,
|
2171 |
+
"learning_rate": 5.629543075708176e-07,
|
2172 |
+
"loss": 1.4766,
|
2173 |
+
"step": 292
|
2174 |
+
},
|
2175 |
+
{
|
2176 |
+
"epoch": 0.2516236380226504,
|
2177 |
+
"grad_norm": 1.9982954263687134,
|
2178 |
+
"learning_rate": 4.310751304249738e-07,
|
2179 |
+
"loss": 1.8619,
|
2180 |
+
"step": 293
|
2181 |
+
},
|
2182 |
+
{
|
2183 |
+
"epoch": 0.252482421770168,
|
2184 |
+
"grad_norm": 1.7990736961364746,
|
2185 |
+
"learning_rate": 3.167485297673411e-07,
|
2186 |
+
"loss": 1.5422,
|
2187 |
+
"step": 294
|
2188 |
+
},
|
2189 |
+
{
|
2190 |
+
"epoch": 0.2533412055176856,
|
2191 |
+
"grad_norm": 2.147096633911133,
|
2192 |
+
"learning_rate": 2.1998792233142714e-07,
|
2193 |
+
"loss": 1.7929,
|
2194 |
+
"step": 295
|
2195 |
+
},
|
2196 |
+
{
|
2197 |
+
"epoch": 0.25419998926520315,
|
2198 |
+
"grad_norm": 2.139371871948242,
|
2199 |
+
"learning_rate": 1.4080466340349316e-07,
|
2200 |
+
"loss": 1.4746,
|
2201 |
+
"step": 296
|
2202 |
+
},
|
2203 |
+
{
|
2204 |
+
"epoch": 0.25505877301272073,
|
2205 |
+
"grad_norm": 2.0672645568847656,
|
2206 |
+
"learning_rate": 7.92080454900701e-08,
|
2207 |
+
"loss": 1.8796,
|
2208 |
+
"step": 297
|
2209 |
+
},
|
2210 |
+
{
|
2211 |
+
"epoch": 0.2559175567602383,
|
2212 |
+
"grad_norm": 1.8566962480545044,
|
2213 |
+
"learning_rate": 3.5205297227380855e-08,
|
2214 |
+
"loss": 1.5411,
|
2215 |
+
"step": 298
|
2216 |
+
},
|
2217 |
+
{
|
2218 |
+
"epoch": 0.2567763405077559,
|
2219 |
+
"grad_norm": 2.228825092315674,
|
2220 |
+
"learning_rate": 8.801582533035644e-09,
|
2221 |
+
"loss": 1.3654,
|
2222 |
+
"step": 299
|
2223 |
+
},
|
2224 |
+
{
|
2225 |
+
"epoch": 0.25763512425527346,
|
2226 |
+
"grad_norm": 1.640992522239685,
|
2227 |
+
"learning_rate": 0.0,
|
2228 |
+
"loss": 1.6715,
|
2229 |
+
"step": 300
|
2230 |
+
},
|
2231 |
+
{
|
2232 |
+
"epoch": 0.25763512425527346,
|
2233 |
+
"eval_loss": 1.7131195068359375,
|
2234 |
+
"eval_runtime": 47.8512,
|
2235 |
+
"eval_samples_per_second": 7.962,
|
2236 |
+
"eval_steps_per_second": 7.962,
|
2237 |
+
"step": 300
|
2238 |
}
|
2239 |
],
|
2240 |
"logging_steps": 1,
|
|
|
2249 |
"should_evaluate": false,
|
2250 |
"should_log": false,
|
2251 |
"should_save": true,
|
2252 |
+
"should_training_stop": true
|
2253 |
},
|
2254 |
"attributes": {}
|
2255 |
}
|
2256 |
},
|
2257 |
+
"total_flos": 4.22811663335424e+16,
|
2258 |
"train_batch_size": 1,
|
2259 |
"trial_name": null,
|
2260 |
"trial_params": null
|