0x1202 commited on
Commit
515053c
·
verified ·
1 Parent(s): fe7d5e0

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3903b5024dc5f0d8b8a92afcd0bc0705ec18ecf50f06a8aa9c295580586eed35
3
  size 550593184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c152cb6a91e6eb19cce5696bb5aab7af49b1436d63206846d7950a059f6829cc
3
  size 550593184
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1305feada2e21237b6b96d8fdac7c8b019d61f4f01ce1a6acc73cb4f4e23185b
3
- size 280341460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f1d7db0293fb74147f7fe567624adc37460bac20ea378baaff44290172671e
3
+ size 280342100
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb9850f80127889f9cca1b5407738ecf32e2907f409d68234d66eb9d18ab953
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30497404fbe87652d1676a91ddfb5d898aa726c54fd96fe0a00ea56154a09678
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc33e131fa6defcf31352ebc1dc63541771f8d9732b2772a4a16ecb6c33c3697
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6612e1be5fc1a945d1a2e93ec2df274cca4c095f65d292f5fd095af43ba016
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.30073416233062744,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.04601579875757343,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
144
  "eval_samples_per_second": 7.168,
145
  "eval_steps_per_second": 1.793,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +301,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 4.277954354746491e+17,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2922627031803131,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.09203159751514686,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 7.168,
145
  "eval_steps_per_second": 1.793,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.049083518674744996,
150
+ "grad_norm": 0.09948628395795822,
151
+ "learning_rate": 7.396244933600285e-05,
152
+ "loss": 0.28,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.05215123859191656,
157
+ "grad_norm": 0.10903914272785187,
158
+ "learning_rate": 7.077075065009433e-05,
159
+ "loss": 0.3643,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.05521895850908812,
164
+ "grad_norm": 0.11725682765245438,
165
+ "learning_rate": 6.747320897995493e-05,
166
+ "loss": 0.3636,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.05828667842625968,
171
+ "grad_norm": 0.10994742065668106,
172
+ "learning_rate": 6.408662784207149e-05,
173
+ "loss": 0.2732,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.061354398343431246,
178
+ "grad_norm": 0.251674085855484,
179
+ "learning_rate": 6.062826447764883e-05,
180
+ "loss": 0.2081,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.061354398343431246,
185
+ "eval_loss": 0.295357346534729,
186
+ "eval_runtime": 767.7606,
187
+ "eval_samples_per_second": 7.151,
188
+ "eval_steps_per_second": 1.788,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 0.0644221182606028,
193
+ "grad_norm": 0.09594530612230301,
194
+ "learning_rate": 5.7115741913664264e-05,
195
+ "loss": 0.2597,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 0.06748983817777437,
200
+ "grad_norm": 0.10894957929849625,
201
+ "learning_rate": 5.3566959159961615e-05,
202
+ "loss": 0.3608,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 0.07055755809494593,
207
+ "grad_norm": 0.103450708091259,
208
+ "learning_rate": 5e-05,
209
+ "loss": 0.3554,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 0.07362527801211749,
214
+ "grad_norm": 0.1029270738363266,
215
+ "learning_rate": 4.643304084003839e-05,
216
+ "loss": 0.282,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 0.07669299792928906,
221
+ "grad_norm": 0.35574325919151306,
222
+ "learning_rate": 4.288425808633575e-05,
223
+ "loss": 0.2095,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 0.07669299792928906,
228
+ "eval_loss": 0.29382532835006714,
229
+ "eval_runtime": 766.7704,
230
+ "eval_samples_per_second": 7.16,
231
+ "eval_steps_per_second": 1.791,
232
+ "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.07976071784646062,
236
+ "grad_norm": 0.08866327255964279,
237
+ "learning_rate": 3.937173552235117e-05,
238
+ "loss": 0.2725,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.08282843776363218,
243
+ "grad_norm": 0.10799609124660492,
244
+ "learning_rate": 3.591337215792852e-05,
245
+ "loss": 0.3543,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.08589615768080375,
250
+ "grad_norm": 0.10899989306926727,
251
+ "learning_rate": 3.2526791020045086e-05,
252
+ "loss": 0.3616,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.0889638775979753,
257
+ "grad_norm": 0.1085992082953453,
258
+ "learning_rate": 2.9229249349905684e-05,
259
+ "loss": 0.292,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.09203159751514686,
264
+ "grad_norm": 0.2433381974697113,
265
+ "learning_rate": 2.603755066399718e-05,
266
+ "loss": 0.2056,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.09203159751514686,
271
+ "eval_loss": 0.2922627031803131,
272
+ "eval_runtime": 766.4427,
273
+ "eval_samples_per_second": 7.163,
274
+ "eval_steps_per_second": 1.791,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 8.56285344058835e+17,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null