aleegis12 commited on
Commit
6c374ed
·
verified ·
1 Parent(s): e12be23

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad7f15aa7cf92ae92aacba4d59c18a316c824b0a86c252777d42fe24882c3f2b
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a5e7dea7fe298f2244e4d1bcb914ecc65c16b9a7fbbd7a988143f8098c39aaf
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7162f7c324b135b25b7c94bf7500e10ee7cdec98014427c18dd7c9433c4facda
3
- size 170920084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1477cb53e58f7c719ae010191e299f05f66e2e80fae8f908d26e2f5067181df1
3
+ size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b067a5979c14304aadd7c21f2a9649c1655c45b1704aec24c06feb0d8680648
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41c5e264f01fb6957f992fd4eb0516dc59794255b639c5ba46d3fc78811a6b8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc33e131fa6defcf31352ebc1dc63541771f8d9732b2772a4a16ecb6c33c3697
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6612e1be5fc1a945d1a2e93ec2df274cca4c095f65d292f5fd095af43ba016
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 3.6887431144714355,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.22900763358778625,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
144
  "eval_samples_per_second": 7.004,
145
  "eval_steps_per_second": 1.753,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +301,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 4.356701040641311e+17,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 3.6560232639312744,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.4580152671755725,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 7.004,
145
  "eval_steps_per_second": 1.753,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.24427480916030533,
150
+ "grad_norm": 0.778315544128418,
151
+ "learning_rate": 7.396244933600285e-05,
152
+ "loss": 3.26,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.2595419847328244,
157
+ "grad_norm": 0.7379830479621887,
158
+ "learning_rate": 7.077075065009433e-05,
159
+ "loss": 3.4725,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.2748091603053435,
164
+ "grad_norm": 0.6918920278549194,
165
+ "learning_rate": 6.747320897995493e-05,
166
+ "loss": 3.6311,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.2900763358778626,
171
+ "grad_norm": 0.7601252198219299,
172
+ "learning_rate": 6.408662784207149e-05,
173
+ "loss": 3.8022,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.3053435114503817,
178
+ "grad_norm": 4.902952194213867,
179
+ "learning_rate": 6.062826447764883e-05,
180
+ "loss": 3.9776,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.3053435114503817,
185
+ "eval_loss": 3.7500429153442383,
186
+ "eval_runtime": 157.5227,
187
+ "eval_samples_per_second": 7.002,
188
+ "eval_steps_per_second": 1.752,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 0.32061068702290074,
193
+ "grad_norm": 0.9840830564498901,
194
+ "learning_rate": 5.7115741913664264e-05,
195
+ "loss": 3.3361,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 0.33587786259541985,
200
+ "grad_norm": 0.5701741576194763,
201
+ "learning_rate": 5.3566959159961615e-05,
202
+ "loss": 3.4737,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 0.3511450381679389,
207
+ "grad_norm": 0.6126981973648071,
208
+ "learning_rate": 5e-05,
209
+ "loss": 3.6109,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 0.366412213740458,
214
+ "grad_norm": 0.8289356827735901,
215
+ "learning_rate": 4.643304084003839e-05,
216
+ "loss": 3.7844,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 0.3816793893129771,
221
+ "grad_norm": 2.696506977081299,
222
+ "learning_rate": 4.288425808633575e-05,
223
+ "loss": 3.9399,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 0.3816793893129771,
228
+ "eval_loss": 3.704169750213623,
229
+ "eval_runtime": 157.5177,
230
+ "eval_samples_per_second": 7.002,
231
+ "eval_steps_per_second": 1.752,
232
+ "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.3969465648854962,
236
+ "grad_norm": 0.8667080998420715,
237
+ "learning_rate": 3.937173552235117e-05,
238
+ "loss": 3.3077,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.4122137404580153,
243
+ "grad_norm": 0.8480358719825745,
244
+ "learning_rate": 3.591337215792852e-05,
245
+ "loss": 3.4585,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.42748091603053434,
250
+ "grad_norm": 0.7193460464477539,
251
+ "learning_rate": 3.2526791020045086e-05,
252
+ "loss": 3.6021,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.44274809160305345,
257
+ "grad_norm": 0.6467528939247131,
258
+ "learning_rate": 2.9229249349905684e-05,
259
+ "loss": 3.7745,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.4580152671755725,
264
+ "grad_norm": 2.210923671722412,
265
+ "learning_rate": 2.603755066399718e-05,
266
+ "loss": 3.9242,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.4580152671755725,
271
+ "eval_loss": 3.6560232639312744,
272
+ "eval_runtime": 157.2956,
273
+ "eval_samples_per_second": 7.012,
274
+ "eval_steps_per_second": 1.755,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 8.717132133543444e+17,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null