lesso12 commited on
Commit
bdbe159
·
verified ·
1 Parent(s): c4113b1

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b37e3cd5f660df95241862c24a3e48f163ec6a69ea3025479a20f8d32d0953b4
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecebc9fd9c0270d4af5ffbd4a53066c70b3ae60b25aecf5f845d586e0780d4b
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e00c470aa5105cee62da4df302be149ac58d2be246bd8830a07a9c4e72906d0
3
  size 591208618
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5563f6f5b550906c52015dc91a9c62954bfd6ad5c0a0a50ee208917e5ae68e36
3
  size 591208618
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b3a88ea4899f7ac3673d86679a655d1cf11b984715b6f9e4a333cdbfcd48577
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a1376c44ff39c239fd1cdd69a4f9c61362956e696de95463462b6f930f2288
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5294c8c5f0577845a3a17162958d210fca6dc9ef5073daed3bd9ec3ff96435c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1f8727232a90a6db300fcb2bdb06d9b7cd6b135efb90aa1d4dcb2f1febaca86
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.4277774095535278,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1000",
4
- "epoch": 0.16079755587715067,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -171,6 +171,84 @@
171
  "eval_samples_per_second": 24.29,
172
  "eval_steps_per_second": 6.073,
173
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
  "logging_steps": 50,
@@ -199,7 +277,7 @@
199
  "attributes": {}
200
  }
201
  },
202
- "total_flos": 2.7214473068544e+17,
203
  "train_batch_size": 4,
204
  "trial_name": null,
205
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.3898102045059204,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
+ "epoch": 0.241196333815726,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
171
  "eval_samples_per_second": 24.29,
172
  "eval_steps_per_second": 6.073,
173
  "step": 1000
174
+ },
175
+ {
176
+ "epoch": 0.1688374336710082,
177
+ "grad_norm": 0.7347446084022522,
178
+ "learning_rate": 0.000106,
179
+ "loss": 1.4432,
180
+ "step": 1050
181
+ },
182
+ {
183
+ "epoch": 0.17687731146486574,
184
+ "grad_norm": 0.8061316013336182,
185
+ "learning_rate": 9.724658937993278e-05,
186
+ "loss": 1.4889,
187
+ "step": 1100
188
+ },
189
+ {
190
+ "epoch": 0.18491718925872327,
191
+ "grad_norm": 0.9965606927871704,
192
+ "learning_rate": 8.855297343024219e-05,
193
+ "loss": 1.4394,
194
+ "step": 1150
195
+ },
196
+ {
197
+ "epoch": 0.1929570670525808,
198
+ "grad_norm": 0.6851484179496765,
199
+ "learning_rate": 7.99785383630753e-05,
200
+ "loss": 1.416,
201
+ "step": 1200
202
+ },
203
+ {
204
+ "epoch": 0.20099694484643835,
205
+ "grad_norm": 0.7076012492179871,
206
+ "learning_rate": 7.158185626430357e-05,
207
+ "loss": 1.4108,
208
+ "step": 1250
209
+ },
210
+ {
211
+ "epoch": 0.20903682264029586,
212
+ "grad_norm": 0.818530797958374,
213
+ "learning_rate": 6.342028498678525e-05,
214
+ "loss": 1.4205,
215
+ "step": 1300
216
+ },
217
+ {
218
+ "epoch": 0.2170767004341534,
219
+ "grad_norm": 0.7673355937004089,
220
+ "learning_rate": 5.5549576338070204e-05,
221
+ "loss": 1.4441,
222
+ "step": 1350
223
+ },
224
+ {
225
+ "epoch": 0.22511657822801093,
226
+ "grad_norm": 0.7837012410163879,
227
+ "learning_rate": 4.802349523902277e-05,
228
+ "loss": 1.4046,
229
+ "step": 1400
230
+ },
231
+ {
232
+ "epoch": 0.23315645602186846,
233
+ "grad_norm": 0.9885613918304443,
234
+ "learning_rate": 4.0893452454895215e-05,
235
+ "loss": 1.3865,
236
+ "step": 1450
237
+ },
238
+ {
239
+ "epoch": 0.241196333815726,
240
+ "grad_norm": 0.6985939145088196,
241
+ "learning_rate": 3.420815340767147e-05,
242
+ "loss": 1.3805,
243
+ "step": 1500
244
+ },
245
+ {
246
+ "epoch": 0.241196333815726,
247
+ "eval_loss": 1.3898102045059204,
248
+ "eval_runtime": 431.5472,
249
+ "eval_samples_per_second": 24.273,
250
+ "eval_steps_per_second": 6.069,
251
+ "step": 1500
252
  }
253
  ],
254
  "logging_steps": 50,
 
277
  "attributes": {}
278
  }
279
  },
280
+ "total_flos": 4.0821709602816e+17,
281
  "train_batch_size": 4,
282
  "trial_name": null,
283
  "trial_params": null