ViswanthSai commited on
Commit
3349e1b
·
verified ·
1 Parent(s): 8a4fe8e

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64ae4f64e3e27b703c51d2a4228a527670c033261a35d7a2b34afd0c6d53a42a
3
  size 241895584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08e7548567d86124e4fbba8ce76106934c9f547f066151fd3de78ad0a08f37e
3
  size 241895584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c6cedb706163a8260631ac2c77ca7ca0e8af370dafc6eeaaa3117b16928be6b
3
  size 123396357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b047ebdc69a8b57ad351cbb5ff17e2f8f5c0ec0e954a5073a15faec4d0d52a7a
3
  size 123396357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:746292b776e70f12d4c33e6be6cb963a26635d1670de1973ab3a32c9a8c60a0d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18c428de3d337e366235ac71e21db0dee376009a830ae7e06fe3af1eec7f3a00
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ccbc889d1c38a21ecce182cf4adbf347b91b44d80537c158dc7c0e1f7ad46f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f94ebdc28f5491fc51fc2ecbab5d9e2e3ba6be348d92d880d778a28fcd2cbce
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 800,
3
- "best_metric": 0.613548219203949,
4
- "best_model_checkpoint": "outputs/checkpoint-800",
5
- "epoch": 0.6304176516942475,
6
  "eval_steps": 200,
7
- "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -264,6 +264,70 @@
264
  "eval_samples_per_second": 10.206,
265
  "eval_steps_per_second": 5.108,
266
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  }
268
  ],
269
  "logging_steps": 25,
@@ -278,12 +342,12 @@
278
  "should_evaluate": false,
279
  "should_log": false,
280
  "should_save": true,
281
- "should_training_stop": false
282
  },
283
  "attributes": {}
284
  }
285
  },
286
- "total_flos": 3.827027177282765e+16,
287
  "train_batch_size": 2,
288
  "trial_name": null,
289
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1000,
3
+ "best_metric": 0.6114270687103271,
4
+ "best_model_checkpoint": "outputs/checkpoint-1000",
5
+ "epoch": 0.7880220646178093,
6
  "eval_steps": 200,
7
+ "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
264
  "eval_samples_per_second": 10.206,
265
  "eval_steps_per_second": 5.108,
266
  "step": 800
267
+ },
268
+ {
269
+ "epoch": 0.6501182033096927,
270
+ "grad_norm": 0.32342788577079773,
271
+ "learning_rate": 1.8285510166487152e-05,
272
+ "loss": 0.646,
273
+ "step": 825
274
+ },
275
+ {
276
+ "epoch": 0.6698187549251379,
277
+ "grad_norm": 0.40992236137390137,
278
+ "learning_rate": 1.3572519804629536e-05,
279
+ "loss": 0.6419,
280
+ "step": 850
281
+ },
282
+ {
283
+ "epoch": 0.6895193065405831,
284
+ "grad_norm": 0.36296817660331726,
285
+ "learning_rate": 9.517294753398064e-06,
286
+ "loss": 0.5924,
287
+ "step": 875
288
+ },
289
+ {
290
+ "epoch": 0.7092198581560284,
291
+ "grad_norm": 0.28041669726371765,
292
+ "learning_rate": 6.1506977240444074e-06,
293
+ "loss": 0.6353,
294
+ "step": 900
295
+ },
296
+ {
297
+ "epoch": 0.7289204097714737,
298
+ "grad_norm": 0.3714691996574402,
299
+ "learning_rate": 3.4983505527688586e-06,
300
+ "loss": 0.6522,
301
+ "step": 925
302
+ },
303
+ {
304
+ "epoch": 0.7486209613869188,
305
+ "grad_norm": 0.40027645230293274,
306
+ "learning_rate": 1.580439203075812e-06,
307
+ "loss": 0.6376,
308
+ "step": 950
309
+ },
310
+ {
311
+ "epoch": 0.7683215130023641,
312
+ "grad_norm": 0.3009302318096161,
313
+ "learning_rate": 4.115601384029666e-07,
314
+ "loss": 0.6226,
315
+ "step": 975
316
+ },
317
+ {
318
+ "epoch": 0.7880220646178093,
319
+ "grad_norm": 0.43267834186553955,
320
+ "learning_rate": 6.092342209607083e-10,
321
+ "loss": 0.6157,
322
+ "step": 1000
323
+ },
324
+ {
325
+ "epoch": 0.7880220646178093,
326
+ "eval_loss": 0.6114270687103271,
327
+ "eval_runtime": 104.2904,
328
+ "eval_samples_per_second": 10.25,
329
+ "eval_steps_per_second": 5.13,
330
+ "step": 1000
331
  }
332
  ],
333
  "logging_steps": 25,
 
342
  "should_evaluate": false,
343
  "should_log": false,
344
  "should_save": true,
345
+ "should_training_stop": true
346
  },
347
  "attributes": {}
348
  }
349
  },
350
+ "total_flos": 4.773356252585165e+16,
351
  "train_batch_size": 2,
352
  "trial_name": null,
353
  "trial_params": null