aleegis10 commited on
Commit
2781427
·
verified ·
1 Parent(s): 144f140

Training in progress, step 53, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b401a8996e1094a3ea616fc7f7226bdf80cd30ccb2ddbd87762a19b280fd0f39
3
  size 121155320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347755715d6412ed461b3e24f4c30cbc103765c379964a87826818d71b9ae1e7
3
  size 121155320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd67a688192c0222264a6c41cb51fae902e274ef2e310a9285bad9253a74207d
3
  size 61896852
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc8468acda25a8a77b68e9e04e9855d429892d8a94bf1a16bbb991a615b23c3
3
  size 61896852
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d9b21302b3043c033d4190a4eab85c75e7262c9b261a2438237e642de7538ff
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd21bfcd79332bda74bfee43e586f695faaab87015ca17dd9f4a8948d287855
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7e76384fe2e1907e44b199e48722ea251cbbcfea1285f875115318fffa6d887
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62186d98e2e1a228b1a77580a02e85af5559d427fe6308d24fc721da049c3720
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.4689046144485474,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 2.857142857142857,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -373,6 +373,27 @@
373
  "eval_samples_per_second": 57.17,
374
  "eval_steps_per_second": 15.245,
375
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 1,
@@ -396,12 +417,12 @@
396
  "should_evaluate": false,
397
  "should_log": false,
398
  "should_save": true,
399
- "should_training_stop": false
400
  },
401
  "attributes": {}
402
  }
403
  },
404
- "total_flos": 3308579507404800.0,
405
  "train_batch_size": 8,
406
  "trial_name": null,
407
  "trial_params": null
 
1
  {
2
  "best_metric": 1.4689046144485474,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.0285714285714285,
5
  "eval_steps": 50,
6
+ "global_step": 53,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
373
  "eval_samples_per_second": 57.17,
374
  "eval_steps_per_second": 15.245,
375
  "step": 50
376
+ },
377
+ {
378
+ "epoch": 2.914285714285714,
379
+ "grad_norm": 3.166837215423584,
380
+ "learning_rate": 5.328315962444874e-07,
381
+ "loss": 0.8571,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 2.9714285714285715,
386
+ "grad_norm": 1.184228539466858,
387
+ "learning_rate": 1.333858168224178e-07,
388
+ "loss": 1.1258,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 3.0285714285714285,
393
+ "grad_norm": 4.288901329040527,
394
+ "learning_rate": 0.0,
395
+ "loss": 1.5266,
396
+ "step": 53
397
  }
398
  ],
399
  "logging_steps": 1,
 
417
  "should_evaluate": false,
418
  "should_log": false,
419
  "should_save": true,
420
+ "should_training_stop": true
421
  },
422
  "attributes": {}
423
  }
424
  },
425
+ "total_flos": 3497938460344320.0,
426
  "train_batch_size": 8,
427
  "trial_name": null,
428
  "trial_params": null