ncbateman committed
Commit 1f7abce · verified · 1 Parent(s): 5f1c3dd

Training in progress, step 260, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ff29c933a4daa7713f522963be2552c9a83673e6fd2520f8b71038da53fd571c
+ oid sha256:2b75a0248b725001d4b737202cee6a588454275e75688b8e92b0209cf50f24e3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d473e2521456a932c3a8bb5ef31bc35a268ac5a17cdd188761fe0e6562831150
- size 85723284
+ oid sha256:25de9e1655df64ff1b16783b4ac59b40894a9b461138ea8b6756424d9a691538
+ size 85723732
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e80491df64e0b9dcefd07abe6128b6730507af86d32592d0d460e696bde58ca2
+ oid sha256:3f43da92b779de0b28d2e2154fe90a421bc3b606235826492ff897080fccad57
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b624ad5498b7e0d2d006e00677be92437acefdd1a52a75ba879cb0aa4c84e989
+ oid sha256:81c68d24d7db1be76d9250e1c398a314ef42ae8cead5a2f3e0a2eedee6a3ff25
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:52e750e93299ff7110c9d9215f366603bb8954578c837fc22b7489b8b2c487f0
+ oid sha256:3da237555441ba12bc3bc95ed870f2781bb439000b7e27bc6dc4e54c5877136f
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6d6d759d06e1fc36a3b0fb326b3e5dd3c3ec13a446a5ad06234c7e5737edb1f5
+ oid sha256:052a2e4cc9bb53aeaf3d416afc45769a53ef735852debe2282fc1430de68ba46
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98e9d7325633d70b723b343ee3f62a4930c7e185a70840fb60ffc5f171ed44ed
+ oid sha256:b2707c2e7e0bedcedb09182ad986f1fcf600ab66b24375e84af16e5408d76950
  size 1064
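
Each of the binary checkpoint files above is tracked with Git LFS, so the diff only shows its pointer (spec version, sha256 OID, byte size) rather than the payload. As a minimal sketch, assuming a raw pointer file and the corresponding downloaded blob are both available locally (the paths below are hypothetical, not part of this commit), the blob could be cross-checked against its pointer like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: Path) -> dict:
    """Read a Git LFS pointer file into a dict of its space-separated key/value lines."""
    fields = {}
    for line in pointer_path.read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def blob_matches_pointer(pointer_path: Path, blob_path: Path) -> bool:
    """Return True if the blob's byte size and sha256 digest match the pointer's size/oid fields."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # strip the leading "sha256:"
    expected_size = int(fields["size"])
    data = blob_path.read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Hypothetical paths: a saved pointer file and the LFS-smudged checkpoint file.
# print(blob_matches_pointer(Path("pointers/optimizer.pt"), Path("last-checkpoint/optimizer.pt")))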
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.564652738565782,
+ "epoch": 0.5872388481084133,
  "eval_steps": 222,
- "global_step": 250,
+ "global_step": 260,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1773,6 +1773,76 @@
  "learning_rate": 4.4874455403137514e-05,
  "loss": 1.7377,
  "step": 250
+ },
+ {
+ "epoch": 0.5669113495200452,
+ "grad_norm": 1.191891074180603,
+ "learning_rate": 4.449627291255184e-05,
+ "loss": 1.6349,
+ "step": 251
+ },
+ {
+ "epoch": 0.5691699604743083,
+ "grad_norm": 0.5702697038650513,
+ "learning_rate": 4.411840888163449e-05,
+ "loss": 1.3089,
+ "step": 252
+ },
+ {
+ "epoch": 0.5714285714285714,
+ "grad_norm": 0.4630747139453888,
+ "learning_rate": 4.3740885174560736e-05,
+ "loss": 1.4976,
+ "step": 253
+ },
+ {
+ "epoch": 0.5736871823828346,
+ "grad_norm": 0.6172465682029724,
+ "learning_rate": 4.336372363581391e-05,
+ "loss": 1.4348,
+ "step": 254
+ },
+ {
+ "epoch": 0.5759457933370977,
+ "grad_norm": 0.6135768294334412,
+ "learning_rate": 4.298694608892134e-05,
+ "loss": 1.7773,
+ "step": 255
+ },
+ {
+ "epoch": 0.5782044042913608,
+ "grad_norm": 1.1833115816116333,
+ "learning_rate": 4.2610574335191615e-05,
+ "loss": 1.427,
+ "step": 256
+ },
+ {
+ "epoch": 0.5804630152456239,
+ "grad_norm": 0.8085864186286926,
+ "learning_rate": 4.2234630152453116e-05,
+ "loss": 1.3841,
+ "step": 257
+ },
+ {
+ "epoch": 0.5827216261998871,
+ "grad_norm": 0.5345907807350159,
+ "learning_rate": 4.185913529379381e-05,
+ "loss": 1.1692,
+ "step": 258
+ },
+ {
+ "epoch": 0.5849802371541502,
+ "grad_norm": 0.7415188550949097,
+ "learning_rate": 4.1484111486302704e-05,
+ "loss": 1.2728,
+ "step": 259
+ },
+ {
+ "epoch": 0.5872388481084133,
+ "grad_norm": 0.5333618521690369,
+ "learning_rate": 4.110958042981255e-05,
+ "loss": 1.4616,
+ "step": 260
  }
  ],
  "logging_steps": 1,
@@ -1792,7 +1862,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.483774567120896e+18,
+ "total_flos": 1.5431255498057318e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null