Training in progress, step 260, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 167832240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b75a0248b725001d4b737202cee6a588454275e75688b8e92b0209cf50f24e3
|
3 |
size 167832240
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25de9e1655df64ff1b16783b4ac59b40894a9b461138ea8b6756424d9a691538
|
3 |
+
size 85723732
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f43da92b779de0b28d2e2154fe90a421bc3b606235826492ff897080fccad57
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81c68d24d7db1be76d9250e1c398a314ef42ae8cead5a2f3e0a2eedee6a3ff25
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3da237555441ba12bc3bc95ed870f2781bb439000b7e27bc6dc4e54c5877136f
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:052a2e4cc9bb53aeaf3d416afc45769a53ef735852debe2282fc1430de68ba46
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2707c2e7e0bedcedb09182ad986f1fcf600ab66b24375e84af16e5408d76950
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 222,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1773,6 +1773,76 @@
|
|
1773 |
"learning_rate": 4.4874455403137514e-05,
|
1774 |
"loss": 1.7377,
|
1775 |
"step": 250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1776 |
}
|
1777 |
],
|
1778 |
"logging_steps": 1,
|
@@ -1792,7 +1862,7 @@
|
|
1792 |
"attributes": {}
|
1793 |
}
|
1794 |
},
|
1795 |
-
"total_flos": 1.
|
1796 |
"train_batch_size": 2,
|
1797 |
"trial_name": null,
|
1798 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5872388481084133,
|
5 |
"eval_steps": 222,
|
6 |
+
"global_step": 260,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1773 |
"learning_rate": 4.4874455403137514e-05,
|
1774 |
"loss": 1.7377,
|
1775 |
"step": 250
|
1776 |
+
},
|
1777 |
+
{
|
1778 |
+
"epoch": 0.5669113495200452,
|
1779 |
+
"grad_norm": 1.191891074180603,
|
1780 |
+
"learning_rate": 4.449627291255184e-05,
|
1781 |
+
"loss": 1.6349,
|
1782 |
+
"step": 251
|
1783 |
+
},
|
1784 |
+
{
|
1785 |
+
"epoch": 0.5691699604743083,
|
1786 |
+
"grad_norm": 0.5702697038650513,
|
1787 |
+
"learning_rate": 4.411840888163449e-05,
|
1788 |
+
"loss": 1.3089,
|
1789 |
+
"step": 252
|
1790 |
+
},
|
1791 |
+
{
|
1792 |
+
"epoch": 0.5714285714285714,
|
1793 |
+
"grad_norm": 0.4630747139453888,
|
1794 |
+
"learning_rate": 4.3740885174560736e-05,
|
1795 |
+
"loss": 1.4976,
|
1796 |
+
"step": 253
|
1797 |
+
},
|
1798 |
+
{
|
1799 |
+
"epoch": 0.5736871823828346,
|
1800 |
+
"grad_norm": 0.6172465682029724,
|
1801 |
+
"learning_rate": 4.336372363581391e-05,
|
1802 |
+
"loss": 1.4348,
|
1803 |
+
"step": 254
|
1804 |
+
},
|
1805 |
+
{
|
1806 |
+
"epoch": 0.5759457933370977,
|
1807 |
+
"grad_norm": 0.6135768294334412,
|
1808 |
+
"learning_rate": 4.298694608892134e-05,
|
1809 |
+
"loss": 1.7773,
|
1810 |
+
"step": 255
|
1811 |
+
},
|
1812 |
+
{
|
1813 |
+
"epoch": 0.5782044042913608,
|
1814 |
+
"grad_norm": 1.1833115816116333,
|
1815 |
+
"learning_rate": 4.2610574335191615e-05,
|
1816 |
+
"loss": 1.427,
|
1817 |
+
"step": 256
|
1818 |
+
},
|
1819 |
+
{
|
1820 |
+
"epoch": 0.5804630152456239,
|
1821 |
+
"grad_norm": 0.8085864186286926,
|
1822 |
+
"learning_rate": 4.2234630152453116e-05,
|
1823 |
+
"loss": 1.3841,
|
1824 |
+
"step": 257
|
1825 |
+
},
|
1826 |
+
{
|
1827 |
+
"epoch": 0.5827216261998871,
|
1828 |
+
"grad_norm": 0.5345907807350159,
|
1829 |
+
"learning_rate": 4.185913529379381e-05,
|
1830 |
+
"loss": 1.1692,
|
1831 |
+
"step": 258
|
1832 |
+
},
|
1833 |
+
{
|
1834 |
+
"epoch": 0.5849802371541502,
|
1835 |
+
"grad_norm": 0.7415188550949097,
|
1836 |
+
"learning_rate": 4.1484111486302704e-05,
|
1837 |
+
"loss": 1.2728,
|
1838 |
+
"step": 259
|
1839 |
+
},
|
1840 |
+
{
|
1841 |
+
"epoch": 0.5872388481084133,
|
1842 |
+
"grad_norm": 0.5333618521690369,
|
1843 |
+
"learning_rate": 4.110958042981255e-05,
|
1844 |
+
"loss": 1.4616,
|
1845 |
+
"step": 260
|
1846 |
}
|
1847 |
],
|
1848 |
"logging_steps": 1,
|
|
|
1862 |
"attributes": {}
|
1863 |
}
|
1864 |
},
|
1865 |
+
"total_flos": 1.5431255498057318e+18,
|
1866 |
"train_batch_size": 2,
|
1867 |
"trial_name": null,
|
1868 |
"trial_params": null
|