patricksf committed on
Commit
bf28c7f
·
verified ·
1 Parent(s): c649533

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/lustre/fsn1/projects/rech/qjm/uar73ie/po_ckpts/Tower-4-Anthill-WPO",
3
  "add_faster_video": false,
4
  "add_time_instruction": false,
5
  "architectures": [
@@ -17,7 +17,7 @@
17
  "head_dim": 256,
18
  "hidden_act": "gelu_pytorch_tanh",
19
  "hidden_activation": "gelu_pytorch_tanh",
20
- "hidden_size": 2304,
21
  "image_aspect_ratio": "anyres",
22
  "image_crop_resolution": null,
23
  "image_grid_pinpoints": [
@@ -80,7 +80,7 @@
80
  ],
81
  "image_split_resolution": null,
82
  "initializer_range": 0.02,
83
- "intermediate_size": 9216,
84
  "max_position_embeddings": 8192,
85
  "mm_hidden_size": 1152,
86
  "mm_newline_position": "grid",
@@ -98,9 +98,9 @@
98
  "mm_vision_tower": "google/siglip2-so400m-patch14-384",
99
  "mm_vision_tower_lr": 2e-06,
100
  "model_type": "gemma2",
101
- "num_attention_heads": 8,
102
- "num_hidden_layers": 26,
103
- "num_key_value_heads": 4,
104
  "pad_token_id": 0,
105
  "pos_skipping_range": 4096,
106
  "query_pre_attn_scalar": 224,
 
1
  {
2
+ "_name_or_path": "Unbabel/Tower-Sugarloaf-v4-WPO-beta5-epoch1-2e7-32-JZ",
3
  "add_faster_video": false,
4
  "add_time_instruction": false,
5
  "architectures": [
 
17
  "head_dim": 256,
18
  "hidden_act": "gelu_pytorch_tanh",
19
  "hidden_activation": "gelu_pytorch_tanh",
20
+ "hidden_size": 3584,
21
  "image_aspect_ratio": "anyres",
22
  "image_crop_resolution": null,
23
  "image_grid_pinpoints": [
 
80
  ],
81
  "image_split_resolution": null,
82
  "initializer_range": 0.02,
83
+ "intermediate_size": 14336,
84
  "max_position_embeddings": 8192,
85
  "mm_hidden_size": 1152,
86
  "mm_newline_position": "grid",
 
98
  "mm_vision_tower": "google/siglip2-so400m-patch14-384",
99
  "mm_vision_tower_lr": 2e-06,
100
  "model_type": "gemma2",
101
+ "num_attention_heads": 16,
102
+ "num_hidden_layers": 42,
103
+ "num_key_value_heads": 8,
104
  "pad_token_id": 0,
105
  "pos_skipping_range": 4096,
106
  "query_pre_attn_scalar": 224,
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7016a0f86d170c88d7e4be84d0f23509b9c861fc61ebb34c76e576af37c0ba48
3
+ size 4903359176
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e41808a493b099e718a2a3e1e7c3663f86c52a14ce0d86a5887e84f9321a10b2
3
+ size 4947570872
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe826517f05355eed3e38cc43e411fd1ebfae0245a24bd638ebf56adf30f2b8
3
+ size 4962221464
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4597017fa49e2fe6c24de8ea70a34b69c7b803887a5c29c0158c6b84ae8ab7c
3
+ size 4499846248
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060
3
- size 34362873
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0
3
+ size 34356041
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003
tokenizer_config.json CHANGED
@@ -1737,262 +1737,6 @@
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
1740
- },
1741
- "255968": {
1742
- "content": "[toxicity=0]",
1743
- "lstrip": false,
1744
- "normalized": false,
1745
- "rstrip": false,
1746
- "single_word": false,
1747
- "special": false
1748
- },
1749
- "255969": {
1750
- "content": "\t\t",
1751
- "lstrip": false,
1752
- "normalized": false,
1753
- "rstrip": false,
1754
- "single_word": false,
1755
- "special": false
1756
- },
1757
- "255970": {
1758
- "content": "\t\t\t",
1759
- "lstrip": false,
1760
- "normalized": false,
1761
- "rstrip": false,
1762
- "single_word": false,
1763
- "special": false
1764
- },
1765
- "255971": {
1766
- "content": "\t\t\t\t",
1767
- "lstrip": false,
1768
- "normalized": false,
1769
- "rstrip": false,
1770
- "single_word": false,
1771
- "special": false
1772
- },
1773
- "255972": {
1774
- "content": "\t\t\t\t\t",
1775
- "lstrip": false,
1776
- "normalized": false,
1777
- "rstrip": false,
1778
- "single_word": false,
1779
- "special": false
1780
- },
1781
- "255973": {
1782
- "content": "\t\t\t\t\t\t",
1783
- "lstrip": false,
1784
- "normalized": false,
1785
- "rstrip": false,
1786
- "single_word": false,
1787
- "special": false
1788
- },
1789
- "255974": {
1790
- "content": "\t\t\t\t\t\t\t",
1791
- "lstrip": false,
1792
- "normalized": false,
1793
- "rstrip": false,
1794
- "single_word": false,
1795
- "special": false
1796
- },
1797
- "255975": {
1798
- "content": "\t\t\t\t\t\t\t\t",
1799
- "lstrip": false,
1800
- "normalized": false,
1801
- "rstrip": false,
1802
- "single_word": false,
1803
- "special": false
1804
- },
1805
- "255976": {
1806
- "content": "\t\t\t\t\t\t\t\t\t",
1807
- "lstrip": false,
1808
- "normalized": false,
1809
- "rstrip": false,
1810
- "single_word": false,
1811
- "special": false
1812
- },
1813
- "255977": {
1814
- "content": "\t\t\t\t\t\t\t\t\t\t",
1815
- "lstrip": false,
1816
- "normalized": false,
1817
- "rstrip": false,
1818
- "single_word": false,
1819
- "special": false
1820
- },
1821
- "255978": {
1822
- "content": "\t\t\t\t\t\t\t\t\t\t\t",
1823
- "lstrip": false,
1824
- "normalized": false,
1825
- "rstrip": false,
1826
- "single_word": false,
1827
- "special": false
1828
- },
1829
- "255979": {
1830
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t",
1831
- "lstrip": false,
1832
- "normalized": false,
1833
- "rstrip": false,
1834
- "single_word": false,
1835
- "special": false
1836
- },
1837
- "255980": {
1838
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t",
1839
- "lstrip": false,
1840
- "normalized": false,
1841
- "rstrip": false,
1842
- "single_word": false,
1843
- "special": false
1844
- },
1845
- "255981": {
1846
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1847
- "lstrip": false,
1848
- "normalized": false,
1849
- "rstrip": false,
1850
- "single_word": false,
1851
- "special": false
1852
- },
1853
- "255982": {
1854
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1855
- "lstrip": false,
1856
- "normalized": false,
1857
- "rstrip": false,
1858
- "single_word": false,
1859
- "special": false
1860
- },
1861
- "255983": {
1862
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1863
- "lstrip": false,
1864
- "normalized": false,
1865
- "rstrip": false,
1866
- "single_word": false,
1867
- "special": false
1868
- },
1869
- "255984": {
1870
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1871
- "lstrip": false,
1872
- "normalized": false,
1873
- "rstrip": false,
1874
- "single_word": false,
1875
- "special": false
1876
- },
1877
- "255985": {
1878
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1879
- "lstrip": false,
1880
- "normalized": false,
1881
- "rstrip": false,
1882
- "single_word": false,
1883
- "special": false
1884
- },
1885
- "255986": {
1886
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1887
- "lstrip": false,
1888
- "normalized": false,
1889
- "rstrip": false,
1890
- "single_word": false,
1891
- "special": false
1892
- },
1893
- "255987": {
1894
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1895
- "lstrip": false,
1896
- "normalized": false,
1897
- "rstrip": false,
1898
- "single_word": false,
1899
- "special": false
1900
- },
1901
- "255988": {
1902
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1903
- "lstrip": false,
1904
- "normalized": false,
1905
- "rstrip": false,
1906
- "single_word": false,
1907
- "special": false
1908
- },
1909
- "255989": {
1910
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1911
- "lstrip": false,
1912
- "normalized": false,
1913
- "rstrip": false,
1914
- "single_word": false,
1915
- "special": false
1916
- },
1917
- "255990": {
1918
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1919
- "lstrip": false,
1920
- "normalized": false,
1921
- "rstrip": false,
1922
- "single_word": false,
1923
- "special": false
1924
- },
1925
- "255991": {
1926
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1927
- "lstrip": false,
1928
- "normalized": false,
1929
- "rstrip": false,
1930
- "single_word": false,
1931
- "special": false
1932
- },
1933
- "255992": {
1934
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1935
- "lstrip": false,
1936
- "normalized": false,
1937
- "rstrip": false,
1938
- "single_word": false,
1939
- "special": false
1940
- },
1941
- "255993": {
1942
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1943
- "lstrip": false,
1944
- "normalized": false,
1945
- "rstrip": false,
1946
- "single_word": false,
1947
- "special": false
1948
- },
1949
- "255994": {
1950
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1951
- "lstrip": false,
1952
- "normalized": false,
1953
- "rstrip": false,
1954
- "single_word": false,
1955
- "special": false
1956
- },
1957
- "255995": {
1958
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1959
- "lstrip": false,
1960
- "normalized": false,
1961
- "rstrip": false,
1962
- "single_word": false,
1963
- "special": false
1964
- },
1965
- "255996": {
1966
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1967
- "lstrip": false,
1968
- "normalized": false,
1969
- "rstrip": false,
1970
- "single_word": false,
1971
- "special": false
1972
- },
1973
- "255997": {
1974
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1975
- "lstrip": false,
1976
- "normalized": false,
1977
- "rstrip": false,
1978
- "single_word": false,
1979
- "special": false
1980
- },
1981
- "255998": {
1982
- "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t",
1983
- "lstrip": false,
1984
- "normalized": false,
1985
- "rstrip": false,
1986
- "single_word": false,
1987
- "special": false
1988
- },
1989
- "255999": {
1990
- "content": "<unused99>",
1991
- "lstrip": false,
1992
- "normalized": false,
1993
- "rstrip": false,
1994
- "single_word": false,
1995
- "special": false
1996
  }
1997
  },
1998
  "additional_special_tokens": [
@@ -2009,7 +1753,7 @@
2009
  "padding_side": "right",
2010
  "sp_model_kwargs": {},
2011
  "spaces_between_special_tokens": false,
2012
- "tokenizer_class": "GemmaTokenizerFast",
2013
  "unk_token": "<unk>",
2014
  "use_default_system_prompt": false
2015
  }
 
1737
  "rstrip": false,
1738
  "single_word": false,
1739
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1740
  }
1741
  },
1742
  "additional_special_tokens": [
 
1753
  "padding_side": "right",
1754
  "sp_model_kwargs": {},
1755
  "spaces_between_special_tokens": false,
1756
+ "tokenizer_class": "GemmaTokenizer",
1757
  "unk_token": "<unk>",
1758
  "use_default_system_prompt": false
1759
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c05450dad189f8b65bb027050bcedddc3f079d05d23c9c019d9896e5ba8efb63
3
  size 8568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690964b7092e1509192cc2cb78d2bad662d258b520e23aee336cd023695a78cd
3
  size 8568