danielhanchen committed on
Commit 5e5b16e · verified · 1 Parent(s): 9dbeabe

Add files using upload-large-folder tool

chat_template.jinja ADDED
@@ -0,0 +1,51 @@
+ {%- set today = strftime_now("%Y-%m-%d") %}
+ {%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
+
+ {{- bos_token }}
+
+ {%- if messages[0]['role'] == 'system' %}
+ {%- if messages[0]['content'] is string %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- else %}
+ {%- set system_message = messages[0]['content'][0]['text'] %}
+ {%- endif %}
+ {%- set loop_messages = messages[1:] %}
+ {%- else %}
+ {%- set system_message = default_system_message %}
+ {%- set loop_messages = messages %}
+ {%- endif %}
+ {{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+ {%- for message in loop_messages %}
+ {%- if message['role'] == 'user' %}
+ {%- if message['content'] is string %}
+ {{- '[INST]' + message['content'] + '[/INST]' }}
+ {%- else %}
+ {{- '[INST]' }}
+ {%- for block in message['content'] %}
+ {%- if block['type'] == 'text' %}
+ {{- block['text'] }}
+ {%- elif block['type'] in ['image', 'image_url'] %}
+ {{- '[IMG]' }}
+ {%- else %}
+ {{- raise_exception('Only text and image blocks are supported in message content!') }}
+ {%- endif %}
+ {%- endfor %}
+ {{- '[/INST]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'system' %}
+ {%- if message['content'] is string %}
+ {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+ {%- else %}
+ {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
+ {%- endif %}
+ {%- elif message['role'] == 'assistant' %}
+ {%- if message['content'] is string %}
+ {{- message['content'] + eos_token }}
+ {%- else %}
+ {{- message['content'][0]['text'] + eos_token }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Only user, system and assistant roles are supported!') }}
+ {%- endif %}
+ {%- endfor %}
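
The template added above is what builds the prompt string for this checkpoint: a dated default system prompt is injected when none is supplied, system text is wrapped in [SYSTEM_PROMPT]...[/SYSTEM_PROMPT], user turns in [INST]...[/INST] with an [IMG] placeholder per image block, and assistant turns are closed with the EOS token. Below is a minimal sketch, not part of the commit, for rendering the file locally with jinja2 to inspect the result; the sample messages and the "<s>"/"</s>" token values are assumptions, and strftime_now / raise_exception are stand-ins for helpers that transformers normally supplies.

# Minimal sketch: render chat_template.jinja outside of transformers for inspection.
# Assumptions: the sample messages and bos/eos values below; strftime_now and
# raise_exception are stand-ins for the helpers transformers injects at render time.
from datetime import datetime
from jinja2 import Environment

def strftime_now(fmt):
    # The template calls strftime_now("%Y-%m-%d") to date the default system prompt.
    return datetime.now().strftime(fmt)

def raise_exception(message):
    raise ValueError(message)

env = Environment()
env.globals.update(strftime_now=strftime_now, raise_exception=raise_exception)

with open("chat_template.jinja") as f:
    template = env.from_string(f.read())

messages = [
    {"role": "user", "content": [
        {"type": "text", "text": "What is in this picture?"},
        {"type": "image"},
    ]},
]

print(template.render(messages=messages, bos_token="<s>", eos_token="</s>"))

For these messages the output has the shape <s>[SYSTEM_PROMPT]...[/SYSTEM_PROMPT][INST]What is in this picture?[IMG][/INST].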
config.json CHANGED
@@ -23,45 +23,48 @@
  "multi_modal_projector",
  "merger",
  "modality_projection",
- "language_model.model.layers.2.mlp",
- "vision_tower.transformer.layers.22.attention",
- "vision_tower.transformer.layers.17.feed_forward",
- "vision_tower.transformer.layers.18.feed_forward",
- "vision_tower.transformer.layers.14.feed_forward",
- "vision_tower.transformer.layers.19.feed_forward",
- "vision_tower.transformer.layers.8.feed_forward",
- "vision_tower.transformer.layers.7.feed_forward",
+ "vision_tower.transformer.layers.5.feed_forward",
  "vision_tower.transformer.layers.15.feed_forward",
- "vision_tower.transformer.layers.10.feed_forward",
+ "vision_tower.transformer.layers.19.feed_forward",
+ "vision_tower.transformer.layers.18.feed_forward",
  "vision_tower.transformer.layers.4.feed_forward",
- "vision_tower.transformer.layers.3.feed_forward",
+ "vision_tower.transformer.layers.15.attention",
+ "vision_tower.transformer.layers.8.feed_forward",
+ "vision_tower.transformer.layers.17.feed_forward",
+ "vision_tower.transformer.layers.22.attention",
  "vision_tower.transformer.layers.14.attention",
- "vision_tower.transformer.layers.12.feed_forward",
  "vision_tower.transformer.layers.11.feed_forward",
- "multi_modal_projector",
+ "language_model.model.layers.2.mlp",
+ "vision_tower.transformer.layers.3.feed_forward",
+ "vision_tower.transformer.layers.16.feed_forward",
+ "vision_tower.transformer.layers.14.feed_forward",
+ "vision_tower.transformer.layers.7.feed_forward",
  "vision_tower.transformer.layers.6.feed_forward",
- "vision_tower.transformer.layers.23.attention",
  "vision_tower.transformer.layers.21.feed_forward",
+ "vision_tower.transformer.layers.10.feed_forward",
+ "vision_tower.transformer.layers.12.feed_forward",
+ "multi_modal_projector",
  "vision_tower.transformer.layers.22.feed_forward",
+ "vision_tower.transformer.layers.23.attention",
  "vision_tower.transformer.layers.9.feed_forward",
  "vision_tower.transformer.layers.13.feed_forward",
  "vision_tower.transformer.layers.13.attention",
  "vision_tower.transformer.layers.23.feed_forward",
  "vision_tower.transformer.layers.12.attention",
- "vision_tower.transformer.layers.11.attention",
  "vision_tower.transformer.layers.2.feed_forward",
  "vision_tower.transformer.layers.10.attention",
  "vision_tower.transformer.layers.0.feed_forward",
+ "vision_tower.transformer.layers.11.attention",
  "vision_tower.transformer.layers.1.feed_forward",
- "vision_tower.transformer.layers.8.attention",
  "vision_tower.transformer.layers.7.attention",
- "vision_tower.transformer.layers.4.attention",
  "vision_tower.transformer.layers.6.attention",
+ "vision_tower.transformer.layers.8.attention",
+ "vision_tower.transformer.layers.4.attention",
  "vision_tower.transformer.layers.5.attention",
  "vision_tower.transformer.layers.0.attention",
  "vision_tower.transformer.layers.3.attention",
- "vision_tower.transformer.layers.2.attention",
  "vision_tower.transformer.layers.1.attention",
+ "vision_tower.transformer.layers.2.attention",
  "vision_tower.transformer.layers.9.attention"
  ],
  "llm_int8_threshold": 6.0,
@@ -90,7 +93,7 @@
  "vocab_size": 131072
  },
  "torch_dtype": "bfloat16",
- "transformers_version": "4.50.0.dev0",
+ "transformers_version": "4.52.0.dev0",
  "unsloth_fixed": true,
  "vision_config": {
  "attention_dropout": 0.0,
generation_config.json CHANGED
@@ -3,5 +3,5 @@
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 11,
- "transformers_version": "4.50.0.dev0"
+ "transformers_version": "4.52.0.dev0"
  }
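
generation_config.json itself only updates the recorded transformers version; the special-token ids are unchanged and load directly into transformers' GenerationConfig. A minimal sketch with the values from the file above:

# Minimal sketch: the ids above map one-to-one onto GenerationConfig fields.
from transformers import GenerationConfig

gen_cfg = GenerationConfig(bos_token_id=1, eos_token_id=2, pad_token_id=11)
print(gen_cfg.to_json_string())  # round-trips to JSON in the same shape as generation_config.json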
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bbcba9903285efb8aca2765e117fac0a7a75893f0b8a55e959514cd879174e80
- size 4988274199
+ oid sha256:c4ccec6b6d0a863fb5a007448a9cfeb5564373205e6af6eb9f8f4f384b507f21
+ size 4945278181
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:63a9ce2581902cbf9d4c28aef49b66501f961900c956e200dcad28cdecaddd6c
- size 4960910645
+ oid sha256:9c29193aabf114b62310c6cbcb62ccb8df6f74b2f3ef7bf233dbe55c2d9dee41
+ size 4960910653
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7423db08fec3220956b314f229a9c9f063084cbfc174cd91eb931f98c3dd8bce
- size 4387487653
+ oid sha256:93460e261e4ca3fe139fb1b0df33b376d303b581f06f0392898646e86575f487
+ size 4474038219
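
The three .safetensors entries above are Git LFS pointer files: each stores only the sha256 (oid) and byte size of the actual shard, and this commit swaps in new shards for parts 1 through 3 (the fourth shard does not appear in this change set). A minimal sketch, not part of the commit, for checking a downloaded shard against its pointer; the local path is assumed.

# Minimal sketch: verify a downloaded shard against the oid/size in its LFS pointer.
# The path is a placeholder; the expected values are the new ones committed above.
import hashlib
import os

def sha256_of(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

shard = "model-00001-of-00004.safetensors"
expected_oid = "c4ccec6b6d0a863fb5a007448a9cfeb5564373205e6af6eb9f8f4f384b507f21"
expected_size = 4945278181

assert os.path.getsize(shard) == expected_size, "size mismatch"
assert sha256_of(shard) == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")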
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 15678558945
+ "total_size": 15722120613
  },
  "weight_map": {
  "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
@@ -751,12 +751,12 @@
  "language_model.model.layers.24.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
  "language_model.model.layers.24.mlp.gate_proj.weight.quant_map": "model-00002-of-00004.safetensors",
  "language_model.model.layers.24.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight.absmax": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight.nested_absmax": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight.quant_map": "model-00002-of-00004.safetensors",
- "language_model.model.layers.24.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight.absmax": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight.nested_absmax": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight.quant_map": "model-00003-of-00004.safetensors",
+ "language_model.model.layers.24.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00004.safetensors",
  "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
  "language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
  "language_model.model.layers.24.self_attn.k_proj.weight.absmax": "model-00002-of-00004.safetensors",
@@ -1625,12 +1625,12 @@
  "language_model.model.layers.7.mlp.down_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
  "language_model.model.layers.7.mlp.down_proj.weight.quant_map": "model-00002-of-00004.safetensors",
  "language_model.model.layers.7.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "language_model.model.layers.7.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight.absmax": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight.nested_absmax": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight.quant_map": "model-00002-of-00004.safetensors",
+ "language_model.model.layers.7.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
  "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
  "language_model.model.layers.7.mlp.up_proj.weight.absmax": "model-00002-of-00004.safetensors",
  "language_model.model.layers.7.mlp.up_proj.weight.nested_absmax": "model-00002-of-00004.safetensors",
@@ -1821,29 +1821,9 @@
  "vision_tower.transformer.layers.14.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.14.ffn_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.attention.k_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.k_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.k_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.k_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.k_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.attention.o_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.o_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.o_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.o_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.o_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.attention.q_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.q_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.q_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.q_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.q_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.attention.v_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.v_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.v_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.v_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.v_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.15.attention.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.attention_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.15.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -1875,23 +1855,8 @@
  "vision_tower.transformer.layers.16.attention.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.16.attention_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.16.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.16.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.16.ffn_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.17.attention.k_proj.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.17.attention.k_proj.weight.absmax": "model-00001-of-00004.safetensors",
@@ -2104,23 +2069,8 @@
  "vision_tower.transformer.layers.5.attention.v_proj.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.5.attention_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.5.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.5.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.quant_map": "model-00001-of-00004.safetensors",
- "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.5.ffn_norm.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.6.attention.k_proj.weight": "model-00001-of-00004.safetensors",
  "vision_tower.transformer.layers.6.attention.o_proj.weight": "model-00001-of-00004.safetensors",
tokenizer_config.json CHANGED
@@ -9017,5 +9017,6 @@
  "processor_class": "PixtralProcessor",
  "tokenizer_class": "LlamaTokenizerFast",
  "unk_token": "<unk>",
- "use_default_system_prompt": false
- }
+ "use_default_system_prompt": false,
+ "chat_template": "{%- set today = strftime_now(\"%Y-%m-%d\") %}\n{%- set default_system_message = \"You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was last updated on 2023-10-01. The current date is \" + today + \".\\n\\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \\\"What are some good restaurants around me?\\\" => \\\"Where are you?\\\" or \\\"When is the next flight to Tokyo\\\" => \\\"Where do you travel from?\\\")\" %}\n\n{{- bos_token }}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- if messages[0]['content'] is string %}\n {%- set system_message = messages[0]['content'] %}\n {%- else %}\n {%- set system_message = messages[0]['content'][0]['text'] %}\n {%- endif %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = default_system_message %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n\n{%- for message in loop_messages %}\n {%- if message['role'] == 'user' %}\n {%- if message['content'] is string %}\n {{- '[INST]' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST]' }}\n {%- for block in message['content'] %}\n {%- if block['type'] == 'text' %}\n {{- block['text'] }}\n {%- elif block['type'] in ['image', 'image_url'] %}\n {{- '[IMG]' }}\n {%- else %}\n {{- raise_exception('Only text and image blocks are supported in message content!') }}\n {%- endif %}\n {%- endfor %}\n {{- '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'system' %}\n {%- if message['content'] is string %}\n {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n {%- else %}\n {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {%- if message['content'] is string %}\n {{- message['content'] + eos_token }}\n {%- else %}\n {{- message['content'][0]['text'] + eos_token }}\n {%- endif %}\n {%- else %}\n {{- raise_exception('Only user, system and assistant roles are supported!') }}\n {%- endif %}\n{%- endfor %}"
+ }
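
The tokenizer_config.json change embeds the same Jinja template under a new "chat_template" key (and adds the trailing comma after "use_default_system_prompt"), so AutoTokenizer picks the template up directly from the tokenizer config. A minimal usage sketch; the repo id is a placeholder.

# Minimal sketch: once "chat_template" lives in tokenizer_config.json, apply_chat_template
# renders the same prompt as chat_template.jinja. The repo id is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("<repo_id>")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Name one French startup headquartered in Paris."},
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
# Expected shape: <s>[SYSTEM_PROMPT]...[/SYSTEM_PROMPT][INST]...[/INST]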