Add files using upload-large-folder tool

- chat_template.jinja +51 -0
- config.json +21 -18
- generation_config.json +1 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model.safetensors.index.json +13 -63
- tokenizer_config.json +3 -2
chat_template.jinja ADDED
@@ -0,0 +1,51 @@
+{%- set today = strftime_now("%Y-%m-%d") %}
+{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\")" %}
+
+{{- bos_token }}
+
+{%- if messages[0]['role'] == 'system' %}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content'] %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text'] %}
+    {%- endif %}
+    {%- set loop_messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = default_system_message %}
+    {%- set loop_messages = messages %}
+{%- endif %}
+{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}
+
+{%- for message in loop_messages %}
+    {%- if message['role'] == 'user' %}
+        {%- if message['content'] is string %}
+            {{- '[INST]' + message['content'] + '[/INST]' }}
+        {%- else %}
+            {{- '[INST]' }}
+            {%- for block in message['content'] %}
+                {%- if block['type'] == 'text' %}
+                    {{- block['text'] }}
+                {%- elif block['type'] in ['image', 'image_url'] %}
+                    {{- '[IMG]' }}
+                {%- else %}
+                    {{- raise_exception('Only text and image blocks are supported in message content!') }}
+                {%- endif %}
+            {%- endfor %}
+            {{- '[/INST]' }}
+        {%- endif %}
+    {%- elif message['role'] == 'system' %}
+        {%- if message['content'] is string %}
+            {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}
+        {%- else %}
+            {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}
+        {%- endif %}
+    {%- elif message['role'] == 'assistant' %}
+        {%- if message['content'] is string %}
+            {{- message['content'] + eos_token }}
+        {%- else %}
+            {{- message['content'][0]['text'] + eos_token }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Only user, system and assistant roles are supported!') }}
+    {%- endif %}
+{%- endfor %}
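The template wraps the system prompt (the default one or the one supplied by the caller) in [SYSTEM_PROMPT]…[/SYSTEM_PROMPT], wraps user turns in [INST]…[/INST] with an [IMG] placeholder per image block, and closes assistant turns with the EOS token. A minimal sketch of rendering it through transformers' apply_chat_template, assuming a recent transformers release (the template's strftime_now helper requires it) and a local clone of this repository at the placeholder path ./model-dir:

# Sketch only: render the chat template without tokenizing.
# "./model-dir" is a placeholder for a local clone of this repository.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./model-dir")
messages = [{"role": "user", "content": "Hello, who are you?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
# Expected shape of the output (default system prompt abridged):
# <s>[SYSTEM_PROMPT]You are Mistral Small 3, ...[/SYSTEM_PROMPT][INST]Hello, who are you?[/INST]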
config.json CHANGED
@@ -23,45 +23,48 @@
       "multi_modal_projector",
       "merger",
       "modality_projection",
-      "
-      "vision_tower.transformer.layers.22.attention",
-      "vision_tower.transformer.layers.17.feed_forward",
-      "vision_tower.transformer.layers.18.feed_forward",
-      "vision_tower.transformer.layers.14.feed_forward",
-      "vision_tower.transformer.layers.19.feed_forward",
-      "vision_tower.transformer.layers.8.feed_forward",
-      "vision_tower.transformer.layers.7.feed_forward",
+      "vision_tower.transformer.layers.5.feed_forward",
       "vision_tower.transformer.layers.15.feed_forward",
-      "vision_tower.transformer.layers.
+      "vision_tower.transformer.layers.19.feed_forward",
+      "vision_tower.transformer.layers.18.feed_forward",
       "vision_tower.transformer.layers.4.feed_forward",
-      "vision_tower.transformer.layers.
+      "vision_tower.transformer.layers.15.attention",
+      "vision_tower.transformer.layers.8.feed_forward",
+      "vision_tower.transformer.layers.17.feed_forward",
+      "vision_tower.transformer.layers.22.attention",
       "vision_tower.transformer.layers.14.attention",
-      "vision_tower.transformer.layers.12.feed_forward",
       "vision_tower.transformer.layers.11.feed_forward",
-      "
+      "language_model.model.layers.2.mlp",
+      "vision_tower.transformer.layers.3.feed_forward",
+      "vision_tower.transformer.layers.16.feed_forward",
+      "vision_tower.transformer.layers.14.feed_forward",
+      "vision_tower.transformer.layers.7.feed_forward",
       "vision_tower.transformer.layers.6.feed_forward",
-      "vision_tower.transformer.layers.23.attention",
       "vision_tower.transformer.layers.21.feed_forward",
+      "vision_tower.transformer.layers.10.feed_forward",
+      "vision_tower.transformer.layers.12.feed_forward",
+      "multi_modal_projector",
       "vision_tower.transformer.layers.22.feed_forward",
+      "vision_tower.transformer.layers.23.attention",
       "vision_tower.transformer.layers.9.feed_forward",
       "vision_tower.transformer.layers.13.feed_forward",
       "vision_tower.transformer.layers.13.attention",
       "vision_tower.transformer.layers.23.feed_forward",
       "vision_tower.transformer.layers.12.attention",
-      "vision_tower.transformer.layers.11.attention",
       "vision_tower.transformer.layers.2.feed_forward",
       "vision_tower.transformer.layers.10.attention",
       "vision_tower.transformer.layers.0.feed_forward",
+      "vision_tower.transformer.layers.11.attention",
       "vision_tower.transformer.layers.1.feed_forward",
-      "vision_tower.transformer.layers.8.attention",
       "vision_tower.transformer.layers.7.attention",
-      "vision_tower.transformer.layers.4.attention",
       "vision_tower.transformer.layers.6.attention",
+      "vision_tower.transformer.layers.8.attention",
+      "vision_tower.transformer.layers.4.attention",
       "vision_tower.transformer.layers.5.attention",
       "vision_tower.transformer.layers.0.attention",
       "vision_tower.transformer.layers.3.attention",
-      "vision_tower.transformer.layers.2.attention",
       "vision_tower.transformer.layers.1.attention",
+      "vision_tower.transformer.layers.2.attention",
       "vision_tower.transformer.layers.9.attention"
     ],
     "llm_int8_threshold": 6.0,
@@ -90,7 +93,7 @@
     "vocab_size": 131072
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.52.0.dev0",
   "unsloth_fixed": true,
   "vision_config": {
     "attention_dropout": 0.0,
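The edited list sits next to "llm_int8_threshold": 6.0, so it reads as the quantization config's skip-module list (llm_int8_skip_modules in transformers' BitsAndBytesConfig), i.e. modules kept in higher precision; the quant_state.bitsandbytes__nf4 and nested_absmax / nested_quant_map keys in the weight index below point to NF4 quantization with double quantization. A hedged sketch of how such a config is usually expressed when quantizing from the original weights; the Auto class and the base model id are assumptions, not taken from this commit:

# Sketch, not the exact command used for this repository: a BitsAndBytesConfig
# whose skip list mirrors entries from the config.json diff above.
import torch
from transformers import AutoModelForImageTextToText, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # matches quant_state.bitsandbytes__nf4 in the index
    bnb_4bit_use_double_quant=True,        # matches nested_absmax / nested_quant_map keys
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=[
        "multi_modal_projector",
        "merger",
        "modality_projection",
        "vision_tower.transformer.layers.5.feed_forward",
        "language_model.model.layers.2.mlp",
        # ... remaining entries from the list above ...
    ],
)

model = AutoModelForImageTextToText.from_pretrained(
    "<base-model-id>",                     # placeholder; not specified in this commit
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)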
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "bos_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 11,
-  "transformers_version": "4.
+  "transformers_version": "4.52.0.dev0"
 }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c4ccec6b6d0a863fb5a007448a9cfeb5564373205e6af6eb9f8f4f384b507f21
+size 4945278181
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9c29193aabf114b62310c6cbcb62ccb8df6f74b2f3ef7bf233dbe55c2d9dee41
+size 4960910653
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:93460e261e4ca3fe139fb1b0df33b376d303b581f06f0392898646e86575f487
+size 4474038219
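The shards are stored through Git LFS, so the repository itself only tracks pointer files carrying each shard's sha256 and byte size; the values above are the updated pointers for shards 1-3. A quick check of a downloaded shard against its pointer (the local path is an assumption):

# Verify a downloaded shard against the oid/size recorded in its LFS pointer.
import hashlib
import os

path = "model-00001-of-00004.safetensors"  # assumes the shard is in the working directory
expected_oid = "c4ccec6b6d0a863fb5a007448a9cfeb5564373205e6af6eb9f8f4f384b507f21"
expected_size = 4945278181

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("shard matches its LFS pointer")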
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 15722120613
   },
   "weight_map": {
     "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
@@ -751,12 +751,12 @@
     "language_model.model.layers.24.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
     "language_model.model.layers.24.mlp.gate_proj.weight.quant_map": "model-00002-of-00004.safetensors",
     "language_model.model.layers.24.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
-    "language_model.model.layers.24.mlp.up_proj.weight": "model-
-    "language_model.model.layers.24.mlp.up_proj.weight.absmax": "model-
-    "language_model.model.layers.24.mlp.up_proj.weight.nested_absmax": "model-
-    "language_model.model.layers.24.mlp.up_proj.weight.nested_quant_map": "model-
-    "language_model.model.layers.24.mlp.up_proj.weight.quant_map": "model-
-    "language_model.model.layers.24.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-
+    "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight.absmax": "model-00003-of-00004.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight.nested_absmax": "model-00003-of-00004.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00004.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight.quant_map": "model-00003-of-00004.safetensors",
+    "language_model.model.layers.24.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00004.safetensors",
     "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.24.self_attn.k_proj.weight.absmax": "model-00002-of-00004.safetensors",
@@ -1625,12 +1625,12 @@
     "language_model.model.layers.7.mlp.down_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
     "language_model.model.layers.7.mlp.down_proj.weight.quant_map": "model-00002-of-00004.safetensors",
     "language_model.model.layers.7.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
-    "language_model.model.layers.7.mlp.gate_proj.weight": "model-
-    "language_model.model.layers.7.mlp.gate_proj.weight.absmax": "model-
-    "language_model.model.layers.7.mlp.gate_proj.weight.nested_absmax": "model-
-    "language_model.model.layers.7.mlp.gate_proj.weight.nested_quant_map": "model-
-    "language_model.model.layers.7.mlp.gate_proj.weight.quant_map": "model-
-    "language_model.model.layers.7.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-
+    "language_model.model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight.absmax": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight.nested_absmax": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight.quant_map": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.7.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00004.safetensors",
     "language_model.model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.7.mlp.up_proj.weight.absmax": "model-00002-of-00004.safetensors",
     "language_model.model.layers.7.mlp.up_proj.weight.nested_absmax": "model-00002-of-00004.safetensors",
@@ -1821,29 +1821,9 @@
     "vision_tower.transformer.layers.14.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.14.ffn_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.attention.k_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.k_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.k_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.k_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.k_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.attention.o_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.o_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.o_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.o_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.o_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.attention.q_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.q_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.q_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.q_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.q_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.attention.v_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.v_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.v_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.v_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.v_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.15.attention.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.attention_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.15.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -1875,23 +1855,8 @@
     "vision_tower.transformer.layers.16.attention.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.16.attention_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.16.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.16.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.16.feed_forward.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.16.ffn_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.17.attention.k_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.17.attention.k_proj.weight.absmax": "model-00001-of-00004.safetensors",
@@ -2104,23 +2069,8 @@
     "vision_tower.transformer.layers.5.attention.v_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.5.attention_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.5.feed_forward.down_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.5.feed_forward.up_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.nested_absmax": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.nested_quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.quant_map": "model-00001-of-00004.safetensors",
-    "vision_tower.transformer.layers.5.feed_forward.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.5.ffn_norm.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.6.attention.k_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.transformer.layers.6.attention.o_proj.weight": "model-00001-of-00004.safetensors",
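The index change is consistent with the shard diffs above: total_size is set to 15,722,120,613 bytes, layers.24.mlp.up_proj now resolves to shard 3, layers.7.mlp.gate_proj to shard 2, and the bitsandbytes quant-state entries for vision layers 15, 16 and 5 are dropped, matching their appearance on the skip list in config.json. A small sketch of reading the index, assuming the file sits in the working directory:

# Sketch: find which shard holds a tensor via the weight map in the index.
import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])   # 15722120613 after this commit
name = "language_model.model.layers.24.mlp.up_proj.weight"
print(index["weight_map"][name])         # "model-00003-of-00004.safetensors"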
tokenizer_config.json CHANGED
@@ -9017,5 +9017,6 @@
   "processor_class": "PixtralProcessor",
   "tokenizer_class": "LlamaTokenizerFast",
   "unk_token": "<unk>",
-  "use_default_system_prompt": false
-}
+  "use_default_system_prompt": false,
+  "chat_template": "{%- set today = strftime_now(\"%Y-%m-%d\") %}\n{%- set default_system_message = \"You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\\nYour knowledge base was last updated on 2023-10-01. The current date is \" + today + \".\\n\\nWhen you're not sure about some information, you say that you don't have the information and don't make up anything.\\nIf the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \\\"What are some good restaurants around me?\\\" => \\\"Where are you?\\\" or \\\"When is the next flight to Tokyo\\\" => \\\"Where do you travel from?\\\")\" %}\n\n{{- bos_token }}\n\n{%- if messages[0]['role'] == 'system' %}\n    {%- if messages[0]['content'] is string %}\n        {%- set system_message = messages[0]['content'] %}\n    {%- else %}\n        {%- set system_message = messages[0]['content'][0]['text'] %}\n    {%- endif %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = default_system_message %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{{- '[SYSTEM_PROMPT]' + system_message + '[/SYSTEM_PROMPT]' }}\n\n{%- for message in loop_messages %}\n    {%- if message['role'] == 'user' %}\n        {%- if message['content'] is string %}\n            {{- '[INST]' + message['content'] + '[/INST]' }}\n        {%- else %}\n            {{- '[INST]' }}\n            {%- for block in message['content'] %}\n                {%- if block['type'] == 'text' %}\n                    {{- block['text'] }}\n                {%- elif block['type'] in ['image', 'image_url'] %}\n                    {{- '[IMG]' }}\n                {%- else %}\n                    {{- raise_exception('Only text and image blocks are supported in message content!') }}\n                {%- endif %}\n            {%- endfor %}\n            {{- '[/INST]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'system' %}\n        {%- if message['content'] is string %}\n            {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }}\n        {%- else %}\n            {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }}\n        {%- endif %}\n    {%- elif message['role'] == 'assistant' %}\n        {%- if message['content'] is string %}\n            {{- message['content'] + eos_token }}\n        {%- else %}\n            {{- message['content'][0]['text'] + eos_token }}\n        {%- endif %}\n    {%- else %}\n        {{- raise_exception('Only user, system and assistant roles are supported!') }}\n    {%- endif %}\n{%- endfor %}"
+}