Sylvain87 committed · Commit 143ff3b · verified · 1 Parent(s): 8367721

Upload 12 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,246 @@
---
language:
- en
- es
- it
- de
- fr
license: apache-2.0
base_model: mistralai/Mixtral-8x22B-v0.1

extra_gated_description: If you want to learn more about how we process your personal data, please read our <a href="https://mistral.ai/terms/">Privacy Policy</a>.
---

# Model Card for Mixtral-8x22B-Instruct-v0.1

## Encode and Decode with `mistral_common`

```py
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

mistral_models_path = "MISTRAL_MODELS_PATH"

tokenizer = MistralTokenizer.v3()

completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])

tokens = tokenizer.encode_chat_completion(completion_request).tokens
```
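
`mistral_models_path` above is a placeholder for wherever the raw weights live on disk. One way to populate it is a sketch with `huggingface_hub`'s `snapshot_download`; the `allow_patterns` below are an assumption about which files `mistral_inference` needs, not a list confirmed by this commit:

```py
from pathlib import Path
from huggingface_hub import snapshot_download

# hypothetical local directory for the raw (consolidated) weights
mistral_models_path = Path.home() / "mistral_models" / "8x22B-Instruct-v0.1"
mistral_models_path.mkdir(parents=True, exist_ok=True)

snapshot_download(
    repo_id="mistralai/Mixtral-8x22B-Instruct-v0.1",
    # assumed patterns; adjust to the files mistral_inference actually expects
    allow_patterns=["params.json", "consolidated-*.safetensors", "tokenizer.model.v3"],
    local_dir=mistral_models_path,
)
```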

## Inference with `mistral_inference`

```py
from mistral_inference.transformer import Transformer
from mistral_inference.generate import generate

model = Transformer.from_folder(mistral_models_path)
out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)

result = tokenizer.decode(out_tokens[0])

print(result)
```

## Preparing inputs with Hugging Face `transformers`

```py
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")

chat = [{"role": "user", "content": "Explain Machine Learning to me in a nutshell."}]

tokens = tokenizer.apply_chat_template(chat, return_dict=True, return_tensors="pt", add_generation_prompt=True)
```

## Inference with Hugging Face `transformers`

```py
from transformers import AutoModelForCausalLM
import torch

# You can also use 8-bit or 4-bit quantization here (see the sketch below)
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1", torch_dtype=torch.bfloat16, device_map="auto")

# device_map="auto" already dispatches the weights across devices,
# so there is no extra model.to("cuda") call; just move the inputs
tokens = tokens.to(model.device)
generated_ids = model.generate(**tokens, max_new_tokens=1000, do_sample=True)

# decode with HF tokenizer
result = tokenizer.decode(generated_ids[0])
print(result)
```
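
The 4-bit option mentioned in the comment above can look like this (a sketch using the standard `BitsAndBytesConfig` API from `transformers`; it assumes `bitsandbytes` is installed):

```py
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch

# NF4 4-bit quantization with bfloat16 compute, cutting weight memory roughly 4x vs bf16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
```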

> [!TIP]
> PRs to correct the `transformers` tokenizer so that it gives 1-to-1 the same results as the `mistral_common` reference implementation are very welcome!

---
The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of [Mixtral-8x22B-v0.1](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1).

## Function calling example
```python
import torch
from transformers import AutoModelForCausalLM
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.tool_calls import (
    Tool,
    Function,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest

device = "cuda"  # the device to load the model onto

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    tools=[
        Tool(
            function=Function(
                name="get_current_weather",
                description="Get the current weather",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "format": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "The temperature unit to use. Infer this from the user's location.",
                        },
                    },
                    "required": ["location", "format"],
                },
            )
        )
    ],
    messages=[
        UserMessage(content="What's the weather like today in Paris"),
    ],
    model="test",
)

# encode_chat_completion returns plain token ids, so build a batch tensor for generate
encodeds = tokenizer_v3.encode_chat_completion(mistral_query).tokens
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-Instruct-v0.1")
model_inputs = torch.tensor([encodeds], device=device)
model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
sp_tokenizer = tokenizer_v3.instruct_tokenizer.tokenizer
decoded = sp_tokenizer.decode(generated_ids[0].tolist())
print(decoded)
```

## Function calling with `transformers`

To use this example, you'll need `transformers` version 4.42.0 or higher. Please see the
[function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling)
in the `transformers` docs for more information.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "mistralai/Mixtral-8x22B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

def get_current_weather(location: str, format: str):
    """
    Get the current weather

    Args:
        location: The city and state, e.g. San Francisco, CA
        format: The temperature unit to use. Infer this from the user's location. (choices: ["celsius", "fahrenheit"])
    """
    pass

conversation = [{"role": "user", "content": "What's the weather like in Paris?"}]
tools = [get_current_weather]

# format and tokenize the tool use prompt
inputs = tokenizer.apply_chat_template(
    conversation,
    tools=tools,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
)

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

inputs = inputs.to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Note that, for reasons of space, this example does not show a complete cycle of calling a tool and adding the tool call and tool
results to the chat history so that the model can use them in its next generation. For a full tool calling example, please
see the [function calling guide](https://huggingface.co/docs/transformers/main/chat_templating#advanced-tool-use--function-calling),
and note that Mixtral **does** use tool call IDs, so these must be included in your tool calls and tool results. They should be
exactly 9 alphanumeric characters.
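
As a sketch of that cycle, continuing the example above (the message format follows the linked guide; the random ID and the hard-coded weather value are illustrative assumptions):

```python
import random
import string

# Suppose the model emitted a call to get_current_weather. Mistral tool call IDs
# must be exactly 9 alphanumeric characters.
tool_call_id = "".join(random.choices(string.ascii_letters + string.digits, k=9))
tool_call = {"name": "get_current_weather", "arguments": {"location": "Paris, France", "format": "celsius"}}
conversation.append({"role": "assistant", "tool_calls": [{"type": "function", "id": tool_call_id, "function": tool_call}]})

# Run the tool (faked here) and append the result under the matching ID
conversation.append({"role": "tool", "tool_call_id": tool_call_id, "name": "get_current_weather", "content": "22.0"})

# Re-apply the chat template and generate again so the model can use the result
inputs = tokenizer.apply_chat_template(conversation, tools=tools, add_generation_prompt=True, return_dict=True, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```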

# Instruct tokenizer
The HuggingFace tokenizer included in this release should match our own. To compare:
`pip install mistral-common`

```py
from mistral_common.protocol.instruct.messages import (
    AssistantMessage,
    UserMessage,
)
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.request import ChatCompletionRequest

from transformers import AutoTokenizer

tokenizer_v3 = MistralTokenizer.v3()

mistral_query = ChatCompletionRequest(
    messages=[
        UserMessage(content="How many experts ?"),
        AssistantMessage(content="8"),
        UserMessage(content="How big ?"),
        AssistantMessage(content="22B"),
        UserMessage(content="Noice 🎉 !"),
    ],
    model="test",
)
hf_messages = mistral_query.model_dump()['messages']

tokenized_mistral = tokenizer_v3.encode_chat_completion(mistral_query).tokens

tokenizer_hf = AutoTokenizer.from_pretrained('mistralai/Mixtral-8x22B-Instruct-v0.1')
tokenized_hf = tokenizer_hf.apply_chat_template(hf_messages, tokenize=True)

# both tokenizations should be identical, token for token
assert tokenized_hf == tokenized_mistral
```

# Function calling and special tokens
This tokenizer includes additional special tokens related to function calling:
- [TOOL_CALLS]
- [AVAILABLE_TOOLS]
- [/AVAILABLE_TOOLS]
- [TOOL_RESULTS]
- [/TOOL_RESULTS]

If you want to use this model with function calling, please be sure to apply it similarly to what is done in our [SentencePieceTokenizerV3](https://github.com/mistralai/mistral-common/blob/main/src/mistral_common/tokens/tokenizers/sentencepiece.py#L299).
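
To see where these tokens land in an encoded prompt, one option is to print the debug rendering of a tool-enabled request (a sketch reusing the `mistral_query` with tools from the function calling example above; the `.text` field on the encoded object is assumed from `mistral_common`'s `Tokenized` type):

```py
# Encode the tool-enabled request and print its text rendering; the special
# tokens ([AVAILABLE_TOOLS], [/AVAILABLE_TOOLS], [TOOL_CALLS], ...) appear inline.
tokenized = tokenizer_v3.encode_chat_completion(mistral_query)
print(tokenized.text)
```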

# The Mistral AI Team
Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Antoine Roux,
Arthur Mensch, Audrey Herblin-Stoop, Baptiste Bout, Baudouin de Monicault,
Blanche Savary, Bam4d, Caroline Feldman, Devendra Singh Chaplot,
Diego de las Casas, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger,
Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona,
Jean-Malo Delignon, Jia Li, Justus Murke, Louis Martin, Louis Ternon,
Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat,
Marie Torelli, Marie-Anne Lachaux, Nicolas Schuhl, Patrick von Platen,
Pierre Stock, Sandeep Subramanian, Sophia Yang, Szymon Antoniak, Teven Le Scao,
Thibaut Lavril, Timothée Lacroix, Théophile Gervet, Thomas Wang,
Valera Nemychnikova, William El Sayed, William Marshall
model-00055-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a11416ad60a30e809c0eff404b1a3615e694aeeca4e6723e9c610216d4252e3c
size 4806774344
model-00056-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:228ff78bb0e5484a390fc8658c634d9932ed980d71c3ec152904e1c455eeb571
size 4806799144
model-00057-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0adb8fee7f504787eedfd65e4e10ffb8fb1d9d5efc5f0d4d3d3063a502c03434
size 4806799144
model-00058-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e2de705aefc7b98a4394b9b691fd733d19633370ec8c3ded13f89fe73e11b5b
size 4806799144
model-00059-of-00059.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:700482c2a697bd67ae38b25ddbd81babb83c77ebce91b5f61761409eb55e4ae0
size 1207997392
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
size 587404
tokenizer.model.v3 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:37f00374dea48658ee8f5d0f21895b9bc55cb0103939607c8185bfd1c6ca1f89
size 587404
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff