nielsr (HF Staff) committed 5c79a37 (verified) · Parent: 125c431

Update README.md

Files changed (1): README.md (+38, −15)
README.md CHANGED
@@ -41,6 +41,8 @@ tokenize(BOT_MESSAGE_N) + [EOS_ID]
 
 In the pseudo-code above, note that the `tokenize` method should not add a BOS or EOS token automatically, but should add a prefix space.
 
+In the Transformers library, one can use [chat templates](https://huggingface.co/docs/transformers/main/en/chat_templating) to make sure the right format is applied.
+
 ## Run the model
 
 ```python
@@ -49,12 +51,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-model = AutoModelForCausalLM.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
 
-text = "Hello my name is"
-inputs = tokenizer(text, return_tensors="pt")
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
 
-outputs = model.generate(**inputs, max_new_tokens=20)
+inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+
+outputs = model.generate(inputs, max_new_tokens=20)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
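Both the `tokenize` rule from the first hunk and the template used by the new snippet are easy to check by hand. The sketch below is an editorial illustration, not part of the commit: `encode(..., add_special_tokens=False)` stands in for the card's `tokenize`, and the exact `[INST]` spacing is an assumption to be compared against what the template actually renders.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")

messages = [
    {"role": "user", "content": "What is your favourite condiment?"},
    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice."},
]

# tokenize=False makes apply_chat_template return the rendered prompt string
# instead of token ids, so the [INST] ... [/INST] structure can be checked by eye.
print(tokenizer.apply_chat_template(messages, tokenize=False))

def tokenize(text):
    # Per the rule above: no automatic BOS/EOS (the SentencePiece tokenizer
    # still supplies the prefix space on its own).
    return tokenizer.encode(text, add_special_tokens=False)

# Hand-built version of the card's pseudo-code:
# [BOS_ID] + tokenize("[INST] ... [/INST]") + tokenize(answer) + [EOS_ID]
ids = (
    [tokenizer.bos_token_id]
    + tokenize("[INST] " + messages[0]["content"] + " [/INST]")
    + tokenize(messages[1]["content"])
    + [tokenizer.eos_token_id]
)
print(tokenizer.decode(ids))
```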
 
@@ -74,12 +81,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(0)
++ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
 
-text = "Hello my name is"
-+ inputs = tokenizer(text, return_tensors="pt").to(0)
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
 
-outputs = model.generate(**inputs, max_new_tokens=20)
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+
+outputs = model.generate(input_ids, max_new_tokens=20)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 </details>
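One caveat on this hunk: the half-precision line references `torch.float16`, while the example (per the hunk header) only imports from `transformers`. A self-contained version of the load, with the missing import added here as an editorial fix rather than taken from the commit, and assuming `accelerate` is installed for `device_map="auto"`:

```python
import torch
from transformers import AutoModelForCausalLM

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# float16 weights take half the memory of float32; device_map="auto"
# lets accelerate place the layers across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.float16, device_map="auto"
)
```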
@@ -96,12 +108,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-+ model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
++ model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
 
-text = "Hello my name is"
-+ inputs = tokenizer(text, return_tensors="pt").to(0)
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
 
-outputs = model.generate(**inputs, max_new_tokens=20)
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+
+outputs = model.generate(input_ids, max_new_tokens=20)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 </details>
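`load_in_4bit=True` depends on the `bitsandbytes` package. Under that same assumption, an equivalent and more explicit spelling that `transformers` also accepts goes through `BitsAndBytesConfig`:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# Explicit form of load_in_4bit=True; further 4-bit options (compute dtype,
# quantization type) would be set on this config object.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map="auto"
)
```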
@@ -118,12 +136,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-+ model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True)
++ model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True, device_map="auto")
 
-text = "Hello my name is"
-+ inputs = tokenizer(text, return_tensors="pt").to(0)
+messages = [
+    {"role": "user", "content": "What is your favourite condiment?"},
+    {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+    {"role": "user", "content": "Do you have mayonnaise recipes?"}
+]
 
-outputs = model.generate(**inputs, max_new_tokens=20)
+input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
+
+outputs = model.generate(input_ids, max_new_tokens=20)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 </details>
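`use_flash_attention_2=True` requires the `flash-attn` package, a supported GPU, and half-precision weights. More recent `transformers` releases spell the same option via `attn_implementation`; the sketch below uses that newer spelling under those assumptions and is an editorial addition, not part of the commit:

```python
import torch
from transformers import AutoModelForCausalLM

model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# attn_implementation="flash_attention_2" is the newer spelling of
# use_flash_attention_2=True; Flash Attention 2 needs fp16/bf16 weights.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
```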
 
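One last remark that applies to every snippet in the diff: `outputs[0]` contains the prompt tokens followed by the completion, so the final `print` echoes the whole conversation. A short editorial sketch, continuing from the variables in the snippets above, that prints only the model's reply:

```python
# Slice off the prompt tokens (input_ids has shape [1, prompt_len]) so that
# only the newly generated tokens are decoded.
new_tokens = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```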