Update README.md
Browse files
README.md
CHANGED
|
@@ -180,7 +180,9 @@ outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
|
|
| 180 |
```python
|
| 181 |
from transformers import AutoTokenizer, TextStreamer
|
| 182 |
from intel_extension_for_transformers.transformers import AutoModelForCausalLM, WeightOnlyQuantConfig
|
| 183 |
-
model_name = "Intel/neural-chat-7b-v3-2"
|
|
|
|
|
|
|
| 184 |
config = WeightOnlyQuantConfig(compute_dtype="bf16", weight_dtype="int4")
|
| 185 |
prompt = "Once upon a time, there existed a little girl,"
|
| 186 |
|
|
|
|
| 180 |
```python
|
| 181 |
from transformers import AutoTokenizer, TextStreamer
|
| 182 |
from intel_extension_for_transformers.transformers import AutoModelForCausalLM, WeightOnlyQuantConfig
|
| 183 |
+
model_name = "Intel/neural-chat-7b-v3-2"
|
| 184 |
+
|
| 185 |
+
# For int8 quantization, set weight_dtype="int8" instead.
|
| 186 |
config = WeightOnlyQuantConfig(compute_dtype="bf16", weight_dtype="int4")
|
| 187 |
prompt = "Once upon a time, there existed a little girl,"
|
| 188 |
|