Update README.md
README.md CHANGED
@@ -95,9 +95,12 @@ import torch
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("EpistemeAI2/Fireball-12B-v1.13a-philosophers")
 
-
-quantization_config = BitsAndBytesConfig(
-
+# Configure 4-bit quantization and enable CPU offloading
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    llm_int8_enable_fp32_cpu_offload=True
+)
+# Load the model with 4-bit quantization and CPU offloading
 model = AutoModelForCausalLM.from_pretrained(
     "EpistemeAI2/Fireball-12B-v1.13a-philosophers",
     quantization_config=quantization_config,
@@ -122,7 +125,6 @@ output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 # Print the output
 print(output_text)
-
 ```
 
 Google colab - [link](https://colab.research.google.com/drive/1ZgUrbonMlK05iQ-tgWZ_lFmUZFbWZNnM?usp=sharing)
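For reference, a minimal end-to-end sketch of the snippet as it reads after this change, assuming the `transformers`, `accelerate`, and `bitsandbytes` packages are installed. The quantization config, model ID, and the decode/print lines come from the diff; the prompt text, `device_map="auto"`, and `max_new_tokens` value are illustrative assumptions, not part of the commit.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("EpistemeAI2/Fireball-12B-v1.13a-philosophers")

# Configure 4-bit quantization and enable CPU offloading (as added in this commit)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)

# Load the model with 4-bit quantization and CPU offloading
model = AutoModelForCausalLM.from_pretrained(
    "EpistemeAI2/Fireball-12B-v1.13a-philosophers",
    quantization_config=quantization_config,
    device_map="auto",  # assumption: let accelerate place layers on GPU/CPU
)

# Illustrative prompt and generation settings (not shown in the diff)
prompt = "What is the categorical imperative?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=128)

# Decode and print the output (matches the README's closing lines)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(output_text)
```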