legolasyiu committed · Commit abcbd52 · verified · 1 Parent(s): fbffc89

Update README.md

Files changed (1): README.md (+6 -4)
README.md CHANGED

````diff
@@ -95,9 +95,12 @@ import torch
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("EpistemeAI2/Fireball-12B-v1.13a-philosophers")
 
-
-quantization_config = BitsAndBytesConfig(load_in_4bit=True)
-# Load the model with 4-bit quantization (no need to use .to() later)
+# Configure 4-bit quantization and enable CPU offloading
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    llm_int8_enable_fp32_cpu_offload=True
+)
+# Load the model with 4-bit quantization and CPU offloading
 model = AutoModelForCausalLM.from_pretrained(
     "EpistemeAI2/Fireball-12B-v1.13a-philosophers",
     quantization_config=quantization_config,
@@ -122,7 +125,6 @@ output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 # Print the output
 print(output_text)
-
 ```
 
 Google colab - [link](https://colab.research.google.com/drive/1ZgUrbonMlK05iQ-tgWZ_lFmUZFbWZNnM?usp=sharing)
````
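In effect, the commit swaps the one-line 4-bit config for one that also enables fp32 CPU offload, so modules that do not fit in GPU memory can fall back to the CPU instead of failing to load. Below is a minimal sketch of how the updated snippet fits together; the `device_map="auto"` argument and the example prompt are assumptions not shown in the diff, since bitsandbytes offloading only applies when a device map routes some modules to the CPU.

```python
# Minimal sketch of the updated README snippet.
# Assumptions (not in the diff): device_map="auto" and the example prompt.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained("EpistemeAI2/Fireball-12B-v1.13a-philosophers")

# 4-bit quantization; fp32 CPU offload keeps modules that don't fit
# on the GPU in full precision on the CPU instead of raising an error.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "EpistemeAI2/Fireball-12B-v1.13a-philosophers",
    quantization_config=quantization_config,
    device_map="auto",  # assumption: lets accelerate split layers across GPU and CPU
)

prompt = "Which philosopher wrote the Critique of Pure Reason?"  # hypothetical example
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```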