Commit
·
ec40b36
1
Parent(s):
dda3b34
Update README.md
Browse files
README.md
CHANGED
|
@@ -89,7 +89,8 @@ import transformers
|
|
| 89 |
name = 'mosaicml/mpt-30b'
|
| 90 |
|
| 91 |
config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
|
| 92 |
-
config.attn_config['attn_impl'] = 'triton'
|
|
|
|
| 93 |
|
| 94 |
model = transformers.AutoModelForCausalLM.from_pretrained(
|
| 95 |
name,
|
|
|
|
| 89 |
name = 'mosaicml/mpt-30b'
|
| 90 |
|
| 91 |
config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
|
| 92 |
+
config.attn_config['attn_impl'] = 'triton' # change this to use triton-based FlashAttention
|
| 93 |
+
config.init_device = 'cuda:0' # For fast initialization directly on GPU!
|
| 94 |
|
| 95 |
model = transformers.AutoModelForCausalLM.from_pretrained(
|
| 96 |
name,
|