jacobmorrison committed
Commit 2d50cde • 1 Parent(s): 53f06b7
Update README.md
README.md CHANGED
@@ -85,14 +85,17 @@ import hf_olmo
 from transformers import AutoModelForCausalLM, AutoTokenizer
 olmo = AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct")
 tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-7B-Instruct")
-
-
+chat = [
+    { "role": "user", "content": "What is language modeling?" },
+]
+prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
 # optional verifying cuda
 # inputs = {k: v.to('cuda') for k,v in inputs.items()}
 # olmo = olmo.to('cuda')
-response = olmo.generate(
+response = olmo.generate(input_ids=inputs.to(olmo.device), max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
 print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
->> '
+>> '<|user|>\nWhat is language modeling?\n<|assistant|>\nLanguage modeling is a type of natural language processing (NLP) task or machine learning task that...'
 ```
 Alternatively, with the pipeline abstraction:
 ```python
@@ -100,8 +103,8 @@ import hf_olmo
 
 from transformers import pipeline
 olmo_pipe = pipeline("text-generation", model="allenai/OLMo-7B-Instruct")
-print(olmo_pipe("
->> '
+print(olmo_pipe("What is language modeling?"))
+>> '[{'generated_text': 'What is language modeling?\nLanguage modeling is a type of natural language processing (NLP) task...'}]'
 ```
 
 Or, you can make this slightly faster by quantizing the model, e.g. `AutoModelForCausalLM.from_pretrained("allenai/OLMo-7B-Instruct", torch_dtype=torch.float16, load_in_8bit=True)` (requires `bitsandbytes`).
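The quantization note kept as context at the end of the diff can be exercised end to end. The sketch below is not part of this commit: it combines the README's 8-bit loading call with the chat-template flow added above, and assumes `torch`, `transformers`, `bitsandbytes`, the `hf_olmo` package, and a CUDA-capable GPU are available.

```python
# Minimal sketch: 8-bit quantized loading of OLMo-7B-Instruct, as suggested in the README.
# Assumes torch, transformers, bitsandbytes, and hf_olmo are installed (GPU required for 8-bit).
import torch
import hf_olmo  # registers OLMo with transformers, per the README
from transformers import AutoModelForCausalLM, AutoTokenizer

olmo = AutoModelForCausalLM.from_pretrained(
    "allenai/OLMo-7B-Instruct",
    torch_dtype=torch.float16,
    load_in_8bit=True,  # requires bitsandbytes
)
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-7B-Instruct")

# Same chat-template flow as the lines added in this commit.
chat = [
    {"role": "user", "content": "What is language modeling?"},
]
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
response = olmo.generate(
    input_ids=inputs.to(olmo.device),
    max_new_tokens=100,
    do_sample=True,
    top_k=50,
    top_p=0.95,
)
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
```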