Update README.md
Browse files
README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
from transformers import RobertaTokenizerFast
|
2 |
-
tokenizer = RobertaTokenizerFast.from_pretrained('Andrija/RobertaFastBPE', bos_token="
|
3 |
|
4 |
encoded = tokenizer('Stručnjaci te bolnice, predvođeni dr Alisom Lim')
|
5 |
-
|
|
|
|
|
|
1 |
from transformers import RobertaTokenizerFast
|
2 |
+
tokenizer = RobertaTokenizerFast.from_pretrained('Andrija/RobertaFastBPE', bos_token="<s>", eos_token="</s>")
|
3 |
|
4 |
encoded = tokenizer('Stručnjaci te bolnice, predvođeni dr Alisom Lim')
|
5 |
+
# {'input_ids': [0, 47541, 34632, 603, 24817, 16, 27540, 6768, 2350, 2803, 3991, 2733, 81, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
6 |
+
tokenizer.decode(encoded['input_ids'])
|
7 |
+
# <s>Stručnjaci te bolnice, predvođeni dr Alisom Lim</s>
|