Andrija committed on
Commit
cc8bc9c
·
1 Parent(s): 3fdb709

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -2
README.md CHANGED
@@ -1,5 +1,7 @@
1
  from transformers import RobertaTokenizerFast
2
- tokenizer = RobertaTokenizerFast.from_pretrained('Andrija/RobertaFastBPE', bos_token="<s>", eos_token="</s>")
3
 
4
  encoded = tokenizer('Stručnjaci te bolnice, predvođeni dr Alisom Lim')
5
- tokenizer.decode(encoded)
 
 
 
1
  from transformers import RobertaTokenizerFast
2
+ tokenizer = RobertaTokenizerFast.from_pretrained('Andrija/RobertaFastBPE', bos_token="&lt;s&gt;", eos_token="&lt;/s&gt;")
3
 
4
  encoded = tokenizer('Stručnjaci te bolnice, predvođeni dr Alisom Lim')
5
+ # {'input_ids': [0, 47541, 34632, 603, 24817, 16, 27540, 6768, 2350, 2803, 3991, 2733, 81, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
6
+ tokenizer.decode(encoded['input_ids'])
7
+ # <s>Stručnjaci te bolnice, predvođeni dr Alisom Lim</s>