opus-unigram2 / special_tokens_map.json
speedcell4's picture
Upload tokenizer
2a451c4 verified
{
"additional_special_tokens": [
"<af>",
"<am>",
"<ar>",
"<as>",
"<az>",
"<be>",
"<bg>",
"<bn>",
"<br>",
"<bs>",
"<ca>",
"<cs>",
"<cy>",
"<da>",
"<de>",
"<el>",
"<en>",
"<eo>",
"<es>",
"<et>",
"<eu>",
"<fa>",
"<fi>",
"<fr>",
"<fy>",
"<ga>",
"<gd>",
"<gl>",
"<gu>",
"<ha>",
"<he>",
"<hi>",
"<hr>",
"<hu>",
"<id>",
"<ig>",
"<is>",
"<it>",
"<ja>",
"<ka>",
"<kk>",
"<km>",
"<kn>",
"<ko>",
"<ku>",
"<ky>",
"<li>",
"<lt>",
"<lv>",
"<mg>",
"<mk>",
"<ml>",
"<mr>",
"<ms>",
"<mt>",
"<my>",
"<nb>",
"<ne>",
"<nl>",
"<nn>",
"<no>",
"<oc>",
"<or>",
"<pa>",
"<pl>",
"<ps>",
"<pt>",
"<ro>",
"<ru>",
"<rw>",
"<se>",
"<sh>",
"<si>",
"<sk>",
"<sl>",
"<sq>",
"<sr>",
"<sv>",
"<ta>",
"<te>",
"<tg>",
"<th>",
"<tk>",
"<tr>",
"<tt>",
"<ug>",
"<uk>",
"<ur>",
"<uz>",
"<vi>",
"<wa>",
"<xh>",
"<yi>",
"<zh>",
"<zu>"
],
"bos_token": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}