{ "additional_special_tokens": [ { "content": "isv_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ace_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ace_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "acm_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "acq_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "aeb_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "afr_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ajp_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "aka_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "amh_Ethi", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "apc_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "arb_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ars_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ary_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "arz_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "asm_Beng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ast_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "awa_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ayr_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": 
false }, { "content": "azb_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "azj_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bak_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bam_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ban_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bel_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bem_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ben_Beng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bho_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bjn_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bjn_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bod_Tibt", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bos_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bug_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "bul_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "cat_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ceb_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ces_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "cjk_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": 
"ckb_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "crh_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "cym_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "dan_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "deu_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "dik_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "dyu_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "dzo_Tibt", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ell_Grek", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "eng_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "epo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "est_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "eus_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ewe_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fao_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pes_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fij_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fin_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fon_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fra_Latn", "lstrip": 
false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fur_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "fuv_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "gla_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "gle_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "glg_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "grn_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "guj_Gujr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hat_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hau_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "heb_Hebr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hin_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hne_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hrv_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hun_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "hye_Armn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ibo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ilo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ind_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "isl_Latn", "lstrip": false, "normalized": 
false, "rstrip": false, "single_word": false }, { "content": "ita_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "jav_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "jpn_Jpan", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kab_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kac_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kam_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kan_Knda", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kas_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kas_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kat_Geor", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "knc_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "knc_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kaz_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kbp_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kea_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "khm_Khmr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kik_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kin_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kir_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, 
"single_word": false }, { "content": "kmb_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kon_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kor_Hang", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "kmr_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lao_Laoo", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lvs_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lij_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lim_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lin_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lit_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lmo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ltg_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ltz_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lua_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lug_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "luo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "lus_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mag_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mai_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, 
{ "content": "mal_Mlym", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mar_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "min_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mkd_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "plt_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mlt_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mni_Beng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "khk_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mos_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mri_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "zsm_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "mya_Mymr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nld_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nno_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nob_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "npi_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nso_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nus_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "nya_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": 
"oci_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "gaz_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ory_Orya", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pag_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pan_Guru", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pap_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pol_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "por_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "prs_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "pbt_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "quy_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ron_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "run_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "rus_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sag_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "san_Deva", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sat_Beng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "scn_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "shn_Mymr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sin_Sinh", "lstrip": 
false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "slk_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "slv_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "smo_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sna_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "snd_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "som_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sot_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "spa_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "als_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "srd_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "srp_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ssw_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "sun_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "swe_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "swh_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "szl_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tam_Taml", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tat_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tel_Telu", "lstrip": false, "normalized": 
false, "rstrip": false, "single_word": false }, { "content": "tgk_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tgl_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tha_Thai", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tir_Ethi", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "taq_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "taq_Tfng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tpi_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tsn_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tso_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tuk_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tum_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tur_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "twi_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "tzm_Tfng", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "uig_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ukr_Cyrl", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "umb_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "urd_Arab", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "uzn_Latn", "lstrip": false, "normalized": false, "rstrip": false, 
"single_word": false }, { "content": "vec_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "vie_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "war_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "wol_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "xho_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "ydd_Hebr", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "yor_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "yue_Hant", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "zho_Hans", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "zho_Hant", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, { "content": "zul_Latn", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false } ], "bos_token": { "content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, "cls_token": { "content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, "eos_token": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, "mask_token": { "content": "<mask>", "lstrip": true, "normalized": true, "rstrip": false, "single_word": false }, "pad_token": { "content": "<pad>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, "sep_token": { "content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, "unk_token": { "content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false } }