{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 34, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": { "Fixed": 34 }, "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": false, "lowercase": true }, "pre_tokenizer": { "type": "BertPreTokenizer" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "а": 5, "б": 6, "в": 7, "г": 8, "д": 9, "е": 10, "ё": 11, "ж": 12, "з": 13, "и": 14, "й": 15, "к": 16, "л": 17, "м": 18, "н": 19, "о": 20, "п": 21, "р": 22, "с": 23, "т": 24, "у": 25, "ф": 26, "х": 27, "ц": 28, "ч": 29, "ш": 30, "щ": 31, "ъ": 32, "ы": 33, "ь": 34, "э": 35, "ю": 36, "я": 37, "##а": 38, "##б": 39, "##в": 40, "##г": 41, "##д": 42, "##е": 43, "##ё": 44, "##ж": 45, "##з": 46, "##и": 47, "##й": 48, "##к": 49, "##л": 50, "##м": 51, "##н": 52, "##о": 53, "##п": 54, "##р": 55, "##с": 56, "##т": 57, "##у": 58, "##ф": 59, "##х": 60, "##ц": 61, "##ч": 62, "##ш": 63, "##щ": 64, "##ъ": 65, "##ы": 66, "##ь": 67, "##э": 68, "##ю": 69, "##я": 70, "_": 71, "-": 72, "##_": 73, "##-": 74 } } }