{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 8, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 9, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 10, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFKC" } ] }, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": null, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "": 5, "": 6, "": 7, "": 8, "": 9, "": 10, "!": 11, 
"\"": 12, "#": 13, "$": 14, "%": 15, "&": 16, "'": 17, "(": 18, ")": 19, "*": 20, "+": 21, ",": 22, "-": 23, ".": 24, "/": 25, "0": 26, "1": 27, "2": 28, "3": 29, "4": 30, "5": 31, "6": 32, "7": 33, "8": 34, "9": 35, ":": 36, ";": 37, "<": 38, "=": 39, ">": 40, "?": 41, "@": 42, "A": 43, "B": 44, "C": 45, "D": 46, "E": 47, "F": 48, "G": 49, "H": 50, "I": 51, "J": 52, "K": 53, "L": 54, "M": 55, "N": 56, "O": 57, "P": 58, "Q": 59, "R": 60, "S": 61, "T": 62, "U": 63, "V": 64, "W": 65, "X": 66, "Y": 67, "Z": 68, "[": 69, "\\": 70, "]": 71, "^": 72, "_": 73, "`": 74, "a": 75, "b": 76, "c": 77, "d": 78, "e": 79, "f": 80, "g": 81, "h": 82, "i": 83, "j": 84, "k": 85, "l": 86, "m": 87, "n": 88, "o": 89, "p": 90, "q": 91, "r": 92, "s": 93, "t": 94, "u": 95, "v": 96, "w": 97, "x": 98, "y": 99, "z": 100, "{": 101, "|": 102, "}": 103, "~": 104, "¡": 105, "¢": 106, "£": 107, "¤": 108, "¥": 109, "¦": 110, "§": 111, "¨": 112, "©": 113, "ª": 114, "«": 115, "¬": 116, "®": 117, "¯": 118, "°": 119, "±": 120, "²": 121, "³": 122, "´": 123, "µ": 124, "¶": 125, "·": 126, "¸": 127, "¹": 128, "º": 129, "»": 130, "¼": 131, "½": 132, "¾": 133, "¿": 134, "À": 135, "Á": 136, "Â": 137, "Ã": 138, "Ä": 139, "Å": 140, "Æ": 141, "Ç": 142, "È": 143, "É": 144, "Ê": 145, "Ë": 146, "Ì": 147, "Í": 148, "Î": 149, "Ï": 150, "Ð": 151, "Ñ": 152, "Ò": 153, "Ó": 154, "Ô": 155, "Õ": 156, "Ö": 157, "×": 158, "Ø": 159, "Ù": 160, "Ú": 161, "Û": 162, "Ü": 163, "Ý": 164, "Þ": 165, "ß": 166, "à": 167, "á": 168, "â": 169, "ã": 170, "ä": 171, "å": 172, "æ": 173, "ç": 174, "è": 175, "é": 176, "ê": 177, "ë": 178, "ì": 179, "í": 180, "î": 181, "ï": 182, "ð": 183, "ñ": 184, "ò": 185, "ó": 186, "ô": 187, "õ": 188, "ö": 189, "÷": 190, "ø": 191, "ù": 192, "ú": 193, "û": 194, "ü": 195, "ý": 196, "þ": 197, "ÿ": 198, "Ā": 199, "ā": 200, "Ă": 201, "ă": 202, "Ą": 203, "ą": 204, "Ć": 205, "ć": 206, "Ĉ": 207, "ĉ": 208, "Ċ": 209, "ċ": 210, "Č": 211, "č": 212, "Ď": 213, "ď": 214, "Đ": 215, "đ": 216, "Ē": 217, "ē": 218, "Ĕ": 219, "ĕ": 
220, "Ė": 221, "ė": 222, "Ę": 223, "ę": 224, "Ě": 225, "ě": 226, "Ĝ": 227, "ĝ": 228, "Ğ": 229, "ğ": 230, "Ġ": 231, "ġ": 232, "Ģ": 233, "ģ": 234, "Ĥ": 235, "ĥ": 236, "Ħ": 237, "ħ": 238, "Ĩ": 239, "ĩ": 240, "Ī": 241, "ī": 242, "Ĭ": 243, "ĭ": 244, "Į": 245, "į": 246, "İ": 247, "ı": 248, "Ĳ": 249, "ĳ": 250, "Ĵ": 251, "ĵ": 252, "Ķ": 253, "ķ": 254, "ĸ": 255, "Ĺ": 256, "ĺ": 257, "Ļ": 258, "ļ": 259, "Ľ": 260, "ľ": 261, "Ŀ": 262, "ŀ": 263, "Ł": 264, "ł": 265, "Ń": 266 }, "merges": [] } }