bytetokenizer / tokenizer.json
KTibow's picture
Create tokenizer.json
e116049 verified
raw
history blame
2.04 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"[UNK]": 0,
"0": 1,
"1": 2,
"2": 3,
"3": 4,
"4": 5,
"5": 6,
"6": 7,
"7": 8,
"8": 9,
"9": 10,
"a": 11,
"b": 12,
"c": 13,
"d": 14,
"e": 15,
"f": 16,
"g": 17,
"h": 18,
"i": 19,
"j": 20,
"k": 21,
"l": 22,
"m": 23,
"n": 24,
"o": 25,
"p": 26,
"q": 27,
"r": 28,
"s": 29,
"t": 30,
"u": 31,
"v": 32,
"w": 33,
"x": 34,
"y": 35,
"z": 36,
"A": 37,
"B": 38,
"C": 39,
"D": 40,
"E": 41,
"F": 42,
"G": 43,
"H": 44,
"I": 45,
"J": 46,
"K": 47,
"L": 48,
"M": 49,
"N": 50,
"O": 51,
"P": 52,
"Q": 53,
"R": 54,
"S": 55,
"T": 56,
"U": 57,
"V": 58,
"W": 59,
"X": 60,
"Y": 61,
"Z": 62,
"!": 63,
"\"": 64,
"#": 65,
"$": 66,
"%": 67,
"&": 68,
"'": 69,
"(": 70,
")": 71,
"*": 72,
"+": 73,
",": 74,
"-": 75,
".": 76,
"/": 77,
":": 78,
";": 79,
"<": 80,
"=": 81,
">": 82,
"?": 83,
"@": 84,
"[": 85,
"\\": 86,
"]": 87,
"^": 88,
"_": 89,
"`": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
" ": 95
},
"merges": []
}
}