byte_level_tokenizer / tokenizer.json
DanielHesslow's picture
Update tokenizer.json
ba3e3ae verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<0x00>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"pre_tokenizer": null,
"decoder": {
"type": "Sequence",
"decoders": [
{
"type": "ByteFallback"
},
{
"type": "Fuse"
}
]
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": true,
"byte_fallback": true,
"vocab": {
"<0x00>": 0,
"<0x01>": 1,
"<0x02>": 2,
"<0x03>": 3,
"<0x04>": 4,
"<0x05>": 5,
"<0x06>": 6,
"<0x07>": 7,
"<0x08>": 8,
"<0x09>": 9,
"<0x0A>": 10,
"<0x0B>": 11,
"<0x0C>": 12,
"<0x0D>": 13,
"<0x0E>": 14,
"<0x0F>": 15,
"<0x10>": 16,
"<0x11>": 17,
"<0x12>": 18,
"<0x13>": 19,
"<0x14>": 20,
"<0x15>": 21,
"<0x16>": 22,
"<0x17>": 23,
"<0x18>": 24,
"<0x19>": 25,
"<0x1A>": 26,
"<0x1B>": 27,
"<0x1C>": 28,
"<0x1D>": 29,
"<0x1E>": 30,
"<0x1F>": 31,
"<0x20>": 32,
"<0x21>": 33,
"<0x22>": 34,
"<0x23>": 35,
"<0x24>": 36,
"<0x25>": 37,
"<0x26>": 38,
"<0x27>": 39,
"<0x28>": 40,
"<0x29>": 41,
"<0x2A>": 42,
"<0x2B>": 43,
"<0x2C>": 44,
"<0x2D>": 45,
"<0x2E>": 46,
"<0x2F>": 47,
"<0x30>": 48,
"<0x31>": 49,
"<0x32>": 50,
"<0x33>": 51,
"<0x34>": 52,
"<0x35>": 53,
"<0x36>": 54,
"<0x37>": 55,
"<0x38>": 56,
"<0x39>": 57,
"<0x3A>": 58,
"<0x3B>": 59,
"<0x3C>": 60,
"<0x3D>": 61,
"<0x3E>": 62,
"<0x3F>": 63,
"<0x40>": 64,
"<0x41>": 65,
"<0x42>": 66,
"<0x43>": 67,
"<0x44>": 68,
"<0x45>": 69,
"<0x46>": 70,
"<0x47>": 71,
"<0x48>": 72,
"<0x49>": 73,
"<0x4A>": 74,
"<0x4B>": 75,
"<0x4C>": 76,
"<0x4D>": 77,
"<0x4E>": 78,
"<0x4F>": 79,
"<0x50>": 80,
"<0x51>": 81,
"<0x52>": 82,
"<0x53>": 83,
"<0x54>": 84,
"<0x55>": 85,
"<0x56>": 86,
"<0x57>": 87,
"<0x58>": 88,
"<0x59>": 89,
"<0x5A>": 90,
"<0x5B>": 91,
"<0x5C>": 92,
"<0x5D>": 93,
"<0x5E>": 94,
"<0x5F>": 95,
"<0x60>": 96,
"<0x61>": 97,
"<0x62>": 98,
"<0x63>": 99,
"<0x64>": 100,
"<0x65>": 101,
"<0x66>": 102,
"<0x67>": 103,
"<0x68>": 104,
"<0x69>": 105,
"<0x6A>": 106,
"<0x6B>": 107,
"<0x6C>": 108,
"<0x6D>": 109,
"<0x6E>": 110,
"<0x6F>": 111,
"<0x70>": 112,
"<0x71>": 113,
"<0x72>": 114,
"<0x73>": 115,
"<0x74>": 116,
"<0x75>": 117,
"<0x76>": 118,
"<0x77>": 119,
"<0x78>": 120,
"<0x79>": 121,
"<0x7A>": 122,
"<0x7B>": 123,
"<0x7C>": 124,
"<0x7D>": 125,
"<0x7E>": 126,
"<0x7F>": 127,
"<0x80>": 128,
"<0x81>": 129,
"<0x82>": 130,
"<0x83>": 131,
"<0x84>": 132,
"<0x85>": 133,
"<0x86>": 134,
"<0x87>": 135,
"<0x88>": 136,
"<0x89>": 137,
"<0x8A>": 138,
"<0x8B>": 139,
"<0x8C>": 140,
"<0x8D>": 141,
"<0x8E>": 142,
"<0x8F>": 143,
"<0x90>": 144,
"<0x91>": 145,
"<0x92>": 146,
"<0x93>": 147,
"<0x94>": 148,
"<0x95>": 149,
"<0x96>": 150,
"<0x97>": 151,
"<0x98>": 152,
"<0x99>": 153,
"<0x9A>": 154,
"<0x9B>": 155,
"<0x9C>": 156,
"<0x9D>": 157,
"<0x9E>": 158,
"<0x9F>": 159,
"<0xA0>": 160,
"<0xA1>": 161,
"<0xA2>": 162,
"<0xA3>": 163,
"<0xA4>": 164,
"<0xA5>": 165,
"<0xA6>": 166,
"<0xA7>": 167,
"<0xA8>": 168,
"<0xA9>": 169,
"<0xAA>": 170,
"<0xAB>": 171,
"<0xAC>": 172,
"<0xAD>": 173,
"<0xAE>": 174,
"<0xAF>": 175,
"<0xB0>": 176,
"<0xB1>": 177,
"<0xB2>": 178,
"<0xB3>": 179,
"<0xB4>": 180,
"<0xB5>": 181,
"<0xB6>": 182,
"<0xB7>": 183,
"<0xB8>": 184,
"<0xB9>": 185,
"<0xBA>": 186,
"<0xBB>": 187,
"<0xBC>": 188,
"<0xBD>": 189,
"<0xBE>": 190,
"<0xBF>": 191,
"<0xC0>": 192,
"<0xC1>": 193,
"<0xC2>": 194,
"<0xC3>": 195,
"<0xC4>": 196,
"<0xC5>": 197,
"<0xC6>": 198,
"<0xC7>": 199,
"<0xC8>": 200,
"<0xC9>": 201,
"<0xCA>": 202,
"<0xCB>": 203,
"<0xCC>": 204,
"<0xCD>": 205,
"<0xCE>": 206,
"<0xCF>": 207,
"<0xD0>": 208,
"<0xD1>": 209,
"<0xD2>": 210,
"<0xD3>": 211,
"<0xD4>": 212,
"<0xD5>": 213,
"<0xD6>": 214,
"<0xD7>": 215,
"<0xD8>": 216,
"<0xD9>": 217,
"<0xDA>": 218,
"<0xDB>": 219,
"<0xDC>": 220,
"<0xDD>": 221,
"<0xDE>": 222,
"<0xDF>": 223,
"<0xE0>": 224,
"<0xE1>": 225,
"<0xE2>": 226,
"<0xE3>": 227,
"<0xE4>": 228,
"<0xE5>": 229,
"<0xE6>": 230,
"<0xE7>": 231,
"<0xE8>": 232,
"<0xE9>": 233,
"<0xEA>": 234,
"<0xEB>": 235,
"<0xEC>": 236,
"<0xED>": 237,
"<0xEE>": 238,
"<0xEF>": 239,
"<0xF0>": 240,
"<0xF1>": 241,
"<0xF2>": 242,
"<0xF3>": 243,
"<0xF4>": 244,
"<0xF5>": 245,
"<0xF6>": 246,
"<0xF7>": 247,
"<0xF8>": 248,
"<0xF9>": 249,
"<0xFA>": 250,
"<0xFB>": 251,
"<0xFC>": 252,
"<0xFD>": 253,
"<0xFE>": 254,
"<0xFF>": 255
},
"merges": []
}
}