SwinDonutPanjabi / tokenizer_config.json
HarsimarSingh's picture
Training done
39c2ccf verified
raw
history blame
49.8 kB
{
"added_tokens_decoder": {
"0": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "a",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"486": {
"content": "8",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1338": {
"content": "R",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1476": {
"content": "ı",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1601": {
"content": "U",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"1681": {
"content": "[",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2586": {
"content": ":",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"2664": {
"content": "–",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"3200": {
"content": "9",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"3890": {
"content": "Ú",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"4573": {
"content": "Í",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"5051": {
"content": "e",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"5770": {
"content": "&",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"5798": {
"content": "ę",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"6522": {
"content": "h",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"6967": {
"content": "·",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"7456": {
"content": "i",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"7690": {
"content": "(",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"8053": {
"content": "+",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9026": {
"content": "=",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9170": {
"content": "J",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"9745": {
"content": "l",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10719": {
"content": "α",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"10960": {
"content": "W",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"11317": {
"content": "ι",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"11938": {
"content": ")",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"12162": {
"content": "Z",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"12816": {
"content": "X",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"14022": {
"content": "ν",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"14301": {
"content": "<",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"14898": {
"content": "*",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"15811": {
"content": "z",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"15994": {
"content": "j",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"16191": {
"content": "t",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"16648": {
"content": "Y",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"17367": {
"content": "g",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"17521": {
"content": "×",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"17725": {
"content": "•",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"18215": {
"content": "n",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"19064": {
"content": "ɔ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"20642": {
"content": "・",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"20825": {
"content": "è",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"27134": {
"content": "~",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"28019": {
"content": "c",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"28268": {
"content": "0",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"28431": {
"content": "'",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30252": {
"content": "—",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30424": {
"content": "✓",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"30955": {
"content": "ਔ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"31285": {
"content": "ˌ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32650": {
"content": "{",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"33855": {
"content": "’",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"33917": {
"content": "Ā",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"33968": {
"content": "p",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34490": {
"content": "]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34779": {
"content": "Q",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"34796": {
"content": "b",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35059": {
"content": "א",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35161": {
"content": "A",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35354": {
"content": "π",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35544": {
"content": "S",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35815": {
"content": ",",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35816": {
"content": "o",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"35934": {
"content": "2",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"36209": {
"content": "?",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"36967": {
"content": "u",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"37093": {
"content": "v",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38167": {
"content": "3",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38459": {
"content": "!",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38460": {
"content": "‘",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38844": {
"content": "m",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39379": {
"content": "%",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39415": {
"content": "D",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39539": {
"content": ".",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39563": {
"content": "τ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39905": {
"content": "r",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40217": {
"content": "\\",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40227": {
"content": "f",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40392": {
"content": "x",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40582": {
"content": "í",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40705": {
"content": "w",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40831": {
"content": "$",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"40956": {
"content": "C",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"41403": {
"content": "_",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"42309": {
"content": "γ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"43154": {
"content": "ה",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"43634": {
"content": "y",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45105": {
"content": "I",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45201": {
"content": "\"",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45202": {
"content": "B",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45785": {
"content": "|",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"45964": {
"content": "M",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46192": {
"content": "s",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46318": {
"content": "K",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46500": {
"content": "み",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46502": {
"content": "ṇ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46579": {
"content": "ṭ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46702": {
"content": "4",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46735": {
"content": "T",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"47106": {
"content": "/",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"47597": {
"content": "E",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"47992": {
"content": "Á",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"48318": {
"content": "O",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"48693": {
"content": "P",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"49224": {
"content": "d",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"49458": {
"content": "á",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"49933": {
"content": "G",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"49945": {
"content": "°",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50155": {
"content": ";",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"50934": {
"content": "5",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"52165": {
"content": "-",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"52613": {
"content": "}",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53031": {
"content": "÷",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53233": {
"content": "N",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53278": {
"content": "H",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53504": {
"content": "ṛ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53669": {
"content": "V",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53904": {
"content": "q",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"53958": {
"content": "L",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"54359": {
"content": "и",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"55144": {
"content": "6",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"55175": {
"content": "k",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"55410": {
"content": "F",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"56620": {
"content": "7",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"56739": {
"content": "ú",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57521": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"57522": {
"content": "<sep/>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57523": {
"content": "<s_iitcdip>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"57524": {
"content": "<s_synthdog>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"57525": {
"content": "ਥ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57526": {
"content": "ੱ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57527": {
"content": "્",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57528": {
"content": "ਯ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57529": {
"content": "ਲ਼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57530": {
"content": "॥",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57531": {
"content": "च",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57532": {
"content": "Ṇ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57533": {
"content": "ਐ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57534": {
"content": "ि",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57535": {
"content": "અ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57536": {
"content": "ਦ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57537": {
"content": "Ī",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57538": {
"content": "ॉ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57539": {
"content": "ा",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57540": {
"content": "ਅ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57541": {
"content": "ਚ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57542": {
"content": "घ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57543": {
"content": "ਞ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57544": {
"content": "ਂ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57545": {
"content": "ਟ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57546": {
"content": "ੈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57547": {
"content": "य",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57548": {
"content": "ਧ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57549": {
"content": "ો",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57550": {
"content": "ਊ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57551": {
"content": "ॅ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57552": {
"content": "ˈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57553": {
"content": "भ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57554": {
"content": "ਿ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57555": {
"content": "ज",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57556": {
"content": "ل",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57557": {
"content": "न",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57558": {
"content": "ਆ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57559": {
"content": "ਗ਼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57560": {
"content": "ਰ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57561": {
"content": "੍",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57562": {
"content": "ય",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57563": {
"content": "्",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57564": {
"content": "ਛ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57565": {
"content": "ਡ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57566": {
"content": "र",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57567": {
"content": "थ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57568": {
"content": "ੁ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57569": {
"content": "³",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57570": {
"content": "י",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57571": {
"content": "ੲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57572": {
"content": "म",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57573": {
"content": "Ḥ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57574": {
"content": "ਤ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57575": {
"content": "ગ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57576": {
"content": "ਜ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57577": {
"content": "ੰ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57578": {
"content": "ै",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57579": {
"content": "ਣ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57580": {
"content": "ਬ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57581": {
"content": "व",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57582": {
"content": "د",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57583": {
"content": "т",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57584": {
"content": "ી",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57585": {
"content": "ਪ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57586": {
"content": "…",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57587": {
"content": "½",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57588": {
"content": "²",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57589": {
"content": "ग",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57590": {
"content": "ד",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57591": {
"content": "उ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57592": {
"content": "ઘ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57593": {
"content": "ੇ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57594": {
"content": "ੋ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57595": {
"content": "Ṉ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57596": {
"content": "ठ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57597": {
"content": "ल",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57598": {
"content": "ਜ਼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57599": {
"content": "ड",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57600": {
"content": "ṉ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57601": {
"content": "ਫ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57602": {
"content": "ਮ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57603": {
"content": "ਸ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57604": {
"content": "੫",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57605": {
"content": "ਵ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57606": {
"content": "त",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57607": {
"content": "¼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57608": {
"content": "ਉ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57609": {
"content": "¶",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57610": {
"content": "Ι",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57611": {
"content": "р",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57612": {
"content": "ਕ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57613": {
"content": "ो",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57614": {
"content": "ਾ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57615": {
"content": "છ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57616": {
"content": "о",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57617": {
"content": "સ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57618": {
"content": "ਘ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57619": {
"content": "¹",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57620": {
"content": "द",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57621": {
"content": "ḍ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57622": {
"content": "ष",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57623": {
"content": "ੜ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57624": {
"content": "प",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57625": {
"content": "ਭ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57626": {
"content": "Ṛ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57627": {
"content": "ੳ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57628": {
"content": "ત",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57629": {
"content": "ર",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57630": {
"content": "Ṭ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57631": {
"content": "ઠ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57632": {
"content": "ह",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57633": {
"content": "ਗ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57634": {
"content": "ੂ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57635": {
"content": "ी",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57636": {
"content": "।",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57637": {
"content": "ਇ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57638": {
"content": "ਙ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57639": {
"content": "ਸ਼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57640": {
"content": "ु",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57641": {
"content": "Ḍ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57642": {
"content": "આ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57643": {
"content": "ਨ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57644": {
"content": "क",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57645": {
"content": "ट",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57646": {
"content": "े",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57647": {
"content": "П",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57648": {
"content": "ા",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57649": {
"content": "ણ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57650": {
"content": "і",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57651": {
"content": "ਢ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57652": {
"content": "ا",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57653": {
"content": "ੀ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57654": {
"content": "ਝ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57655": {
"content": "ૂ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57656": {
"content": "1",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57657": {
"content": "ਏ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57658": {
"content": "ਹ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57659": {
"content": "स",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57660": {
"content": "ذ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57661": {
"content": "਼",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57662": {
"content": "ੌ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57663": {
"content": "ध",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57664": {
"content": "ਓ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57665": {
"content": "ਈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57666": {
"content": "ુ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57667": {
"content": "ਠ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57668": {
"content": "ਖ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57669": {
"content": "ं",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57670": {
"content": "ਲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"57671": {
"content": "ດ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<s_iitcdip>",
"<s_synthdog>"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"cls_token": "<s>",
"eos_token": "</s>",
"mask_token": "<mask>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"processor_class": "DonutProcessor",
"sep_token": "</s>",
"sp_model_kwargs": {},
"tokenizer_class": "XLMRobertaTokenizer",
"unk_token": "<unk>"
}