Reihaneh commited on
Commit
d0828a6
·
verified ·
1 Parent(s): 16f82c4

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +127 -127
vocab.json CHANGED
@@ -1,133 +1,133 @@
1
  {
2
- "'": 69,
3
- "A": 3,
4
- "L": 55,
5
- "M": 41,
6
  "ML": 129,
7
- "T": 96,
8
  "TA": 130,
9
- "[": 110,
10
  "[PAD]": 128,
11
  "[UNK]": 127,
12
- "]": 58,
13
- "|": 67,
14
- "ஃ": 87,
15
- "அ": 81,
16
- "ஆ": 106,
17
- "இ": 35,
18
- "ஈ": 21,
19
- "உ": 15,
20
- "ஊ": 117,
21
- "எ": 85,
22
- "ஏ": 66,
23
- "ஐ": 18,
24
- "ஒ": 19,
25
- "ஓ": 33,
26
- "க": 52,
27
- "ங": 40,
28
- "ச": 115,
29
- "ஜ": 123,
30
- "ஞ": 11,
31
- "ட": 104,
32
- "ண": 64,
33
- "த": 82,
34
- "ந": 74,
35
- "ன": 7,
36
- "ப": 93,
37
- "ம": 20,
38
- "ய": 10,
39
- "ர": 121,
40
- "ற": 76,
41
- "ல": 29,
42
- "ள": 16,
43
- "ழ": 124,
44
- "வ": 77,
45
- "ஷ": 48,
46
- "ஸ": 107,
47
- "ஹ": 78,
48
- "ா": 102,
49
- "ி": 13,
50
- "ீ": 59,
51
- "ு": 31,
52
- "ூ": 14,
53
- "ெ": 0,
54
- "ே": 113,
55
- "ை": 49,
56
- "ொ": 65,
57
- "ோ": 84,
58
- "ௌ": 30,
59
- "்": 2,
60
- "ം": 86,
61
- "ഃ": 94,
62
- "അ": 37,
63
- "ആ": 83,
64
- "ഇ": 17,
65
- "ഈ": 22,
66
- "ഉ": 91,
67
- "ഊ": 28,
68
- "എ": 54,
69
- "ഏ": 116,
70
- "ഐ": 111,
71
- "ഒ": 112,
72
- "ഓ": 61,
73
- "ക": 44,
74
- "ഖ": 51,
75
- "ഗ": 27,
76
- "ഘ": 125,
77
- "ങ": 80,
78
- "ച": 50,
79
- "ഛ": 47,
80
- "ജ": 120,
81
- "ഞ": 46,
82
- "ട": 45,
83
- "ഠ": 5,
84
- "ഡ": 98,
85
- "ഢ": 8,
86
- "ണ": 38,
87
- "ത": 101,
88
- "ഥ": 1,
89
- "ദ": 42,
90
- "ധ": 34,
91
- "ന": 108,
92
- "പ": 6,
93
- "ഫ": 39,
94
- "ബ": 53,
95
- "ഭ": 73,
96
- "മ": 43,
97
- "യ": 23,
98
- "ര": 25,
99
- "റ": 105,
100
- "ല": 57,
101
- "ള": 88,
102
- "ഴ": 122,
103
- "വ": 56,
104
- "ശ": 118,
105
- "ഷ": 97,
106
- "സ": 62,
107
- "ഹ": 119,
108
- "ാ": 60,
109
- "ി": 68,
110
- "ീ": 100,
111
- "ു": 109,
112
- "ൂ": 79,
113
- "ൃ": 126,
114
- "െ": 70,
115
- "േ": 89,
116
- "ൈ": 36,
117
- "ൊ": 9,
118
- "ോ": 99,
119
- "ൌ": 32,
120
- "്": 12,
121
- "ൗ": 71,
122
- "ൺ": 90,
123
- "ൻ": 75,
124
- "ർ": 92,
125
- "ൽ": 26,
126
- "ൾ": 24,
127
- "ൿ": 72,
128
- "‘": 103,
129
- "’": 114,
130
- "“": 95,
131
- "”": 63,
132
- "●": 4
133
  }
 
1
  {
2
+ "'": 34,
3
+ "A": 99,
4
+ "L": 92,
5
+ "M": 78,
6
  "ML": 129,
7
+ "T": 18,
8
  "TA": 130,
9
+ "[": 106,
10
  "[PAD]": 128,
11
  "[UNK]": 127,
12
+ "]": 72,
13
+ "|": 47,
14
+ "ஃ": 57,
15
+ "அ": 45,
16
+ "ஆ": 83,
17
+ "இ": 108,
18
+ "ஈ": 10,
19
+ "உ": 29,
20
+ "ஊ": 113,
21
+ "எ": 11,
22
+ "ஏ": 103,
23
+ "ஐ": 8,
24
+ "ஒ": 84,
25
+ "ஓ": 116,
26
+ "க": 73,
27
+ "ங": 89,
28
+ "ச": 53,
29
+ "ஜ": 62,
30
+ "ஞ": 41,
31
+ "ட": 51,
32
+ "ண": 44,
33
+ "த": 111,
34
+ "ந": 28,
35
+ "ன": 98,
36
+ "ப": 69,
37
+ "ம": 5,
38
+ "ய": 87,
39
+ "ர": 49,
40
+ "ற": 105,
41
+ "ல": 59,
42
+ "ள": 50,
43
+ "ழ": 82,
44
+ "வ": 115,
45
+ "ஷ": 94,
46
+ "ஸ": 104,
47
+ "ஹ": 81,
48
+ "ா": 21,
49
+ "ி": 97,
50
+ "ீ": 70,
51
+ "ு": 65,
52
+ "ூ": 126,
53
+ "ெ": 90,
54
+ "ே": 122,
55
+ "ை": 38,
56
+ "ொ": 114,
57
+ "ோ": 27,
58
+ "ௌ": 102,
59
+ "்": 13,
60
+ "ം": 121,
61
+ "ഃ": 12,
62
+ "അ": 60,
63
+ "ആ": 23,
64
+ "ഇ": 31,
65
+ "ഈ": 68,
66
+ "ഉ": 30,
67
+ "ഊ": 26,
68
+ "എ": 95,
69
+ "ഏ": 67,
70
+ "ഐ": 0,
71
+ "ഒ": 64,
72
+ "ഓ": 112,
73
+ "ക": 76,
74
+ "ഖ": 100,
75
+ "ഗ": 109,
76
+ "ഘ": 75,
77
+ "ങ": 123,
78
+ "ച": 54,
79
+ "ഛ": 63,
80
+ "ജ": 9,
81
+ "ഞ": 119,
82
+ "ട": 3,
83
+ "ഠ": 74,
84
+ "ഡ": 16,
85
+ "ഢ": 37,
86
+ "ണ": 91,
87
+ "ത": 120,
88
+ "ഥ": 4,
89
+ "ദ": 7,
90
+ "ധ": 24,
91
+ "ന": 43,
92
+ "പ": 93,
93
+ "ഫ": 61,
94
+ "ബ": 125,
95
+ "ഭ": 14,
96
+ "മ": 17,
97
+ "യ": 58,
98
+ "ര": 101,
99
+ "റ": 86,
100
+ "ല": 2,
101
+ "ള": 117,
102
+ "ഴ": 35,
103
+ "വ": 85,
104
+ "ശ": 6,
105
+ "ഷ": 107,
106
+ "സ": 40,
107
+ "ഹ": 66,
108
+ "ാ": 42,
109
+ "ി": 36,
110
+ "ീ": 110,
111
+ "ു": 15,
112
+ "ൂ": 25,
113
+ "ൃ": 19,
114
+ "െ": 1,
115
+ "േ": 124,
116
+ "ൈ": 55,
117
+ "ൊ": 46,
118
+ "ോ": 77,
119
+ "ൌ": 56,
120
+ "്": 71,
121
+ "ൗ": 88,
122
+ "ൺ": 32,
123
+ "ൻ": 96,
124
+ "ർ": 39,
125
+ "ൽ": 33,
126
+ "ൾ": 48,
127
+ "ൿ": 52,
128
+ "‘": 79,
129
+ "’": 80,
130
+ "“": 20,
131
+ "”": 22,
132
+ "●": 118
133
  }