bihungba1101 committed on
Commit
b452f0d
·
verified ·
1 Parent(s): 0215b37

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +6 -0
  2. tokenizer_config.json +10 -0
  3. vocab.json +64 -0
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "pad_token": "[PAD]",
6
+ "replace_word_delimiter_char": " ",
7
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
+ "unk_token": "[UNK]",
9
+ "word_delimiter_token": "|"
10
+ }
vocab.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 61,
3
+ "[UNK]": 60,
4
+ "aa": 0,
5
+ "ae": 1,
6
+ "ah": 2,
7
+ "ao": 3,
8
+ "aw": 4,
9
+ "ax": 5,
10
+ "ax-h": 6,
11
+ "axr": 7,
12
+ "ay": 8,
13
+ "b": 9,
14
+ "bcl": 10,
15
+ "ch": 11,
16
+ "d": 12,
17
+ "dcl": 13,
18
+ "dh": 14,
19
+ "dx": 15,
20
+ "eh": 16,
21
+ "el": 17,
22
+ "em": 18,
23
+ "en": 19,
24
+ "eng": 20,
25
+ "epi": 21,
26
+ "er": 22,
27
+ "ey": 23,
28
+ "f": 24,
29
+ "g": 25,
30
+ "gcl": 26,
31
+ "hh": 27,
32
+ "hv": 28,
33
+ "ih": 29,
34
+ "ix": 30,
35
+ "iy": 31,
36
+ "jh": 32,
37
+ "k": 33,
38
+ "kcl": 34,
39
+ "l": 35,
40
+ "m": 36,
41
+ "n": 37,
42
+ "ng": 38,
43
+ "nx": 39,
44
+ "ow": 40,
45
+ "oy": 41,
46
+ "p": 42,
47
+ "pau": 43,
48
+ "pcl": 44,
49
+ "q": 45,
50
+ "r": 46,
51
+ "s": 47,
52
+ "sh": 48,
53
+ "t": 49,
54
+ "tcl": 50,
55
+ "th": 51,
56
+ "uh": 52,
57
+ "uw": 53,
58
+ "ux": 54,
59
+ "v": 55,
60
+ "w": 56,
61
+ "y": 57,
62
+ "z": 58,
63
+ "zh": 59
64
+ }