ikrysinska committed on
Commit
4e070bb
·
1 Parent(s): 0ca3b47

Upload processor

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +47 -15
  3. vocab.json +48 -42
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 44,
3
- "<s>": 43
4
  }
 
1
  {
2
+ "</s>": 50,
3
+ "<s>": 49
4
  }
tokenizer_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "added_tokens_decoder": {
3
- "1": {
4
- "content": "",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
- "4": {
12
  "content": "oʊ",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,8 +16,16 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "10": {
20
- "content": "t͡ʃ",
 
 
 
 
 
 
 
 
21
  "lstrip": true,
22
  "normalized": false,
23
  "rstrip": true,
@@ -25,14 +33,14 @@
25
  "special": false
26
  },
27
  "17": {
28
- "content": "d͡ʒ",
29
  "lstrip": true,
30
  "normalized": false,
31
  "rstrip": true,
32
  "single_word": false,
33
  "special": false
34
  },
35
- "23": {
36
  "content": "aɪ",
37
  "lstrip": true,
38
  "normalized": false,
@@ -40,23 +48,47 @@
40
  "single_word": false,
41
  "special": false
42
  },
43
- "25": {
44
- "content": "ɔɪ",
45
  "lstrip": true,
46
  "normalized": false,
47
  "rstrip": true,
48
  "single_word": false,
49
  "special": false
50
  },
51
- "28": {
52
- "content": "aʊ",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  "lstrip": true,
54
  "normalized": false,
55
  "rstrip": true,
56
  "single_word": false,
57
  "special": false
58
  },
59
- "40": {
60
  "content": "[UNK]",
61
  "lstrip": true,
62
  "normalized": false,
@@ -64,7 +96,7 @@
64
  "single_word": false,
65
  "special": false
66
  },
67
- "41": {
68
  "content": "[PAD]",
69
  "lstrip": true,
70
  "normalized": false,
@@ -72,7 +104,7 @@
72
  "single_word": false,
73
  "special": false
74
  },
75
- "43": {
76
  "content": "<s>",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +112,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "44": {
84
  "content": "</s>",
85
  "lstrip": false,
86
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "7": {
4
+ "content": "d͡ʒ",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "8": {
12
  "content": "oʊ",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "12": {
20
+ "content": "ʊə",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "16": {
28
+ "content": "aʊ",
29
  "lstrip": true,
30
  "normalized": false,
31
  "rstrip": true,
 
33
  "special": false
34
  },
35
  "17": {
36
+ "content": "t͡ʃ",
37
  "lstrip": true,
38
  "normalized": false,
39
  "rstrip": true,
40
  "single_word": false,
41
  "special": false
42
  },
43
+ "20": {
44
  "content": "aɪ",
45
  "lstrip": true,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": false
50
  },
51
+ "21": {
52
+ "content": "",
53
  "lstrip": true,
54
  "normalized": false,
55
  "rstrip": true,
56
  "single_word": false,
57
  "special": false
58
  },
59
+ "23": {
60
+ "content": "ɪə",
61
+ "lstrip": true,
62
+ "normalized": false,
63
+ "rstrip": true,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "26": {
68
+ "content": "əʊ",
69
+ "lstrip": true,
70
+ "normalized": false,
71
+ "rstrip": true,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "27": {
76
+ "content": "eɪ",
77
+ "lstrip": true,
78
+ "normalized": false,
79
+ "rstrip": true,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "37": {
84
+ "content": "ɔɪ",
85
  "lstrip": true,
86
  "normalized": false,
87
  "rstrip": true,
88
  "single_word": false,
89
  "special": false
90
  },
91
+ "46": {
92
  "content": "[UNK]",
93
  "lstrip": true,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": false
98
  },
99
+ "47": {
100
  "content": "[PAD]",
101
  "lstrip": true,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": false
106
  },
107
+ "49": {
108
  "content": "<s>",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "50": {
116
  "content": "</s>",
117
  "lstrip": false,
118
  "normalized": false,
vocab.json CHANGED
@@ -1,45 +1,51 @@
1
  {
2
- " ": 42,
3
- "[PAD]": 41,
4
- "[UNK]": 40,
5
- "aɪ": 23,
6
- "aʊ": 28,
7
- "b": 12,
8
- "d": 3,
9
- "d͡ʒ": 17,
10
- "": 1,
11
- "f": 31,
12
- "h": 16,
13
- "i": 35,
14
- "j": 24,
15
- "k": 22,
16
- "l": 5,
17
- "m": 15,
18
- "n": 33,
19
- "oʊ": 4,
20
- "p": 37,
21
- "s": 32,
22
- "t": 11,
23
- "t͡ʃ": 10,
24
- "u": 39,
25
- "v": 29,
26
- "w": 21,
27
- "z": 19,
28
- "æ": 30,
29
- "ð": 2,
30
- "ŋ": 7,
31
- "ɑ": 9,
32
- "ɔ": 14,
33
- "ɔɪ": 25,
 
34
  "ə": 6,
35
- "ɛ": 34,
36
- "ɝ": 18,
37
- "ɡ": 36,
38
- "ɪ": 8,
39
- "ɹ": 27,
40
- "ʃ": 38,
41
- "ʊ": 13,
42
- "ʌ": 0,
43
- "ʒ": 26,
44
- "θ": 20
 
 
 
 
 
45
  }
 
1
  {
2
+ " ": 48,
3
+ "[PAD]": 47,
4
+ "[UNK]": 46,
5
+ "aɪ": 20,
6
+ "aʊ": 16,
7
+ "b": 44,
8
+ "d": 15,
9
+ "d͡ʒ": 7,
10
+ "": 21,
11
+ "eɪ": 27,
12
+ "f": 43,
13
+ "h": 22,
14
+ "i": 45,
15
+ "j": 11,
16
+ "k": 13,
17
+ "l": 4,
18
+ "m": 9,
19
+ "n": 36,
20
+ "oʊ": 8,
21
+ "p": 31,
22
+ "s": 38,
23
+ "t": 0,
24
+ "t͡ʃ": 17,
25
+ "u": 35,
26
+ "v": 24,
27
+ "w": 40,
28
+ "z": 10,
29
+ "æ": 2,
30
+ "ð": 42,
31
+ "ŋ": 19,
32
+ "ɑ": 18,
33
+ "ɔ": 34,
34
+ "ɔɪ": 37,
35
  "ə": 6,
36
+ "əʊ": 26,
37
+ "ɛ": 30,
38
+ "ɝ": 3,
39
+ "ɡ": 1,
40
+ "ɪ": 14,
41
+ "ɪə": 23,
42
+ "ɹ": 33,
43
+ "ɾ": 32,
44
+ "ʃ": 5,
45
+ "ʊ": 28,
46
+ "ʊə": 12,
47
+ "ʌ": 25,
48
+ "ʒ": 41,
49
+ "ʔ": 39,
50
+ "θ": 29
51
  }