Reihaneh committed on
Commit
0762dc1
·
verified ·
1 Parent(s): c95b229

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +1 -2
  2. vocab.json +40 -40
tokenizer_config.json CHANGED
@@ -42,10 +42,9 @@
42
  }
43
  },
44
  "bos_token": "<s>",
45
- "clean_up_tokenization_spaces": false,
46
  "do_lower_case": false,
47
  "eos_token": "</s>",
48
- "extra_special_tokens": {},
49
  "model_max_length": 1000000000000000019884624838656,
50
  "pad_token": "[PAD]",
51
  "replace_word_delimiter_char": " ",
 
42
  }
43
  },
44
  "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
  "do_lower_case": false,
47
  "eos_token": "</s>",
 
48
  "model_max_length": 1000000000000000019884624838656,
49
  "pad_token": "[PAD]",
50
  "replace_word_delimiter_char": " ",
vocab.json CHANGED
@@ -1,47 +1,47 @@
1
  {
2
- "'": 17,
3
- "A": 32,
4
- "D": 35,
5
- "[": 10,
6
  "[DA]": 44,
7
  "[PAD]": 43,
8
  "[UNK]": 42,
9
- "]": 41,
10
- "a": 24,
11
- "b": 28,
12
- "c": 19,
13
- "d": 14,
14
- "e": 38,
15
- "f": 27,
16
- "g": 23,
17
- "h": 6,
18
- "i": 12,
19
- "j": 5,
20
  "k": 3,
21
- "l": 11,
22
  "m": 25,
23
- "n": 37,
24
- "o": 9,
25
- "p": 13,
26
- "q": 20,
27
- "r": 15,
28
- "s": 36,
29
- "t": 16,
30
- "u": 33,
31
- "v": 40,
32
- "w": 39,
33
- "x": 34,
34
- "y": 30,
35
- "z": 0,
36
- "|": 7,
37
- "«": 26,
38
- "»": 31,
39
- "å": 21,
40
- "æ": 1,
41
- "é": 8,
42
- "í": 2,
43
- "ó": 4,
44
- "ø": 29,
45
- "–": 18,
46
- "—": 22
47
  }
 
1
  {
2
+ "'": 31,
3
+ "A": 20,
4
+ "D": 16,
5
+ "[": 1,
6
  "[DA]": 44,
7
  "[PAD]": 43,
8
  "[UNK]": 42,
9
+ "]": 30,
10
+ "a": 14,
11
+ "b": 22,
12
+ "c": 5,
13
+ "d": 0,
14
+ "e": 32,
15
+ "f": 15,
16
+ "g": 21,
17
+ "h": 10,
18
+ "i": 18,
19
+ "j": 6,
20
  "k": 3,
21
+ "l": 17,
22
  "m": 25,
23
+ "n": 35,
24
+ "o": 23,
25
+ "p": 11,
26
+ "q": 40,
27
+ "r": 37,
28
+ "s": 9,
29
+ "t": 26,
30
+ "u": 27,
31
+ "v": 4,
32
+ "w": 34,
33
+ "x": 7,
34
+ "y": 28,
35
+ "z": 24,
36
+ "|": 38,
37
+ "«": 19,
38
+ "»": 13,
39
+ "å": 12,
40
+ "æ": 2,
41
+ "é": 36,
42
+ "í": 41,
43
+ "ó": 33,
44
+ "ø": 39,
45
+ "–": 29,
46
+ "—": 8
47
  }