ryanu committed
Commit 9fa0195 · verified · 1 Parent(s): a3c3eb8

Upload tokenizer

special_tokens_map.json CHANGED

@@ -15,7 +15,7 @@
   },
   "pad_token": "<|im_end|>",
   "unk_token": {
-    "content": "<|endoftext|>",
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED

@@ -2,15 +2,15 @@
   "add_bos_token": true,
   "add_eos_token": false,
   "added_tokens_decoder": {
-    "50256": {
-      "content": "<|endoftext|>",
+    "0": {
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "58940": {
+    "1": {
       "content": "<s>",
       "lstrip": false,
       "normalized": false,
@@ -18,7 +18,7 @@
       "single_word": false,
       "special": true
     },
-    "58941": {
+    "2": {
       "content": "</s>",
       "lstrip": false,
       "normalized": false,
@@ -26,7 +26,7 @@
       "single_word": false,
       "special": true
     },
-    "58943": {
+    "32000": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
@@ -47,7 +47,7 @@
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "trust_remote_code": false,
-  "unk_token": "<|endoftext|>",
+  "unk_token": "<unk>",
   "use_default_system_prompt": false,
   "use_fast": true
 }
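
The added_tokens_decoder remapping moves the special tokens from the previous ids (50256, 58940, 58941, 58943) to the conventional LlamaTokenizer ids (0, 1, 2) plus 32000 for "<|im_end|>". A hedged sketch of a round-trip check, again with a placeholder repo id; actual ids depend on the accompanying tokenizer.json, whose diff is not rendered above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ryanu/your-model")  # placeholder repo id

# Each remapped special token should resolve to its new id from this commit.
for token, expected in [("<unk>", 0), ("<s>", 1), ("</s>", 2), ("<|im_end|>", 32000)]:
    print(f"{token!r} -> {tok.convert_tokens_to_ids(token)} (expected {expected})")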