PocketDoc committed on
Commit
0516d56
·
verified ·
1 Parent(s): 9d39b9d

Upload 5 files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +2 -2
  2. tokenizer.json +12 -12
  3. tokenizer_config.json +10 -12
special_tokens_map.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "bos_token": {
3
- "content": "<|begin_of_text|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|end_of_text|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
1
  {
2
  "bos_token": {
3
+ "content": "[gMASK]<sop>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 128000,
8
- "content": "<|begin_of_text|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 128001,
17
- "content": "<|end_of_text|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 128011,
107
- "content": "<|reserved_special_token_3|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 128012,
116
- "content": "<|reserved_special_token_4|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 128013,
125
- "content": "<|reserved_special_token_5|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "id": 128014,
134
- "content": "<|reserved_special_token_6|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -2342,7 +2342,7 @@
2342
  "single": [
2343
  {
2344
  "SpecialToken": {
2345
- "id": "<|begin_of_text|>",
2346
  "type_id": 0
2347
  }
2348
  },
@@ -2356,7 +2356,7 @@
2356
  "pair": [
2357
  {
2358
  "SpecialToken": {
2359
- "id": "<|begin_of_text|>",
2360
  "type_id": 0
2361
  }
2362
  },
@@ -2368,7 +2368,7 @@
2368
  },
2369
  {
2370
  "SpecialToken": {
2371
- "id": "<|begin_of_text|>",
2372
  "type_id": 1
2373
  }
2374
  },
@@ -2380,13 +2380,13 @@
2380
  }
2381
  ],
2382
  "special_tokens": {
2383
- "<|begin_of_text|>": {
2384
- "id": "<|begin_of_text|>",
2385
  "ids": [
2386
  128000
2387
  ],
2388
  "tokens": [
2389
- "<|begin_of_text|>"
2390
  ]
2391
  }
2392
  }
 
5
  "added_tokens": [
6
  {
7
  "id": 128000,
8
+ "content": "[gMASK]<sop>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 128001,
17
+ "content": "<|endoftext|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 128011,
107
+ "content": "<|system|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 128012,
116
+ "content": "<|assistant|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 128013,
125
+ "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 128014,
134
+ "content": "<|observation|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
2342
  "single": [
2343
  {
2344
  "SpecialToken": {
2345
+ "id": "[gMASK]<sop>",
2346
  "type_id": 0
2347
  }
2348
  },
 
2356
  "pair": [
2357
  {
2358
  "SpecialToken": {
2359
+ "id": "[gMASK]<sop>",
2360
  "type_id": 0
2361
  }
2362
  },
 
2368
  },
2369
  {
2370
  "SpecialToken": {
2371
+ "id": "[gMASK]<sop>",
2372
  "type_id": 1
2373
  }
2374
  },
 
2380
  }
2381
  ],
2382
  "special_tokens": {
2383
+ "[gMASK]<sop>": {
2384
+ "id": "[gMASK]<sop>",
2385
  "ids": [
2386
  128000
2387
  ],
2388
  "tokens": [
2389
+ "[gMASK]<sop>"
2390
  ]
2391
  }
2392
  }
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "128000": {
4
- "content": "<|begin_of_text|>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "128001": {
12
- "content": "<|end_of_text|>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "128011": {
92
- "content": "<|reserved_special_token_3|>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "128012": {
100
- "content": "<|reserved_special_token_4|>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "128013": {
108
- "content": "<|reserved_special_token_5|>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "128014": {
116
- "content": "<|reserved_special_token_6|>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -2049,13 +2049,11 @@
2049
  "special": true
2050
  }
2051
  },
2052
- "bos_token": "<|begin_of_text|>",
2053
  "clean_up_tokenization_spaces": true,
2054
- "eos_token": "<|end_of_text|>",
2055
- "model_input_names": [
2056
- "input_ids",
2057
- "attention_mask"
2058
- ],
2059
  "model_max_length": 131072,
2060
  "pad_token": "<|finetune_right_pad_id|>",
2061
  "padding_side": "left",
 
1
  {
2
  "added_tokens_decoder": {
3
  "128000": {
4
+ "content": "[gMASK]<sop>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "128001": {
12
+ "content": "<|endoftext|>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "128011": {
92
+ "content": "<|system|>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "128012": {
100
+ "content": "<|assistant|>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "128013": {
108
+ "content": "<|user|>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "128014": {
116
+ "content": "<|observation|>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
2049
  "special": true
2050
  }
2051
  },
2052
+ "bos_token": "[gMASK]<sop>",
2053
  "clean_up_tokenization_spaces": true,
2054
+ "eos_token": "<|endoftext|>",
2055
+ "model_input_names": ["input_ids", "attention_mask"],
2056
+ "chat_template": "{{ bos_token }}{%- set loop_messages = messages %}\n{%- for message in loop_messages %}\n {%- set content = '<|' + message['role'] + '|>'+ message['content'] | trim %}\n {%- if loop.index0 == 0 %}\n {%- set content = content %}\n {%- endif %}\n {%- if not (loop.last and message['role'] == 'assistant') %}\n {%- set content = content + '<|endoftext|>' %}\n {%- endif %}\n {{- content }}\n{%- endfor %}\n{%- if messages[-1]['role'] != 'assistant' %}\n {{- '<|assistant|>' }}\n{%- endif %}",
 
 
2057
  "model_max_length": 131072,
2058
  "pad_token": "<|finetune_right_pad_id|>",
2059
  "padding_side": "left",