Upload 5 files
Browse files
- special_tokens_map.json +2 -2
- tokenizer.json +12 -12
- tokenizer_config.json +10 -12
special_tokens_map.json
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
{
|
2 |
"bos_token": {
|
3 |
-
"content": "
|
4 |
"lstrip": false,
|
5 |
"normalized": false,
|
6 |
"rstrip": false,
|
7 |
"single_word": false
|
8 |
},
|
9 |
"eos_token": {
|
10 |
-
"content": "<|
|
11 |
"lstrip": false,
|
12 |
"normalized": false,
|
13 |
"rstrip": false,
|
|
|
1 |
{
|
2 |
"bos_token": {
|
3 |
+
"content": "[gMASK]<sop>",
|
4 |
"lstrip": false,
|
5 |
"normalized": false,
|
6 |
"rstrip": false,
|
7 |
"single_word": false
|
8 |
},
|
9 |
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
"lstrip": false,
|
12 |
"normalized": false,
|
13 |
"rstrip": false,
|
tokenizer.json
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 128000,
|
8 |
-
"content": "
|
9 |
"single_word": false,
|
10 |
"lstrip": false,
|
11 |
"rstrip": false,
|
@@ -14,7 +14,7 @@
|
|
14 |
},
|
15 |
{
|
16 |
"id": 128001,
|
17 |
-
"content": "<|
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
@@ -104,7 +104,7 @@
|
|
104 |
},
|
105 |
{
|
106 |
"id": 128011,
|
107 |
-
"content": "<|
|
108 |
"single_word": false,
|
109 |
"lstrip": false,
|
110 |
"rstrip": false,
|
@@ -113,7 +113,7 @@
|
|
113 |
},
|
114 |
{
|
115 |
"id": 128012,
|
116 |
-
"content": "<|
|
117 |
"single_word": false,
|
118 |
"lstrip": false,
|
119 |
"rstrip": false,
|
@@ -122,7 +122,7 @@
|
|
122 |
},
|
123 |
{
|
124 |
"id": 128013,
|
125 |
-
"content": "<|
|
126 |
"single_word": false,
|
127 |
"lstrip": false,
|
128 |
"rstrip": false,
|
@@ -131,7 +131,7 @@
|
|
131 |
},
|
132 |
{
|
133 |
"id": 128014,
|
134 |
-
"content": "<|
|
135 |
"single_word": false,
|
136 |
"lstrip": false,
|
137 |
"rstrip": false,
|
@@ -2342,7 +2342,7 @@
|
|
2342 |
"single": [
|
2343 |
{
|
2344 |
"SpecialToken": {
|
2345 |
-
"id": "
|
2346 |
"type_id": 0
|
2347 |
}
|
2348 |
},
|
@@ -2356,7 +2356,7 @@
|
|
2356 |
"pair": [
|
2357 |
{
|
2358 |
"SpecialToken": {
|
2359 |
-
"id": "
|
2360 |
"type_id": 0
|
2361 |
}
|
2362 |
},
|
@@ -2368,7 +2368,7 @@
|
|
2368 |
},
|
2369 |
{
|
2370 |
"SpecialToken": {
|
2371 |
-
"id": "
|
2372 |
"type_id": 1
|
2373 |
}
|
2374 |
},
|
@@ -2380,13 +2380,13 @@
|
|
2380 |
}
|
2381 |
],
|
2382 |
"special_tokens": {
|
2383 |
-
"
|
2384 |
-
"id": "
|
2385 |
"ids": [
|
2386 |
128000
|
2387 |
],
|
2388 |
"tokens": [
|
2389 |
-
"
|
2390 |
]
|
2391 |
}
|
2392 |
}
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 128000,
|
8 |
+
"content": "[gMASK]<sop>",
|
9 |
"single_word": false,
|
10 |
"lstrip": false,
|
11 |
"rstrip": false,
|
|
|
14 |
},
|
15 |
{
|
16 |
"id": 128001,
|
17 |
+
"content": "<|endoftext|>",
|
18 |
"single_word": false,
|
19 |
"lstrip": false,
|
20 |
"rstrip": false,
|
|
|
104 |
},
|
105 |
{
|
106 |
"id": 128011,
|
107 |
+
"content": "<|system|>",
|
108 |
"single_word": false,
|
109 |
"lstrip": false,
|
110 |
"rstrip": false,
|
|
|
113 |
},
|
114 |
{
|
115 |
"id": 128012,
|
116 |
+
"content": "<|assistant|>",
|
117 |
"single_word": false,
|
118 |
"lstrip": false,
|
119 |
"rstrip": false,
|
|
|
122 |
},
|
123 |
{
|
124 |
"id": 128013,
|
125 |
+
"content": "<|user|>",
|
126 |
"single_word": false,
|
127 |
"lstrip": false,
|
128 |
"rstrip": false,
|
|
|
131 |
},
|
132 |
{
|
133 |
"id": 128014,
|
134 |
+
"content": "<|observation|>",
|
135 |
"single_word": false,
|
136 |
"lstrip": false,
|
137 |
"rstrip": false,
|
|
|
2342 |
"single": [
|
2343 |
{
|
2344 |
"SpecialToken": {
|
2345 |
+
"id": "[gMASK]<sop>",
|
2346 |
"type_id": 0
|
2347 |
}
|
2348 |
},
|
|
|
2356 |
"pair": [
|
2357 |
{
|
2358 |
"SpecialToken": {
|
2359 |
+
"id": "[gMASK]<sop>",
|
2360 |
"type_id": 0
|
2361 |
}
|
2362 |
},
|
|
|
2368 |
},
|
2369 |
{
|
2370 |
"SpecialToken": {
|
2371 |
+
"id": "[gMASK]<sop>",
|
2372 |
"type_id": 1
|
2373 |
}
|
2374 |
},
|
|
|
2380 |
}
|
2381 |
],
|
2382 |
"special_tokens": {
|
2383 |
+
"[gMASK]<sop>": {
|
2384 |
+
"id": "[gMASK]<sop>",
|
2385 |
"ids": [
|
2386 |
128000
|
2387 |
],
|
2388 |
"tokens": [
|
2389 |
+
"[gMASK]<sop>"
|
2390 |
]
|
2391 |
}
|
2392 |
}
|
tokenizer_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"added_tokens_decoder": {
|
3 |
"128000": {
|
4 |
-
"content": "
|
5 |
"lstrip": false,
|
6 |
"normalized": false,
|
7 |
"rstrip": false,
|
@@ -9,7 +9,7 @@
|
|
9 |
"special": true
|
10 |
},
|
11 |
"128001": {
|
12 |
-
"content": "<|
|
13 |
"lstrip": false,
|
14 |
"normalized": false,
|
15 |
"rstrip": false,
|
@@ -89,7 +89,7 @@
|
|
89 |
"special": true
|
90 |
},
|
91 |
"128011": {
|
92 |
-
"content": "<|
|
93 |
"lstrip": false,
|
94 |
"normalized": false,
|
95 |
"rstrip": false,
|
@@ -97,7 +97,7 @@
|
|
97 |
"special": true
|
98 |
},
|
99 |
"128012": {
|
100 |
-
"content": "<|
|
101 |
"lstrip": false,
|
102 |
"normalized": false,
|
103 |
"rstrip": false,
|
@@ -105,7 +105,7 @@
|
|
105 |
"special": true
|
106 |
},
|
107 |
"128013": {
|
108 |
-
"content": "<|
|
109 |
"lstrip": false,
|
110 |
"normalized": false,
|
111 |
"rstrip": false,
|
@@ -113,7 +113,7 @@
|
|
113 |
"special": true
|
114 |
},
|
115 |
"128014": {
|
116 |
-
"content": "<|
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
119 |
"rstrip": false,
|
@@ -2049,13 +2049,11 @@
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
2052 |
-
"bos_token": "
|
2053 |
"clean_up_tokenization_spaces": true,
|
2054 |
-
"eos_token": "<|
|
2055 |
-
"model_input_names": [
|
2056 |
-
"input_ids",
|
2057 |
-
"attention_mask"
|
2058 |
-
],
|
2059 |
"model_max_length": 131072,
|
2060 |
"pad_token": "<|finetune_right_pad_id|>",
|
2061 |
"padding_side": "left",
|
|
|
1 |
{
|
2 |
"added_tokens_decoder": {
|
3 |
"128000": {
|
4 |
+
"content": "[gMASK]<sop>",
|
5 |
"lstrip": false,
|
6 |
"normalized": false,
|
7 |
"rstrip": false,
|
|
|
9 |
"special": true
|
10 |
},
|
11 |
"128001": {
|
12 |
+
"content": "<|endoftext|>",
|
13 |
"lstrip": false,
|
14 |
"normalized": false,
|
15 |
"rstrip": false,
|
|
|
89 |
"special": true
|
90 |
},
|
91 |
"128011": {
|
92 |
+
"content": "<|system|>",
|
93 |
"lstrip": false,
|
94 |
"normalized": false,
|
95 |
"rstrip": false,
|
|
|
97 |
"special": true
|
98 |
},
|
99 |
"128012": {
|
100 |
+
"content": "<|assistant|>",
|
101 |
"lstrip": false,
|
102 |
"normalized": false,
|
103 |
"rstrip": false,
|
|
|
105 |
"special": true
|
106 |
},
|
107 |
"128013": {
|
108 |
+
"content": "<|user|>",
|
109 |
"lstrip": false,
|
110 |
"normalized": false,
|
111 |
"rstrip": false,
|
|
|
113 |
"special": true
|
114 |
},
|
115 |
"128014": {
|
116 |
+
"content": "<|observation|>",
|
117 |
"lstrip": false,
|
118 |
"normalized": false,
|
119 |
"rstrip": false,
|
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
2052 |
+
"bos_token": "[gMASK]<sop>",
|
2053 |
"clean_up_tokenization_spaces": true,
|
2054 |
+
"eos_token": "<|endoftext|>",
|
2055 |
+
"model_input_names": ["input_ids", "attention_mask"],
|
2056 |
+
"chat_template": "{{ bos_token }}{%- set loop_messages = messages %}\n{%- for message in loop_messages %}\n {%- set content = '<|' + message['role'] + '|>'+ message['content'] | trim %}\n {%- if loop.index0 == 0 %}\n {%- set content = content %}\n {%- endif %}\n {%- if not (loop.last and message['role'] == 'assistant') %}\n {%- set content = content + '<|endoftext|>' %}\n {%- endif %}\n {{- content }}\n{%- endfor %}\n{%- if messages[-1]['role'] != 'assistant' %}\n {{- '<|assistant|>' }}\n{%- endif %}",
|
|
|
|
|
2057 |
"model_max_length": 131072,
|
2058 |
"pad_token": "<|finetune_right_pad_id|>",
|
2059 |
"padding_side": "left",
|