Upload tokenizer
Browse files- tokenizer.json +6 -6
- tokenizer_config.json +2 -2
tokenizer.json
CHANGED
@@ -72,7 +72,7 @@
|
|
72 |
"single": [
|
73 |
{
|
74 |
"SpecialToken": {
|
75 |
-
"id": "<
|
76 |
"type_id": 0
|
77 |
}
|
78 |
},
|
@@ -92,7 +92,7 @@
|
|
92 |
"pair": [
|
93 |
{
|
94 |
"SpecialToken": {
|
95 |
-
"id": "<
|
96 |
"type_id": 0
|
97 |
}
|
98 |
},
|
@@ -125,13 +125,13 @@
|
|
125 |
"</s>"
|
126 |
]
|
127 |
},
|
128 |
-
"<
|
129 |
-
"id": "<
|
130 |
"ids": [
|
131 |
-
|
132 |
],
|
133 |
"tokens": [
|
134 |
-
"<
|
135 |
]
|
136 |
}
|
137 |
}
|
|
|
72 |
"single": [
|
73 |
{
|
74 |
"SpecialToken": {
|
75 |
+
"id": "<s>",
|
76 |
"type_id": 0
|
77 |
}
|
78 |
},
|
|
|
92 |
"pair": [
|
93 |
{
|
94 |
"SpecialToken": {
|
95 |
+
"id": "<s>",
|
96 |
"type_id": 0
|
97 |
}
|
98 |
},
|
|
|
125 |
"</s>"
|
126 |
]
|
127 |
},
|
128 |
+
"<s>": {
|
129 |
+
"id": "<s>",
|
130 |
"ids": [
|
131 |
+
0
|
132 |
],
|
133 |
"tokens": [
|
134 |
+
"<s>"
|
135 |
]
|
136 |
}
|
137 |
}
|
tokenizer_config.json
CHANGED
@@ -46,8 +46,8 @@
|
|
46 |
"pad_token": "<pad>",
|
47 |
"sep_token": "</s>",
|
48 |
"sp_model_kwargs": {},
|
49 |
-
"src_lang": "
|
50 |
-
"tgt_lang":
|
51 |
"tokenizer_class": "NllbTokenizer",
|
52 |
"unk_token": "<unk>"
|
53 |
}
|
|
|
46 |
"pad_token": "<pad>",
|
47 |
"sep_token": "</s>",
|
48 |
"sp_model_kwargs": {},
|
49 |
+
"src_lang": "<s>",
|
50 |
+
"tgt_lang": "<s>",
|
51 |
"tokenizer_class": "NllbTokenizer",
|
52 |
"unk_token": "<unk>"
|
53 |
}
|