speedcell4 committed on
Commit
005655a
·
verified ·
1 Parent(s): 014d7ac

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +6 -6
  2. tokenizer_config.json +2 -2
tokenizer.json CHANGED
@@ -72,7 +72,7 @@
72
  "single": [
73
  {
74
  "SpecialToken": {
75
- "id": "<unk>",
76
  "type_id": 0
77
  }
78
  },
@@ -92,7 +92,7 @@
92
  "pair": [
93
  {
94
  "SpecialToken": {
95
- "id": "<unk>",
96
  "type_id": 0
97
  }
98
  },
@@ -125,13 +125,13 @@
125
  "</s>"
126
  ]
127
  },
128
- "<unk>": {
129
- "id": "<unk>",
130
  "ids": [
131
- 3
132
  ],
133
  "tokens": [
134
- "<unk>"
135
  ]
136
  }
137
  }
 
72
  "single": [
73
  {
74
  "SpecialToken": {
75
+ "id": "<s>",
76
  "type_id": 0
77
  }
78
  },
 
92
  "pair": [
93
  {
94
  "SpecialToken": {
95
+ "id": "<s>",
96
  "type_id": 0
97
  }
98
  },
 
125
  "</s>"
126
  ]
127
  },
128
+ "<s>": {
129
+ "id": "<s>",
130
  "ids": [
131
+ 0
132
  ],
133
  "tokens": [
134
+ "<s>"
135
  ]
136
  }
137
  }
tokenizer_config.json CHANGED
@@ -46,8 +46,8 @@
46
  "pad_token": "<pad>",
47
  "sep_token": "</s>",
48
  "sp_model_kwargs": {},
49
- "src_lang": "eng_Latn",
50
- "tgt_lang": null,
51
  "tokenizer_class": "NllbTokenizer",
52
  "unk_token": "<unk>"
53
  }
 
46
  "pad_token": "<pad>",
47
  "sep_token": "</s>",
48
  "sp_model_kwargs": {},
49
+ "src_lang": "<s>",
50
+ "tgt_lang": "<s>",
51
  "tokenizer_class": "NllbTokenizer",
52
  "unk_token": "<unk>"
53
  }