Transformers
speedcell4 commited on
Commit
374bb44
·
verified ·
1 Parent(s): 2b3d4ae

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,44 +1,43 @@
1
  {
2
- "<af>": 64002,
3
- "<am>": 64003,
4
- "<ar>": 64004,
5
- "<ast>": 64005,
6
- "<be>": 64006,
7
- "<bg>": 64007,
8
- "<bn>": 64008,
9
- "<bs>": 64009,
10
- "<ca>": 64010,
11
- "<cs>": 64011,
12
- "<da>": 64012,
13
- "<de>": 64013,
14
- "<en>": 64014,
15
- "<eos>": 64001,
16
- "<es>": 64015,
17
- "<fr>": 64016,
18
- "<gu>": 64017,
19
- "<ha>": 64018,
20
- "<he>": 64019,
21
- "<hi>": 64020,
22
- "<is>": 64021,
23
- "<it>": 64022,
24
- "<kab>": 64023,
25
- "<kn>": 64024,
26
- "<lb>": 64025,
27
- "<mr>": 64026,
28
- "<mt>": 64027,
29
- "<ne>": 64028,
30
- "<nl>": 64029,
31
- "<no>": 64030,
32
- "<oc>": 64031,
33
- "<pl>": 64032,
34
- "<pt>": 64033,
35
- "<ro>": 64034,
36
- "<ru>": 64035,
37
- "<sd>": 64036,
38
- "<so>": 64037,
39
- "<sr>": 64038,
40
- "<sv>": 64039,
41
- "<ti>": 64040,
42
- "<uk>": 64041,
43
- "<ur>": 64042
44
  }
 
1
  {
2
+ "<af>": 64001,
3
+ "<am>": 64002,
4
+ "<ar>": 64003,
5
+ "<ast>": 64004,
6
+ "<be>": 64005,
7
+ "<bg>": 64006,
8
+ "<bn>": 64007,
9
+ "<bs>": 64008,
10
+ "<ca>": 64009,
11
+ "<cs>": 64010,
12
+ "<da>": 64011,
13
+ "<de>": 64012,
14
+ "<en>": 64013,
15
+ "<es>": 64014,
16
+ "<fr>": 64015,
17
+ "<gu>": 64016,
18
+ "<ha>": 64017,
19
+ "<he>": 64018,
20
+ "<hi>": 64019,
21
+ "<is>": 64020,
22
+ "<it>": 64021,
23
+ "<kab>": 64022,
24
+ "<kn>": 64023,
25
+ "<lb>": 64024,
26
+ "<mr>": 64025,
27
+ "<mt>": 64026,
28
+ "<ne>": 64027,
29
+ "<nl>": 64028,
30
+ "<no>": 64029,
31
+ "<oc>": 64030,
32
+ "<pl>": 64031,
33
+ "<pt>": 64032,
34
+ "<ro>": 64033,
35
+ "<ru>": 64034,
36
+ "<sd>": 64035,
37
+ "<so>": 64036,
38
+ "<sr>": 64037,
39
+ "<sv>": 64038,
40
+ "<ti>": 64039,
41
+ "<uk>": 64040,
42
+ "<ur>": 64041
 
43
  }
special_tokens_map.json CHANGED
@@ -43,14 +43,14 @@
43
  "<ur>"
44
  ],
45
  "bos_token": {
46
- "content": "<eos>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  "eos_token": {
53
- "content": "<eos>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
 
43
  "<ur>"
44
  ],
45
  "bos_token": {
46
+ "content": "</s>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  "eos_token": {
53
+ "content": "</s>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
tokenizer.json CHANGED
@@ -41,15 +41,6 @@
41
  },
42
  {
43
  "id": 64001,
44
- "content": "<eos>",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 64002,
53
  "content": "<af>",
54
  "single_word": false,
55
  "lstrip": false,
@@ -58,7 +49,7 @@
58
  "special": true
59
  },
60
  {
61
- "id": 64003,
62
  "content": "<am>",
63
  "single_word": false,
64
  "lstrip": false,
@@ -67,7 +58,7 @@
67
  "special": true
68
  },
69
  {
70
- "id": 64004,
71
  "content": "<ar>",
72
  "single_word": false,
73
  "lstrip": false,
@@ -76,7 +67,7 @@
76
  "special": true
77
  },
78
  {
79
- "id": 64005,
80
  "content": "<ast>",
81
  "single_word": false,
82
  "lstrip": false,
@@ -85,7 +76,7 @@
85
  "special": true
86
  },
87
  {
88
- "id": 64006,
89
  "content": "<be>",
90
  "single_word": false,
91
  "lstrip": false,
@@ -94,7 +85,7 @@
94
  "special": true
95
  },
96
  {
97
- "id": 64007,
98
  "content": "<bg>",
99
  "single_word": false,
100
  "lstrip": false,
@@ -103,7 +94,7 @@
103
  "special": true
104
  },
105
  {
106
- "id": 64008,
107
  "content": "<bn>",
108
  "single_word": false,
109
  "lstrip": false,
@@ -112,7 +103,7 @@
112
  "special": true
113
  },
114
  {
115
- "id": 64009,
116
  "content": "<bs>",
117
  "single_word": false,
118
  "lstrip": false,
@@ -121,7 +112,7 @@
121
  "special": true
122
  },
123
  {
124
- "id": 64010,
125
  "content": "<ca>",
126
  "single_word": false,
127
  "lstrip": false,
@@ -130,7 +121,7 @@
130
  "special": true
131
  },
132
  {
133
- "id": 64011,
134
  "content": "<cs>",
135
  "single_word": false,
136
  "lstrip": false,
@@ -139,7 +130,7 @@
139
  "special": true
140
  },
141
  {
142
- "id": 64012,
143
  "content": "<da>",
144
  "single_word": false,
145
  "lstrip": false,
@@ -148,7 +139,7 @@
148
  "special": true
149
  },
150
  {
151
- "id": 64013,
152
  "content": "<de>",
153
  "single_word": false,
154
  "lstrip": false,
@@ -157,7 +148,7 @@
157
  "special": true
158
  },
159
  {
160
- "id": 64014,
161
  "content": "<en>",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +157,7 @@
166
  "special": true
167
  },
168
  {
169
- "id": 64015,
170
  "content": "<es>",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +166,7 @@
175
  "special": true
176
  },
177
  {
178
- "id": 64016,
179
  "content": "<fr>",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +175,7 @@
184
  "special": true
185
  },
186
  {
187
- "id": 64017,
188
  "content": "<gu>",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +184,7 @@
193
  "special": true
194
  },
195
  {
196
- "id": 64018,
197
  "content": "<ha>",
198
  "single_word": false,
199
  "lstrip": false,
@@ -202,7 +193,7 @@
202
  "special": true
203
  },
204
  {
205
- "id": 64019,
206
  "content": "<he>",
207
  "single_word": false,
208
  "lstrip": false,
@@ -211,7 +202,7 @@
211
  "special": true
212
  },
213
  {
214
- "id": 64020,
215
  "content": "<hi>",
216
  "single_word": false,
217
  "lstrip": false,
@@ -220,7 +211,7 @@
220
  "special": true
221
  },
222
  {
223
- "id": 64021,
224
  "content": "<is>",
225
  "single_word": false,
226
  "lstrip": false,
@@ -229,7 +220,7 @@
229
  "special": true
230
  },
231
  {
232
- "id": 64022,
233
  "content": "<it>",
234
  "single_word": false,
235
  "lstrip": false,
@@ -238,7 +229,7 @@
238
  "special": true
239
  },
240
  {
241
- "id": 64023,
242
  "content": "<kab>",
243
  "single_word": false,
244
  "lstrip": false,
@@ -247,7 +238,7 @@
247
  "special": true
248
  },
249
  {
250
- "id": 64024,
251
  "content": "<kn>",
252
  "single_word": false,
253
  "lstrip": false,
@@ -256,7 +247,7 @@
256
  "special": true
257
  },
258
  {
259
- "id": 64025,
260
  "content": "<lb>",
261
  "single_word": false,
262
  "lstrip": false,
@@ -265,7 +256,7 @@
265
  "special": true
266
  },
267
  {
268
- "id": 64026,
269
  "content": "<mr>",
270
  "single_word": false,
271
  "lstrip": false,
@@ -274,7 +265,7 @@
274
  "special": true
275
  },
276
  {
277
- "id": 64027,
278
  "content": "<mt>",
279
  "single_word": false,
280
  "lstrip": false,
@@ -283,7 +274,7 @@
283
  "special": true
284
  },
285
  {
286
- "id": 64028,
287
  "content": "<ne>",
288
  "single_word": false,
289
  "lstrip": false,
@@ -292,7 +283,7 @@
292
  "special": true
293
  },
294
  {
295
- "id": 64029,
296
  "content": "<nl>",
297
  "single_word": false,
298
  "lstrip": false,
@@ -301,7 +292,7 @@
301
  "special": true
302
  },
303
  {
304
- "id": 64030,
305
  "content": "<no>",
306
  "single_word": false,
307
  "lstrip": false,
@@ -310,7 +301,7 @@
310
  "special": true
311
  },
312
  {
313
- "id": 64031,
314
  "content": "<oc>",
315
  "single_word": false,
316
  "lstrip": false,
@@ -319,7 +310,7 @@
319
  "special": true
320
  },
321
  {
322
- "id": 64032,
323
  "content": "<pl>",
324
  "single_word": false,
325
  "lstrip": false,
@@ -328,7 +319,7 @@
328
  "special": true
329
  },
330
  {
331
- "id": 64033,
332
  "content": "<pt>",
333
  "single_word": false,
334
  "lstrip": false,
@@ -337,7 +328,7 @@
337
  "special": true
338
  },
339
  {
340
- "id": 64034,
341
  "content": "<ro>",
342
  "single_word": false,
343
  "lstrip": false,
@@ -346,7 +337,7 @@
346
  "special": true
347
  },
348
  {
349
- "id": 64035,
350
  "content": "<ru>",
351
  "single_word": false,
352
  "lstrip": false,
@@ -355,7 +346,7 @@
355
  "special": true
356
  },
357
  {
358
- "id": 64036,
359
  "content": "<sd>",
360
  "single_word": false,
361
  "lstrip": false,
@@ -364,7 +355,7 @@
364
  "special": true
365
  },
366
  {
367
- "id": 64037,
368
  "content": "<so>",
369
  "single_word": false,
370
  "lstrip": false,
@@ -373,7 +364,7 @@
373
  "special": true
374
  },
375
  {
376
- "id": 64038,
377
  "content": "<sr>",
378
  "single_word": false,
379
  "lstrip": false,
@@ -382,7 +373,7 @@
382
  "special": true
383
  },
384
  {
385
- "id": 64039,
386
  "content": "<sv>",
387
  "single_word": false,
388
  "lstrip": false,
@@ -391,7 +382,7 @@
391
  "special": true
392
  },
393
  {
394
- "id": 64040,
395
  "content": "<ti>",
396
  "single_word": false,
397
  "lstrip": false,
@@ -400,7 +391,7 @@
400
  "special": true
401
  },
402
  {
403
- "id": 64041,
404
  "content": "<uk>",
405
  "single_word": false,
406
  "lstrip": false,
@@ -409,7 +400,7 @@
409
  "special": true
410
  },
411
  {
412
- "id": 64042,
413
  "content": "<ur>",
414
  "single_word": false,
415
  "lstrip": false,
@@ -462,7 +453,7 @@
462
  },
463
  {
464
  "SpecialToken": {
465
- "id": "<eos>",
466
  "type_id": 0
467
  }
468
  }
@@ -488,28 +479,28 @@
488
  },
489
  {
490
  "SpecialToken": {
491
- "id": "<eos>",
492
  "type_id": 0
493
  }
494
  }
495
  ],
496
  "special_tokens": {
497
- "<en>": {
498
- "id": "<en>",
499
  "ids": [
500
- 64014
501
  ],
502
  "tokens": [
503
- "<en>"
504
  ]
505
  },
506
- "<eos>": {
507
- "id": "<eos>",
508
  "ids": [
509
- 64001
510
  ],
511
  "tokens": [
512
- "<eos>"
513
  ]
514
  }
515
  }
 
41
  },
42
  {
43
  "id": 64001,
 
 
 
 
 
 
 
 
 
44
  "content": "<af>",
45
  "single_word": false,
46
  "lstrip": false,
 
49
  "special": true
50
  },
51
  {
52
+ "id": 64002,
53
  "content": "<am>",
54
  "single_word": false,
55
  "lstrip": false,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 64003,
62
  "content": "<ar>",
63
  "single_word": false,
64
  "lstrip": false,
 
67
  "special": true
68
  },
69
  {
70
+ "id": 64004,
71
  "content": "<ast>",
72
  "single_word": false,
73
  "lstrip": false,
 
76
  "special": true
77
  },
78
  {
79
+ "id": 64005,
80
  "content": "<be>",
81
  "single_word": false,
82
  "lstrip": false,
 
85
  "special": true
86
  },
87
  {
88
+ "id": 64006,
89
  "content": "<bg>",
90
  "single_word": false,
91
  "lstrip": false,
 
94
  "special": true
95
  },
96
  {
97
+ "id": 64007,
98
  "content": "<bn>",
99
  "single_word": false,
100
  "lstrip": false,
 
103
  "special": true
104
  },
105
  {
106
+ "id": 64008,
107
  "content": "<bs>",
108
  "single_word": false,
109
  "lstrip": false,
 
112
  "special": true
113
  },
114
  {
115
+ "id": 64009,
116
  "content": "<ca>",
117
  "single_word": false,
118
  "lstrip": false,
 
121
  "special": true
122
  },
123
  {
124
+ "id": 64010,
125
  "content": "<cs>",
126
  "single_word": false,
127
  "lstrip": false,
 
130
  "special": true
131
  },
132
  {
133
+ "id": 64011,
134
  "content": "<da>",
135
  "single_word": false,
136
  "lstrip": false,
 
139
  "special": true
140
  },
141
  {
142
+ "id": 64012,
143
  "content": "<de>",
144
  "single_word": false,
145
  "lstrip": false,
 
148
  "special": true
149
  },
150
  {
151
+ "id": 64013,
152
  "content": "<en>",
153
  "single_word": false,
154
  "lstrip": false,
 
157
  "special": true
158
  },
159
  {
160
+ "id": 64014,
161
  "content": "<es>",
162
  "single_word": false,
163
  "lstrip": false,
 
166
  "special": true
167
  },
168
  {
169
+ "id": 64015,
170
  "content": "<fr>",
171
  "single_word": false,
172
  "lstrip": false,
 
175
  "special": true
176
  },
177
  {
178
+ "id": 64016,
179
  "content": "<gu>",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": true
185
  },
186
  {
187
+ "id": 64017,
188
  "content": "<ha>",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": true
194
  },
195
  {
196
+ "id": 64018,
197
  "content": "<he>",
198
  "single_word": false,
199
  "lstrip": false,
 
202
  "special": true
203
  },
204
  {
205
+ "id": 64019,
206
  "content": "<hi>",
207
  "single_word": false,
208
  "lstrip": false,
 
211
  "special": true
212
  },
213
  {
214
+ "id": 64020,
215
  "content": "<is>",
216
  "single_word": false,
217
  "lstrip": false,
 
220
  "special": true
221
  },
222
  {
223
+ "id": 64021,
224
  "content": "<it>",
225
  "single_word": false,
226
  "lstrip": false,
 
229
  "special": true
230
  },
231
  {
232
+ "id": 64022,
233
  "content": "<kab>",
234
  "single_word": false,
235
  "lstrip": false,
 
238
  "special": true
239
  },
240
  {
241
+ "id": 64023,
242
  "content": "<kn>",
243
  "single_word": false,
244
  "lstrip": false,
 
247
  "special": true
248
  },
249
  {
250
+ "id": 64024,
251
  "content": "<lb>",
252
  "single_word": false,
253
  "lstrip": false,
 
256
  "special": true
257
  },
258
  {
259
+ "id": 64025,
260
  "content": "<mr>",
261
  "single_word": false,
262
  "lstrip": false,
 
265
  "special": true
266
  },
267
  {
268
+ "id": 64026,
269
  "content": "<mt>",
270
  "single_word": false,
271
  "lstrip": false,
 
274
  "special": true
275
  },
276
  {
277
+ "id": 64027,
278
  "content": "<ne>",
279
  "single_word": false,
280
  "lstrip": false,
 
283
  "special": true
284
  },
285
  {
286
+ "id": 64028,
287
  "content": "<nl>",
288
  "single_word": false,
289
  "lstrip": false,
 
292
  "special": true
293
  },
294
  {
295
+ "id": 64029,
296
  "content": "<no>",
297
  "single_word": false,
298
  "lstrip": false,
 
301
  "special": true
302
  },
303
  {
304
+ "id": 64030,
305
  "content": "<oc>",
306
  "single_word": false,
307
  "lstrip": false,
 
310
  "special": true
311
  },
312
  {
313
+ "id": 64031,
314
  "content": "<pl>",
315
  "single_word": false,
316
  "lstrip": false,
 
319
  "special": true
320
  },
321
  {
322
+ "id": 64032,
323
  "content": "<pt>",
324
  "single_word": false,
325
  "lstrip": false,
 
328
  "special": true
329
  },
330
  {
331
+ "id": 64033,
332
  "content": "<ro>",
333
  "single_word": false,
334
  "lstrip": false,
 
337
  "special": true
338
  },
339
  {
340
+ "id": 64034,
341
  "content": "<ru>",
342
  "single_word": false,
343
  "lstrip": false,
 
346
  "special": true
347
  },
348
  {
349
+ "id": 64035,
350
  "content": "<sd>",
351
  "single_word": false,
352
  "lstrip": false,
 
355
  "special": true
356
  },
357
  {
358
+ "id": 64036,
359
  "content": "<so>",
360
  "single_word": false,
361
  "lstrip": false,
 
364
  "special": true
365
  },
366
  {
367
+ "id": 64037,
368
  "content": "<sr>",
369
  "single_word": false,
370
  "lstrip": false,
 
373
  "special": true
374
  },
375
  {
376
+ "id": 64038,
377
  "content": "<sv>",
378
  "single_word": false,
379
  "lstrip": false,
 
382
  "special": true
383
  },
384
  {
385
+ "id": 64039,
386
  "content": "<ti>",
387
  "single_word": false,
388
  "lstrip": false,
 
391
  "special": true
392
  },
393
  {
394
+ "id": 64040,
395
  "content": "<uk>",
396
  "single_word": false,
397
  "lstrip": false,
 
400
  "special": true
401
  },
402
  {
403
+ "id": 64041,
404
  "content": "<ur>",
405
  "single_word": false,
406
  "lstrip": false,
 
453
  },
454
  {
455
  "SpecialToken": {
456
+ "id": "</s>",
457
  "type_id": 0
458
  }
459
  }
 
479
  },
480
  {
481
  "SpecialToken": {
482
+ "id": "</s>",
483
  "type_id": 0
484
  }
485
  }
486
  ],
487
  "special_tokens": {
488
+ "</s>": {
489
+ "id": "</s>",
490
  "ids": [
491
+ 2
492
  ],
493
  "tokens": [
494
+ "</s>"
495
  ]
496
  },
497
+ "<en>": {
498
+ "id": "<en>",
499
  "ids": [
500
+ 64013
501
  ],
502
  "tokens": [
503
+ "<en>"
504
  ]
505
  }
506
  }
tokenizer_config.json CHANGED
@@ -33,14 +33,6 @@
33
  "special": true
34
  },
35
  "64001": {
36
- "content": "<eos>",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "64002": {
44
  "content": "<af>",
45
  "lstrip": false,
46
  "normalized": false,
@@ -48,7 +40,7 @@
48
  "single_word": false,
49
  "special": true
50
  },
51
- "64003": {
52
  "content": "<am>",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,7 +48,7 @@
56
  "single_word": false,
57
  "special": true
58
  },
59
- "64004": {
60
  "content": "<ar>",
61
  "lstrip": false,
62
  "normalized": false,
@@ -64,7 +56,7 @@
64
  "single_word": false,
65
  "special": true
66
  },
67
- "64005": {
68
  "content": "<ast>",
69
  "lstrip": false,
70
  "normalized": false,
@@ -72,7 +64,7 @@
72
  "single_word": false,
73
  "special": true
74
  },
75
- "64006": {
76
  "content": "<be>",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +72,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "64007": {
84
  "content": "<bg>",
85
  "lstrip": false,
86
  "normalized": false,
@@ -88,7 +80,7 @@
88
  "single_word": false,
89
  "special": true
90
  },
91
- "64008": {
92
  "content": "<bn>",
93
  "lstrip": false,
94
  "normalized": false,
@@ -96,7 +88,7 @@
96
  "single_word": false,
97
  "special": true
98
  },
99
- "64009": {
100
  "content": "<bs>",
101
  "lstrip": false,
102
  "normalized": false,
@@ -104,7 +96,7 @@
104
  "single_word": false,
105
  "special": true
106
  },
107
- "64010": {
108
  "content": "<ca>",
109
  "lstrip": false,
110
  "normalized": false,
@@ -112,7 +104,7 @@
112
  "single_word": false,
113
  "special": true
114
  },
115
- "64011": {
116
  "content": "<cs>",
117
  "lstrip": false,
118
  "normalized": false,
@@ -120,7 +112,7 @@
120
  "single_word": false,
121
  "special": true
122
  },
123
- "64012": {
124
  "content": "<da>",
125
  "lstrip": false,
126
  "normalized": false,
@@ -128,7 +120,7 @@
128
  "single_word": false,
129
  "special": true
130
  },
131
- "64013": {
132
  "content": "<de>",
133
  "lstrip": false,
134
  "normalized": false,
@@ -136,7 +128,7 @@
136
  "single_word": false,
137
  "special": true
138
  },
139
- "64014": {
140
  "content": "<en>",
141
  "lstrip": false,
142
  "normalized": false,
@@ -144,7 +136,7 @@
144
  "single_word": false,
145
  "special": true
146
  },
147
- "64015": {
148
  "content": "<es>",
149
  "lstrip": false,
150
  "normalized": false,
@@ -152,7 +144,7 @@
152
  "single_word": false,
153
  "special": true
154
  },
155
- "64016": {
156
  "content": "<fr>",
157
  "lstrip": false,
158
  "normalized": false,
@@ -160,7 +152,7 @@
160
  "single_word": false,
161
  "special": true
162
  },
163
- "64017": {
164
  "content": "<gu>",
165
  "lstrip": false,
166
  "normalized": false,
@@ -168,7 +160,7 @@
168
  "single_word": false,
169
  "special": true
170
  },
171
- "64018": {
172
  "content": "<ha>",
173
  "lstrip": false,
174
  "normalized": false,
@@ -176,7 +168,7 @@
176
  "single_word": false,
177
  "special": true
178
  },
179
- "64019": {
180
  "content": "<he>",
181
  "lstrip": false,
182
  "normalized": false,
@@ -184,7 +176,7 @@
184
  "single_word": false,
185
  "special": true
186
  },
187
- "64020": {
188
  "content": "<hi>",
189
  "lstrip": false,
190
  "normalized": false,
@@ -192,7 +184,7 @@
192
  "single_word": false,
193
  "special": true
194
  },
195
- "64021": {
196
  "content": "<is>",
197
  "lstrip": false,
198
  "normalized": false,
@@ -200,7 +192,7 @@
200
  "single_word": false,
201
  "special": true
202
  },
203
- "64022": {
204
  "content": "<it>",
205
  "lstrip": false,
206
  "normalized": false,
@@ -208,7 +200,7 @@
208
  "single_word": false,
209
  "special": true
210
  },
211
- "64023": {
212
  "content": "<kab>",
213
  "lstrip": false,
214
  "normalized": false,
@@ -216,7 +208,7 @@
216
  "single_word": false,
217
  "special": true
218
  },
219
- "64024": {
220
  "content": "<kn>",
221
  "lstrip": false,
222
  "normalized": false,
@@ -224,7 +216,7 @@
224
  "single_word": false,
225
  "special": true
226
  },
227
- "64025": {
228
  "content": "<lb>",
229
  "lstrip": false,
230
  "normalized": false,
@@ -232,7 +224,7 @@
232
  "single_word": false,
233
  "special": true
234
  },
235
- "64026": {
236
  "content": "<mr>",
237
  "lstrip": false,
238
  "normalized": false,
@@ -240,7 +232,7 @@
240
  "single_word": false,
241
  "special": true
242
  },
243
- "64027": {
244
  "content": "<mt>",
245
  "lstrip": false,
246
  "normalized": false,
@@ -248,7 +240,7 @@
248
  "single_word": false,
249
  "special": true
250
  },
251
- "64028": {
252
  "content": "<ne>",
253
  "lstrip": false,
254
  "normalized": false,
@@ -256,7 +248,7 @@
256
  "single_word": false,
257
  "special": true
258
  },
259
- "64029": {
260
  "content": "<nl>",
261
  "lstrip": false,
262
  "normalized": false,
@@ -264,7 +256,7 @@
264
  "single_word": false,
265
  "special": true
266
  },
267
- "64030": {
268
  "content": "<no>",
269
  "lstrip": false,
270
  "normalized": false,
@@ -272,7 +264,7 @@
272
  "single_word": false,
273
  "special": true
274
  },
275
- "64031": {
276
  "content": "<oc>",
277
  "lstrip": false,
278
  "normalized": false,
@@ -280,7 +272,7 @@
280
  "single_word": false,
281
  "special": true
282
  },
283
- "64032": {
284
  "content": "<pl>",
285
  "lstrip": false,
286
  "normalized": false,
@@ -288,7 +280,7 @@
288
  "single_word": false,
289
  "special": true
290
  },
291
- "64033": {
292
  "content": "<pt>",
293
  "lstrip": false,
294
  "normalized": false,
@@ -296,7 +288,7 @@
296
  "single_word": false,
297
  "special": true
298
  },
299
- "64034": {
300
  "content": "<ro>",
301
  "lstrip": false,
302
  "normalized": false,
@@ -304,7 +296,7 @@
304
  "single_word": false,
305
  "special": true
306
  },
307
- "64035": {
308
  "content": "<ru>",
309
  "lstrip": false,
310
  "normalized": false,
@@ -312,7 +304,7 @@
312
  "single_word": false,
313
  "special": true
314
  },
315
- "64036": {
316
  "content": "<sd>",
317
  "lstrip": false,
318
  "normalized": false,
@@ -320,7 +312,7 @@
320
  "single_word": false,
321
  "special": true
322
  },
323
- "64037": {
324
  "content": "<so>",
325
  "lstrip": false,
326
  "normalized": false,
@@ -328,7 +320,7 @@
328
  "single_word": false,
329
  "special": true
330
  },
331
- "64038": {
332
  "content": "<sr>",
333
  "lstrip": false,
334
  "normalized": false,
@@ -336,7 +328,7 @@
336
  "single_word": false,
337
  "special": true
338
  },
339
- "64039": {
340
  "content": "<sv>",
341
  "lstrip": false,
342
  "normalized": false,
@@ -344,7 +336,7 @@
344
  "single_word": false,
345
  "special": true
346
  },
347
- "64040": {
348
  "content": "<ti>",
349
  "lstrip": false,
350
  "normalized": false,
@@ -352,7 +344,7 @@
352
  "single_word": false,
353
  "special": true
354
  },
355
- "64041": {
356
  "content": "<uk>",
357
  "lstrip": false,
358
  "normalized": false,
@@ -360,7 +352,7 @@
360
  "single_word": false,
361
  "special": true
362
  },
363
- "64042": {
364
  "content": "<ur>",
365
  "lstrip": false,
366
  "normalized": false,
@@ -412,10 +404,10 @@
412
  "<uk>",
413
  "<ur>"
414
  ],
415
- "bos_token": "<eos>",
416
  "clean_up_tokenization_spaces": true,
417
  "cls_token": null,
418
- "eos_token": "<eos>",
419
  "legacy_behaviour": false,
420
  "mask_token": null,
421
  "model_max_length": 1000000000000000019884624838656,
 
33
  "special": true
34
  },
35
  "64001": {
 
 
 
 
 
 
 
 
36
  "content": "<af>",
37
  "lstrip": false,
38
  "normalized": false,
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "64002": {
44
  "content": "<am>",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "64003": {
52
  "content": "<ar>",
53
  "lstrip": false,
54
  "normalized": false,
 
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "64004": {
60
  "content": "<ast>",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "64005": {
68
  "content": "<be>",
69
  "lstrip": false,
70
  "normalized": false,
 
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "64006": {
76
  "content": "<bg>",
77
  "lstrip": false,
78
  "normalized": false,
 
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "64007": {
84
  "content": "<bn>",
85
  "lstrip": false,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "64008": {
92
  "content": "<bs>",
93
  "lstrip": false,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "64009": {
100
  "content": "<ca>",
101
  "lstrip": false,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "64010": {
108
  "content": "<cs>",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "64011": {
116
  "content": "<da>",
117
  "lstrip": false,
118
  "normalized": false,
 
120
  "single_word": false,
121
  "special": true
122
  },
123
+ "64012": {
124
  "content": "<de>",
125
  "lstrip": false,
126
  "normalized": false,
 
128
  "single_word": false,
129
  "special": true
130
  },
131
+ "64013": {
132
  "content": "<en>",
133
  "lstrip": false,
134
  "normalized": false,
 
136
  "single_word": false,
137
  "special": true
138
  },
139
+ "64014": {
140
  "content": "<es>",
141
  "lstrip": false,
142
  "normalized": false,
 
144
  "single_word": false,
145
  "special": true
146
  },
147
+ "64015": {
148
  "content": "<fr>",
149
  "lstrip": false,
150
  "normalized": false,
 
152
  "single_word": false,
153
  "special": true
154
  },
155
+ "64016": {
156
  "content": "<gu>",
157
  "lstrip": false,
158
  "normalized": false,
 
160
  "single_word": false,
161
  "special": true
162
  },
163
+ "64017": {
164
  "content": "<ha>",
165
  "lstrip": false,
166
  "normalized": false,
 
168
  "single_word": false,
169
  "special": true
170
  },
171
+ "64018": {
172
  "content": "<he>",
173
  "lstrip": false,
174
  "normalized": false,
 
176
  "single_word": false,
177
  "special": true
178
  },
179
+ "64019": {
180
  "content": "<hi>",
181
  "lstrip": false,
182
  "normalized": false,
 
184
  "single_word": false,
185
  "special": true
186
  },
187
+ "64020": {
188
  "content": "<is>",
189
  "lstrip": false,
190
  "normalized": false,
 
192
  "single_word": false,
193
  "special": true
194
  },
195
+ "64021": {
196
  "content": "<it>",
197
  "lstrip": false,
198
  "normalized": false,
 
200
  "single_word": false,
201
  "special": true
202
  },
203
+ "64022": {
204
  "content": "<kab>",
205
  "lstrip": false,
206
  "normalized": false,
 
208
  "single_word": false,
209
  "special": true
210
  },
211
+ "64023": {
212
  "content": "<kn>",
213
  "lstrip": false,
214
  "normalized": false,
 
216
  "single_word": false,
217
  "special": true
218
  },
219
+ "64024": {
220
  "content": "<lb>",
221
  "lstrip": false,
222
  "normalized": false,
 
224
  "single_word": false,
225
  "special": true
226
  },
227
+ "64025": {
228
  "content": "<mr>",
229
  "lstrip": false,
230
  "normalized": false,
 
232
  "single_word": false,
233
  "special": true
234
  },
235
+ "64026": {
236
  "content": "<mt>",
237
  "lstrip": false,
238
  "normalized": false,
 
240
  "single_word": false,
241
  "special": true
242
  },
243
+ "64027": {
244
  "content": "<ne>",
245
  "lstrip": false,
246
  "normalized": false,
 
248
  "single_word": false,
249
  "special": true
250
  },
251
+ "64028": {
252
  "content": "<nl>",
253
  "lstrip": false,
254
  "normalized": false,
 
256
  "single_word": false,
257
  "special": true
258
  },
259
+ "64029": {
260
  "content": "<no>",
261
  "lstrip": false,
262
  "normalized": false,
 
264
  "single_word": false,
265
  "special": true
266
  },
267
+ "64030": {
268
  "content": "<oc>",
269
  "lstrip": false,
270
  "normalized": false,
 
272
  "single_word": false,
273
  "special": true
274
  },
275
+ "64031": {
276
  "content": "<pl>",
277
  "lstrip": false,
278
  "normalized": false,
 
280
  "single_word": false,
281
  "special": true
282
  },
283
+ "64032": {
284
  "content": "<pt>",
285
  "lstrip": false,
286
  "normalized": false,
 
288
  "single_word": false,
289
  "special": true
290
  },
291
+ "64033": {
292
  "content": "<ro>",
293
  "lstrip": false,
294
  "normalized": false,
 
296
  "single_word": false,
297
  "special": true
298
  },
299
+ "64034": {
300
  "content": "<ru>",
301
  "lstrip": false,
302
  "normalized": false,
 
304
  "single_word": false,
305
  "special": true
306
  },
307
+ "64035": {
308
  "content": "<sd>",
309
  "lstrip": false,
310
  "normalized": false,
 
312
  "single_word": false,
313
  "special": true
314
  },
315
+ "64036": {
316
  "content": "<so>",
317
  "lstrip": false,
318
  "normalized": false,
 
320
  "single_word": false,
321
  "special": true
322
  },
323
+ "64037": {
324
  "content": "<sr>",
325
  "lstrip": false,
326
  "normalized": false,
 
328
  "single_word": false,
329
  "special": true
330
  },
331
+ "64038": {
332
  "content": "<sv>",
333
  "lstrip": false,
334
  "normalized": false,
 
336
  "single_word": false,
337
  "special": true
338
  },
339
+ "64039": {
340
  "content": "<ti>",
341
  "lstrip": false,
342
  "normalized": false,
 
344
  "single_word": false,
345
  "special": true
346
  },
347
+ "64040": {
348
  "content": "<uk>",
349
  "lstrip": false,
350
  "normalized": false,
 
352
  "single_word": false,
353
  "special": true
354
  },
355
+ "64041": {
356
  "content": "<ur>",
357
  "lstrip": false,
358
  "normalized": false,
 
404
  "<uk>",
405
  "<ur>"
406
  ],
407
+ "bos_token": "</s>",
408
  "clean_up_tokenization_spaces": true,
409
  "cls_token": null,
410
+ "eos_token": "</s>",
411
  "legacy_behaviour": false,
412
  "mask_token": null,
413
  "model_max_length": 1000000000000000019884624838656,