Rodr16020 committed on
Commit
e9779b2
·
verified ·
1 Parent(s): d23e800

Bronya Zaychik V13. Resized Embeddings for PAD token

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<pad>": 32001
3
+ }
special_tokens_map.json CHANGED
@@ -13,6 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
 
 
 
 
 
 
16
  "unk_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -29,6 +34,15 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -32134,7 +32148,8 @@
32134
  "왕": 31996,
32135
  "收": 31997,
32136
  "弘": 31998,
32137
- "给": 31999
 
32138
  },
32139
  "merges": [
32140
  "▁ t",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 4096,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
34
  "rstrip": false,
35
  "normalized": false,
36
  "special": true
37
+ },
38
+ {
39
+ "id": 32001,
40
+ "content": "<pad>",
41
+ "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
+ "normalized": false,
45
+ "special": true
46
  }
47
  ],
48
  "normalizer": {
 
32148
  "왕": 31996,
32149
  "收": 31997,
32150
  "弘": 31998,
32151
+ "给": 31999,
32152
+ "<pad": 32000
32153
  },
32154
  "merges": [
32155
  "▁ t",
tokenizer_config.json CHANGED
@@ -19,7 +19,14 @@
19
  },
20
  "legacy": false,
21
  "model_max_length": 1000000000000000019884624838656,
22
- "pad_token": null,
 
 
 
 
 
 
 
23
  "padding_side": "right",
24
  "sp_model_kwargs": {},
25
  "tokenizer_class": "LlamaTokenizer",
 
19
  },
20
  "legacy": false,
21
  "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": {
23
+ "__type": "AddedToken",
24
+ "content": "<pad>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
  "padding_side": "right",
31
  "sp_model_kwargs": {},
32
  "tokenizer_class": "LlamaTokenizer",