JakeOh committed on
Commit
eb238bf
·
verified ·
1 Parent(s): 050306f

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +30 -10
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -33,20 +33,18 @@
33
  ],
34
  "normalizer": {
35
  "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "Replace",
39
- "pattern": {
40
- "String": " "
41
- },
42
- "content": "▁"
43
- }
44
- ]
45
  },
46
  "pre_tokenizer": null,
47
  "post_processor": {
48
  "type": "TemplateProcessing",
49
  "single": [
 
 
 
 
 
 
50
  {
51
  "Sequence": {
52
  "id": "A",
@@ -55,12 +53,24 @@
55
  }
56
  ],
57
  "pair": [
 
 
 
 
 
 
58
  {
59
  "Sequence": {
60
  "id": "A",
61
  "type_id": 0
62
  }
63
  },
 
 
 
 
 
 
64
  {
65
  "Sequence": {
66
  "id": "B",
@@ -68,7 +78,17 @@
68
  }
69
  }
70
  ],
71
- "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
72
  },
73
  "decoder": {
74
  "type": "Sequence",
 
33
  ],
34
  "normalizer": {
35
  "type": "Sequence",
36
+ "normalizers": []
 
 
 
 
 
 
 
 
37
  },
38
  "pre_tokenizer": null,
39
  "post_processor": {
40
  "type": "TemplateProcessing",
41
  "single": [
42
+ {
43
+ "SpecialToken": {
44
+ "id": "<s>",
45
+ "type_id": 0
46
+ }
47
+ },
48
  {
49
  "Sequence": {
50
  "id": "A",
 
53
  }
54
  ],
55
  "pair": [
56
+ {
57
+ "SpecialToken": {
58
+ "id": "<s>",
59
+ "type_id": 0
60
+ }
61
+ },
62
  {
63
  "Sequence": {
64
  "id": "A",
65
  "type_id": 0
66
  }
67
  },
68
+ {
69
+ "SpecialToken": {
70
+ "id": "<s>",
71
+ "type_id": 1
72
+ }
73
+ },
74
  {
75
  "Sequence": {
76
  "id": "B",
 
78
  }
79
  }
80
  ],
81
+ "special_tokens": {
82
+ "<s>": {
83
+ "id": "<s>",
84
+ "ids": [
85
+ 1
86
+ ],
87
+ "tokens": [
88
+ "<s>"
89
+ ]
90
+ }
91
+ }
92
  },
93
  "decoder": {
94
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "add_bos_token": false,
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
 
1
  {
2
+ "add_bos_token": true,
3
  "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {