hyllus123 committed on
Commit
1f256d5
·
verified ·
1 Parent(s): 24abc3d

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +11 -1
  2. tokenizer_config.json +12 -1
special_tokens_map.json CHANGED
@@ -1,5 +1,9 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
3
  "▁<PRE>",
4
  "▁<MID>",
5
  "▁<SUF>",
@@ -19,7 +23,13 @@
19
  "rstrip": false,
20
  "single_word": false
21
  },
22
- "pad_token": "</s>",
 
 
 
 
 
 
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ "▁<PRE>",
4
+ "▁<MID>",
5
+ "▁<SUF>",
6
+ "▁<EOT>",
7
  "▁<PRE>",
8
  "▁<MID>",
9
  "▁<SUF>",
 
23
  "rstrip": false,
24
  "single_word": false
25
  },
26
+ "pad_token": {
27
+ "content": "</s>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
  "unk_token": {
34
  "content": "<unk>",
35
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -60,6 +60,10 @@
60
  }
61
  },
62
  "additional_special_tokens": [
 
 
 
 
63
  "▁<PRE>",
64
  "▁<MID>",
65
  "▁<SUF>",
@@ -72,13 +76,20 @@
72
  "extra_special_tokens": {},
73
  "fill_token": "<FILL_ME>",
74
  "legacy": null,
 
75
  "middle_token": "▁<MID>",
76
  "model_max_length": 1000000000000000019884624838656,
 
77
  "pad_token": "</s>",
 
 
78
  "prefix_token": "▁<PRE>",
79
  "sp_model_kwargs": {},
 
80
  "suffix_token": "▁<SUF>",
81
- "tokenizer_class": "CodeLlamaTokenizer",
 
 
82
  "unk_token": "<unk>",
83
  "use_default_system_prompt": false
84
  }
 
60
  }
61
  },
62
  "additional_special_tokens": [
63
+ "▁<PRE>",
64
+ "▁<MID>",
65
+ "▁<SUF>",
66
+ "▁<EOT>",
67
  "▁<PRE>",
68
  "▁<MID>",
69
  "▁<SUF>",
 
76
  "extra_special_tokens": {},
77
  "fill_token": "<FILL_ME>",
78
  "legacy": null,
79
+ "max_length": 2048,
80
  "middle_token": "▁<MID>",
81
  "model_max_length": 1000000000000000019884624838656,
82
+ "pad_to_multiple_of": null,
83
  "pad_token": "</s>",
84
+ "pad_token_type_id": 0,
85
+ "padding_side": "right",
86
  "prefix_token": "▁<PRE>",
87
  "sp_model_kwargs": {},
88
+ "stride": 0,
89
  "suffix_token": "▁<SUF>",
90
+ "tokenizer_class": "CodeLlamaTokenizerFast",
91
+ "truncation_side": "right",
92
+ "truncation_strategy": "longest_first",
93
  "unk_token": "<unk>",
94
  "use_default_system_prompt": false
95
  }