Echo9Zulu committed
Commit 7e55b8e · verified · 1 Parent(s): 2df1e1c

Delete int8_asym-ov
int8_asym-ov/added_tokens.json DELETED
@@ -1,12 +0,0 @@
-{
-  "<|/tool_call|>": 200026,
-  "<|/tool|>": 200024,
-  "<|assistant|>": 200019,
-  "<|end|>": 200020,
-  "<|system|>": 200022,
-  "<|tag|>": 200028,
-  "<|tool_call|>": 200025,
-  "<|tool_response|>": 200027,
-  "<|tool|>": 200023,
-  "<|user|>": 200021
-}
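Note: a quick sanity check of this mapping (a sketch; assumes a local copy of the folder from the parent revision 2df1e1c, before this deletion):

```python
import json

# Load the deleted added-tokens map (hypothetical local path).
with open("int8_asym-ov/added_tokens.json") as f:
    added_tokens = json.load(f)

# The chat-role and tool markers occupy ids 200019-200028.
assert added_tokens["<|assistant|>"] == 200019
assert added_tokens["<|user|>"] == 200021
assert max(added_tokens.values()) == 200028
```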
 
int8_asym-ov/config.json DELETED
@@ -1,144 +0,0 @@
-{
-  "_attn_implementation_autoset": true,
-  "architectures": [
-    "Phi3ForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "auto_map": {
-    "AutoConfig": "configuration_phi3.Phi3Config",
-    "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
-    "AutoTokenizer": "Xenova/gpt-4o"
-  },
-  "bos_token_id": 199999,
-  "embd_pdrop": 0.0,
-  "eos_token_id": 199999,
-  "full_attn_mod": 1,
-  "hidden_act": "silu",
-  "hidden_size": 3072,
-  "initializer_range": 0.02,
-  "intermediate_size": 8192,
-  "interpolate_factor": 1,
-  "lm_head_bias": false,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "phi3",
-  "num_attention_heads": 24,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 8,
-  "original_max_position_embeddings": 4096,
-  "pad_token_id": 199999,
-  "partial_rotary_factor": 0.75,
-  "resid_pdrop": 0.0,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "long_factor": [
-      1,
-      1.118320672,
-      1.250641126,
-      1.398617824,
-      1.564103225,
-      1.74916897,
-      1.956131817,
-      2.187582649,
-      2.446418898,
-      2.735880826,
-      3.059592084,
-      3.421605075,
-      3.826451687,
-      4.279200023,
-      4.785517845,
-      5.351743533,
-      5.984965424,
-      6.693110555,
-      7.485043894,
-      8.370679318,
-      9.36110372,
-      10.4687158,
-      11.70738129,
-      13.09260651,
-      14.64173252,
-      16.37415215,
-      18.31155283,
-      20.47818807,
-      22.90118105,
-      25.61086418,
-      28.64115884,
-      32.03,
-      32.1,
-      32.13,
-      32.23,
-      32.6,
-      32.61,
-      32.64,
-      32.66,
-      32.7,
-      32.71,
-      32.93,
-      32.97,
-      33.28,
-      33.49,
-      33.5,
-      44.16,
-      47.77
-    ],
-    "short_factor": [
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0,
-      1.0
-    ],
-    "type": "longrope"
-  },
-  "rope_theta": 10000.0,
-  "sliding_window": 262144,
-  "tie_word_embeddings": true,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_cache": true,
-  "vocab_size": 200064
-}
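Note: the `long_factor` and `short_factor` lists above each hold 48 entries, which is exactly what the validation in configuration_phi3.py (below) requires for this geometry. A worked check:

```python
# Length required of each rope_scaling factor list, per
# Phi3Config._rope_scaling_validation in configuration_phi3.py below.
hidden_size = 3072
num_attention_heads = 24
partial_rotary_factor = 0.75

head_dim = hidden_size // num_attention_heads          # 128
rotary_ndims = int(head_dim * partial_rotary_factor)   # 96
assert rotary_ndims // 2 == 48                         # matches the 48-entry lists above
```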
 
int8_asym-ov/configuration_phi3.py DELETED
@@ -1,226 +0,0 @@
-# coding=utf-8
-# Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Phi-3 model configuration"""
-
-from transformers.configuration_utils import PretrainedConfig
-from transformers.utils import logging
-
-
-logger = logging.get_logger(__name__)
-
-
-class Phi3Config(PretrainedConfig):
-    r"""
-    This is the configuration class to store the configuration of a [`Phi3Model`]. It is used to instantiate a Phi-3
-    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
-    defaults will yield a similar configuration to that of the
-    [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct).
-
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-    documentation from [`PretrainedConfig`] for more information.
-
-    Args:
-        vocab_size (`int`, *optional*, defaults to 32064):
-            Vocabulary size of the Phi-3 model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`Phi3Model`].
-        hidden_size (`int`, *optional*, defaults to 3072):
-            Dimension of the hidden representations.
-        intermediate_size (`int`, *optional*, defaults to 8192):
-            Dimension of the MLP representations.
-        num_hidden_layers (`int`, *optional*, defaults to 32):
-            Number of hidden layers in the Transformer decoder.
-        num_attention_heads (`int`, *optional*, defaults to 32):
-            Number of attention heads for each attention layer in the Transformer decoder.
-        num_key_value_heads (`int`, *optional*):
-            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
-            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
-            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
-            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
-            by meanpooling all the original heads within that group. For more details checkout [this
-            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
-            `num_attention_heads`.
-        resid_pdrop (`float`, *optional*, defaults to 0.0):
-            Dropout probability for mlp outputs.
-        embd_pdrop (`int`, *optional*, defaults to 0.0):
-            The dropout ratio for the embeddings.
-        attention_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio after computing the attention scores.
-        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
-            The non-linear activation function (function or string) in the decoder.
-        max_position_embeddings (`int`, *optional*, defaults to 4096):
-            The maximum sequence length that this model might ever be used with.
-        original_max_position_embeddings (`int`, *optional*, defaults to 4096):
-            The maximum sequence length that this model was trained with. This is used to determine the size of the
-            original RoPE embeddings when using long scaling.
-        initializer_range (`float`, *optional*, defaults to 0.02):
-            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
-            The epsilon value used for the RMSNorm.
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`. Whether to tie weight embeddings or not.
-        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
-            Whether to tie weight embeddings
-        rope_theta (`float`, *optional*, defaults to 10000.0):
-            The base period of the RoPE embeddings.
-        rope_scaling (`dict`, *optional*):
-            The scaling strategy for the RoPE embeddings. If `None`, no scaling is applied. If a dictionary, it must
-            contain the following keys: `type`, `short_factor` and `long_factor`. The `type` must be `longrope` and
-            the `short_factor` and `long_factor` must be lists of numbers with the same length as the hidden size
-            divided by the number of attention heads divided by 2.
-        partial_rotary_factor (`float`, *optional*, defaults to 1.0):
-            Percentage of the query and keys which will have rotary embedding. Must be between 0.0 and 1.0.
-        bos_token_id (`int`, *optional*, defaults to 1):
-            The id of the "beginning-of-sequence" token.
-        eos_token_id (`int`, *optional*, defaults to 32000):
-            The id of the "end-of-sequence" token.
-        pad_token_id (`int`, *optional*, defaults to 32000):
-            The id of the padding token.
-        sliding_window (`int`, *optional*):
-            Sliding window attention window size. If `None`, no sliding window is applied.
-
-    Example:
-
-    ```python
-    >>> from transformers import Phi3Model, Phi3Config
-
-    >>> # Initializing a Phi-3 style configuration
-    >>> configuration = Phi3Config.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
-
-    >>> # Initializing a model from the configuration
-    >>> model = Phi3Model(configuration)

-    >>> # Accessing the model configuration
-    >>> configuration = model.config
-    ```"""
-
-    model_type = "phi3"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=32064,
-        hidden_size=3072,
-        intermediate_size=8192,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=None,
-        resid_pdrop=0.0,
-        embd_pdrop=0.0,
-        attention_dropout=0.0,
-        hidden_act="silu",
-        max_position_embeddings=4096,
-        original_max_position_embeddings=4096,
-        initializer_range=0.02,
-        rms_norm_eps=1e-5,
-        use_cache=True,
-        tie_word_embeddings=False,
-        rope_theta=10000.0,
-        rope_scaling=None,
-        partial_rotary_factor=1.0,
-        bos_token_id=1,
-        eos_token_id=32000,
-        pad_token_id=32000,
-        sliding_window=None,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.resid_pdrop = resid_pdrop
-        self.embd_pdrop = embd_pdrop
-        self.attention_dropout = attention_dropout
-        self.hidden_act = hidden_act
-        self.max_position_embeddings = max_position_embeddings
-        self.original_max_position_embeddings = original_max_position_embeddings
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.rope_scaling = rope_scaling
-        self.partial_rotary_factor = partial_rotary_factor
-        self._rope_scaling_adjustment()
-        self._rope_scaling_validation()
-        self.sliding_window = sliding_window
-
-        super().__init__(
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            pad_token_id=pad_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
-
-    def _rope_scaling_adjustment(self):
-        """
-        Adjust the `type` of the `rope_scaling` configuration for backward compatibility.
-        """
-        if self.rope_scaling is None:
-            return
-
-        rope_scaling_type = self.rope_scaling.get("type", None)
-
-        # For backward compatibility if previous version used "su" or "yarn"
-        if rope_scaling_type is not None and rope_scaling_type in ["su", "yarn"]:
-            self.rope_scaling["type"] = "longrope"
-
-    def _rope_scaling_validation(self):
-        """
-        Validate the `rope_scaling` configuration.
-        """
-        if self.rope_scaling is None:
-            return
-
-        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 3:
-            raise ValueError(
-                "`rope_scaling` must be a dictionary with three fields, `type`, `short_factor` and `long_factor`, "
-                f"got {self.rope_scaling}"
-            )
-        rope_scaling_type = self.rope_scaling.get("type", None)
-        rope_scaling_short_factor = self.rope_scaling.get("short_factor", None)
-        rope_scaling_long_factor = self.rope_scaling.get("long_factor", None)
-        if rope_scaling_type is None or rope_scaling_type not in ["longrope"]:
-            raise ValueError(f"`rope_scaling`'s type field must be one of ['longrope'], got {rope_scaling_type}")
-        if not (
-            isinstance(rope_scaling_short_factor, list)
-            and all(isinstance(x, (int, float)) for x in rope_scaling_short_factor)
-        ):
-            raise ValueError(
-                f"`rope_scaling`'s short_factor field must be a list of numbers, got {rope_scaling_short_factor}"
-            )
-        rotary_ndims = int(self.hidden_size // self.num_attention_heads * self.partial_rotary_factor)
-        if not len(rope_scaling_short_factor) == rotary_ndims // 2:
-            raise ValueError(
-                f"`rope_scaling`'s short_factor field must have length {rotary_ndims // 2}, got {len(rope_scaling_short_factor)}"
-            )
-        if not (
-            isinstance(rope_scaling_long_factor, list)
-            and all(isinstance(x, (int, float)) for x in rope_scaling_long_factor)
-        ):
-            raise ValueError(
-                f"`rope_scaling`'s long_factor field must be a list of numbers, got {rope_scaling_long_factor}"
-            )
-        if not len(rope_scaling_long_factor) == rotary_ndims // 2:
-            raise ValueError(
-                f"`rope_scaling`'s long_factor field must have length {rotary_ndims // 2}, got {len(rope_scaling_long_factor)}"
-            )
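Note: a minimal sketch of the backward-compatibility path implemented by `_rope_scaling_adjustment`, assuming the deleted module is importable locally:

```python
# Legacy "su"/"yarn" rope_scaling types are renamed to "longrope" before
# validation runs (sketch; assumes configuration_phi3.py is on sys.path).
from configuration_phi3 import Phi3Config

cfg = Phi3Config(
    num_attention_heads=24,      # with the default hidden_size=3072 and factor 0.75 -> 48 entries needed
    partial_rotary_factor=0.75,
    rope_scaling={
        "type": "su",            # legacy name
        "short_factor": [1.0] * 48,
        "long_factor": [1.0] * 48,
    },
)
assert cfg.rope_scaling["type"] == "longrope"
```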
 
int8_asym-ov/generation_config.json DELETED
@@ -1,10 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 199999,
-  "eos_token_id": [
-    200020,
-    199999
-  ],
-  "pad_token_id": 199999,
-  "transformers_version": "4.51.3"
-}
 
int8_asym-ov/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
int8_asym-ov/openvino_detokenizer.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:58ec20da66d1d780b298f3cdcf252ccc0e228636fc7bee219163af81f1837e0a
-size 2998349
 
int8_asym-ov/openvino_detokenizer.xml DELETED
@@ -1,219 +0,0 @@
-<?xml version="1.0"?>
-<net name="detokenizer" version="11">
-  <layers>
-    <layer id="0" name="Parameter_170636" type="Parameter" version="opset1">
-      <data shape="?,?" element_type="i64" />
-      <output>
-        <port id="0" precision="I64" names="Parameter_170636">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="1" name="Convert_170806" type="Convert" version="opset1">
-      <data destination_type="i32" />
-      <input>
-        <port id="0" precision="I64">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="2" name="Constant_170638" type="Const" version="opset1">
-      <data element_type="i32" shape="200029" offset="0" size="800116" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>200029</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="3" name="Constant_170640" type="Const" version="opset1">
-      <data element_type="i32" shape="200029" offset="800116" size="800116" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>200029</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="4" name="Constant_170642" type="Const" version="opset1">
-      <data element_type="u8" shape="1398089" offset="1600232" size="1398089" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>1398089</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="5" name="Slice_170647" type="Const" version="opset1">
-      <data element_type="i32" shape="7" offset="2998321" size="28" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>7</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="6" name="VocabDecoder_170649" type="VocabDecoder" version="extension">
-      <data skip_tokens="" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>200029</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>200029</dim>
-        </port>
-        <port id="3" precision="U8">
-          <dim>1398089</dim>
-        </port>
-        <port id="4" precision="I32">
-          <dim>7</dim>
-        </port>
-      </input>
-      <output>
-        <port id="5" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="6" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="7" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="8" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="9" precision="U8">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="7" name="FuzeRagged_170650" type="FuzeRagged" version="extension">
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="4" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="5" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="8" name="UTF8Validate_170651" type="UTF8Validate" version="extension">
-      <data replace_mode="true" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="U8">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="4" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="5" precision="U8">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="9" name="StringTensorPack_170652" type="StringTensorPack" version="opset15">
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="U8">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="3" precision="STRING" names="Result_170653,string_output">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="10" name="Result_170653" type="Result" version="opset1" output_names="Result_170653,string_output">
-      <input>
-        <port id="0" precision="STRING">
-          <dim>-1</dim>
-        </port>
-      </input>
-    </layer>
-  </layers>
-  <edges>
-    <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
-    <edge from-layer="1" from-port="1" to-layer="6" to-port="0" />
-    <edge from-layer="2" from-port="0" to-layer="6" to-port="1" />
-    <edge from-layer="3" from-port="0" to-layer="6" to-port="2" />
-    <edge from-layer="4" from-port="0" to-layer="6" to-port="3" />
-    <edge from-layer="5" from-port="0" to-layer="6" to-port="4" />
-    <edge from-layer="6" from-port="7" to-layer="7" to-port="2" />
-    <edge from-layer="6" from-port="9" to-layer="8" to-port="2" />
-    <edge from-layer="6" from-port="8" to-layer="7" to-port="3" />
-    <edge from-layer="6" from-port="6" to-layer="7" to-port="1" />
-    <edge from-layer="6" from-port="5" to-layer="7" to-port="0" />
-    <edge from-layer="7" from-port="4" to-layer="8" to-port="0" />
-    <edge from-layer="7" from-port="5" to-layer="8" to-port="1" />
-    <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
-    <edge from-layer="8" from-port="4" to-layer="9" to-port="1" />
-    <edge from-layer="8" from-port="5" to-layer="9" to-port="2" />
-    <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
-  </edges>
-  <rt_info>
-    <add_attention_mask value="True" />
-    <add_prefix_space />
-    <add_special_tokens value="True" />
-    <bos_token_id value="199999" />
-    <chat_template value="{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '&lt;|' + message['role'] + '|>' + message['content'] + '&lt;|tool|>' + message['tools'] + '&lt;|/tool|>' + '&lt;|end|>' }}{% else %}{{ '&lt;|' + message['role'] + '|>' + message['content'] + '&lt;|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '&lt;|assistant|>' }}{% else %}{{ eos_token }}{% endif %}" />
-    <clean_up_tokenization_spaces />
-    <detokenizer_input_type value="i64" />
-    <eos_token_id value="199999" />
-    <handle_special_tokens_with_re />
-    <max_length />
-    <number_of_inputs value="1" />
-    <openvino_tokenizers_version value="2025.1.0.0-523-710ddf14de8" />
-    <openvino_version value="2025.1.0-18503-6fec06580ab-releases/2025/1" />
-    <original_tokenizer_class value="&lt;class 'transformers.models.gpt2.tokenization_gpt2_fast.GPT2TokenizerFast'>" />
-    <pad_token_id value="199999" />
-    <sentencepiece_version value="0.2.0" />
-    <skip_special_tokens value="True" />
-    <streaming_detokenizer value="False" />
-    <tokenizer_output_type value="i64" />
-    <tokenizers_version value="0.21.1" />
-    <transformers_version value="4.51.3" />
-    <use_max_padding value="False" />
-    <use_sentencepiece_backend value="False" />
-    <utf8_replace_mode value="replace" />
-    <with_detokenizer value="True" />
-  </rt_info>
-</net>
 
int8_asym-ov/openvino_model.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b889c2b0977e68d76017c9ad226081d1c9f30560173b57456ed78f142ecc6e2d
-size 3839875692
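Note: while this folder existed, the IR weights were loadable through optimum-intel; a sketch (the local path, device choice, and prompt are assumptions):

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Assumes a local checkout of the parent revision 2df1e1c, which still has the folder.
model = OVModelForCausalLM.from_pretrained("int8_asym-ov", device="CPU")
tokenizer = AutoTokenizer.from_pretrained("int8_asym-ov")

inputs = tokenizer("<|user|>Hello!<|end|><|assistant|>", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```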
 
int8_asym-ov/openvino_model.xml DELETED
The diff for this file is too large to render. See raw diff
 
int8_asym-ov/openvino_tokenizer.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:818537d6633196e2f45e51017a6320010ca3c06120460c14028a6c325f92f477
-size 7602768
 
int8_asym-ov/openvino_tokenizer.xml DELETED
@@ -1,685 +0,0 @@
-<?xml version="1.0"?>
-<net name="tokenizer" version="11">
-  <layers>
-    <layer id="0" name="Parameter_170518" type="Parameter" version="opset1">
-      <data shape="?" element_type="string" />
-      <output>
-        <port id="0" precision="STRING" names="Parameter_170518">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="1" name="Constant_170524" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="0" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="2" name="StringTensorUnpack_170519" type="StringTensorUnpack" version="opset15">
-      <input>
-        <port id="0" precision="STRING">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="U8">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="3" name="ShapeOf_170520" type="ShapeOf" version="opset3">
-      <data output_type="i64" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I64">
-          <dim>1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="4" name="Constant_170521" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="0" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="5" name="Constant_170522" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="0" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="6" name="Gather_170523" type="Gather" version="opset8">
-      <data batch_dims="0" />
-      <input>
-        <port id="0" precision="I64">
-          <dim>1</dim>
-        </port>
-        <port id="1" precision="I64" />
-        <port id="2" precision="I64" />
-      </input>
-      <output>
-        <port id="3" precision="I64" />
-      </output>
-    </layer>
-    <layer id="7" name="Constant_170525" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="8" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="8" name="Range_170526" type="Range" version="opset4">
-      <data output_type="i32" />
-      <input>
-        <port id="0" precision="I64" />
-        <port id="1" precision="I64" />
-        <port id="2" precision="I64" />
-      </input>
-      <output>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="9" name="Constant_170527" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="8" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="10" name="Constant_170528" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="8" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="11" name="Add_170529" type="Add" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="I64" />
-        <port id="1" precision="I64" />
-      </input>
-      <output>
-        <port id="2" precision="I64" />
-      </output>
-    </layer>
-    <layer id="12" name="Constant_170530" type="Const" version="opset1">
-      <data element_type="i64" shape="" offset="8" size="8" />
-      <output>
-        <port id="0" precision="I64" />
-      </output>
-    </layer>
-    <layer id="13" name="Range_170531" type="Range" version="opset4">
-      <data output_type="i32" />
-      <input>
-        <port id="0" precision="I64" />
-        <port id="1" precision="I64" />
-        <port id="2" precision="I64" />
-      </input>
-      <output>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="14" name="Constant_170593" type="Const" version="opset1">
-      <data element_type="u8" shape="289" offset="16" size="289" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>289</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="15" name="SpecialTokensSplit_170594" type="SpecialTokensSplit" version="extension">
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="4" precision="U8">
-          <dim>-1</dim>
-        </port>
-        <port id="5" precision="U8">
-          <dim>289</dim>
-        </port>
-      </input>
-      <output>
-        <port id="6" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="7" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="8" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="9" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="10" precision="U8">
-          <dim>-1</dim>
-        </port>
-        <port id="11" precision="BOOL">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="16" name="Constant_170596" type="Const" version="opset1">
-      <data element_type="u8" shape="274" offset="305" size="274" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>274</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="17" name="RegexSplit_170597" type="RegexSplit" version="extension">
-      <data behaviour="remove" invert="true" max_splits="-1" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="4" precision="U8">
-          <dim>-1</dim>
-        </port>
-        <port id="5" precision="BOOL">
-          <dim>-1</dim>
-        </port>
-        <port id="6" precision="U8">
-          <dim>274</dim>
-        </port>
-      </input>
-      <output>
-        <port id="7" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="8" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="9" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="10" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="11" precision="U8">
-          <dim>-1</dim>
-        </port>
-        <port id="12" precision="BOOL">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="18" name="Constant_170599" type="Const" version="opset1">
-      <data element_type="i32" shape="200029" offset="579" size="800116" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>200029</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="19" name="Constant_170601" type="Const" version="opset1">
-      <data element_type="i32" shape="200029" offset="800695" size="800116" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>200029</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="20" name="Constant_170603" type="Const" version="opset1">
-      <data element_type="u8" shape="1398109" offset="1600811" size="1398109" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>1398109</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="21" name="Constant_170611" type="Const" version="opset1">
-      <data element_type="i32" shape="199742" offset="2998920" size="798968" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>199742</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="22" name="Constant_170613" type="Const" version="opset1">
-      <data element_type="i32" shape="199742" offset="3797888" size="798968" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>199742</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="23" name="Constant_170615" type="Const" version="opset1">
-      <data element_type="u8" shape="718313" offset="4596856" size="718313" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>718313</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="24" name="Constant_170617" type="Const" version="opset1">
-      <data element_type="i32" shape="199742" offset="5315169" size="798968" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>199742</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="25" name="Constant_170619" type="Const" version="opset1">
-      <data element_type="i32" shape="199742" offset="6114137" size="798968" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>199742</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="26" name="Constant_170621" type="Const" version="opset1">
-      <data element_type="u8" shape="679101" offset="6913105" size="679101" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>679101</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="27" name="Constant_170605" type="Const" version="opset1">
-      <data element_type="i32" shape="432" offset="7592206" size="1728" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>432</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="28" name="Constant_170607" type="Const" version="opset1">
-      <data element_type="i32" shape="432" offset="7593934" size="1728" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>432</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="29" name="Constant_170609" type="Const" version="opset1">
-      <data element_type="u8" shape="5366" offset="7595662" size="5366" />
-      <output>
-        <port id="0" precision="U8">
-          <dim>5366</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="30" name="Constant_170622" type="Const" version="opset1">
-      <data element_type="i32" shape="432" offset="7601028" size="1728" />
-      <output>
-        <port id="0" precision="I32">
-          <dim>432</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="31" name="BPETokenizer_170623" type="BPETokenizer" version="extension">
-      <data unk_token="" fuse_unk="false" suffix_indicator="" end_suffix="" byte_fallback="false" cache_capacity="40003" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="4" precision="U8">
-          <dim>-1</dim>
-        </port>
-        <port id="5" precision="I32">
-          <dim>200029</dim>
-        </port>
-        <port id="6" precision="I32">
-          <dim>200029</dim>
-        </port>
-        <port id="7" precision="U8">
-          <dim>1398109</dim>
-        </port>
-        <port id="8" precision="I32">
-          <dim>199742</dim>
-        </port>
-        <port id="9" precision="I32">
-          <dim>199742</dim>
-        </port>
-        <port id="10" precision="U8">
-          <dim>718313</dim>
-        </port>
-        <port id="11" precision="I32">
-          <dim>199742</dim>
-        </port>
-        <port id="12" precision="I32">
-          <dim>199742</dim>
-        </port>
-        <port id="13" precision="U8">
-          <dim>679101</dim>
-        </port>
-        <port id="14" precision="I32">
-          <dim>432</dim>
-        </port>
-        <port id="15" precision="I32">
-          <dim>432</dim>
-        </port>
-        <port id="16" precision="U8">
-          <dim>5366</dim>
-        </port>
-        <port id="17" precision="I32">
-          <dim>432</dim>
-        </port>
-      </input>
-      <output>
-        <port id="18" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="19" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="20" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="32" name="Subtract_170624" type="Subtract" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="33" name="Constant_170625" type="Const" version="opset1">
-      <data element_type="i32" shape="" offset="7602756" size="4" />
-      <output>
-        <port id="0" precision="I32" />
-      </output>
-    </layer>
-    <layer id="34" name="Minimum_170626" type="Minimum" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32" />
-      </input>
-      <output>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="35" name="Subtract_170627" type="Subtract" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="36" name="Subtract_170628" type="Subtract" version="opset1">
-      <data auto_broadcast="numpy" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="37" name="Constant_170629" type="Const" version="opset1">
-      <data element_type="i32" shape="" offset="7602760" size="4" />
-      <output>
-        <port id="0" precision="I32" />
-      </output>
-    </layer>
-    <layer id="38" name="ReduceMax_170630" type="ReduceMax" version="opset1">
-      <data keep_dims="false" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32" />
-      </input>
-      <output>
-        <port id="2" precision="I32" />
-      </output>
-    </layer>
-    <layer id="39" name="Constant_170631" type="Const" version="opset1">
-      <data element_type="i32" shape="" offset="7602764" size="4" />
-      <output>
-        <port id="0" precision="I32" />
-      </output>
-    </layer>
-    <layer id="40" name="RaggedToDense_170632" type="RaggedToDense" version="extension">
-      <data pad_right="false" m_pad_max_length="false" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="2" precision="I32">
-          <dim>-1</dim>
-        </port>
-        <port id="3" precision="I32" />
-        <port id="4" precision="I32" />
-      </input>
-      <output>
-        <port id="5" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-        <port id="6" precision="BOOL">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="41" name="Convert_170633" type="Convert" version="opset1">
-      <data destination_type="i32" />
-      <input>
-        <port id="0" precision="BOOL">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="42" name="Convert_170633.0" type="Convert" version="opset1">
-      <data destination_type="i64" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I64" names="attention_mask">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="44" name="RaggedToDense_170632.0" type="Convert" version="opset1">
-      <data destination_type="i64" />
-      <input>
-        <port id="0" precision="I32">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-      <output>
-        <port id="1" precision="I64" names="input_ids">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </output>
-    </layer>
-    <layer id="45" name="Result_170634" type="Result" version="opset1" output_names="input_ids">
-      <input>
-        <port id="0" precision="I64">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-    </layer>
-    <layer id="43" name="Result_170635" type="Result" version="opset1" output_names="attention_mask">
-      <input>
-        <port id="0" precision="I64">
-          <dim>-1</dim>
-          <dim>-1</dim>
-        </port>
-      </input>
-    </layer>
-  </layers>
-  <edges>
-    <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
-    <edge from-layer="1" from-port="0" to-layer="8" to-port="0" />
-    <edge from-layer="2" from-port="2" to-layer="15" to-port="3" />
-    <edge from-layer="2" from-port="3" to-layer="15" to-port="4" />
-    <edge from-layer="2" from-port="1" to-layer="15" to-port="2" />
-    <edge from-layer="2" from-port="1" to-layer="3" to-port="0" />
-    <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
-    <edge from-layer="4" from-port="0" to-layer="6" to-port="1" />
-    <edge from-layer="5" from-port="0" to-layer="6" to-port="2" />
-    <edge from-layer="6" from-port="3" to-layer="8" to-port="1" />
-    <edge from-layer="6" from-port="3" to-layer="11" to-port="0" />
-    <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
-    <edge from-layer="8" from-port="3" to-layer="15" to-port="0" />
-    <edge from-layer="9" from-port="0" to-layer="13" to-port="0" />
-    <edge from-layer="10" from-port="0" to-layer="11" to-port="1" />
-    <edge from-layer="11" from-port="2" to-layer="13" to-port="1" />
-    <edge from-layer="12" from-port="0" to-layer="13" to-port="2" />
-    <edge from-layer="13" from-port="3" to-layer="15" to-port="1" />
-    <edge from-layer="14" from-port="0" to-layer="15" to-port="5" />
-    <edge from-layer="15" from-port="6" to-layer="17" to-port="0" />
-    <edge from-layer="15" from-port="7" to-layer="17" to-port="1" />
-    <edge from-layer="15" from-port="8" to-layer="17" to-port="2" />
-    <edge from-layer="15" from-port="9" to-layer="17" to-port="3" />
-    <edge from-layer="15" from-port="10" to-layer="17" to-port="4" />
-    <edge from-layer="15" from-port="11" to-layer="17" to-port="5" />
-    <edge from-layer="16" from-port="0" to-layer="17" to-port="6" />
-    <edge from-layer="17" from-port="7" to-layer="31" to-port="0" />
-    <edge from-layer="17" from-port="8" to-layer="31" to-port="1" />
-    <edge from-layer="17" from-port="9" to-layer="31" to-port="2" />
-    <edge from-layer="17" from-port="10" to-layer="31" to-port="3" />
-    <edge from-layer="17" from-port="11" to-layer="31" to-port="4" />
-    <edge from-layer="18" from-port="0" to-layer="31" to-port="5" />
-    <edge from-layer="19" from-port="0" to-layer="31" to-port="6" />
-    <edge from-layer="20" from-port="0" to-layer="31" to-port="7" />
-    <edge from-layer="21" from-port="0" to-layer="31" to-port="8" />
-    <edge from-layer="22" from-port="0" to-layer="31" to-port="9" />
-    <edge from-layer="23" from-port="0" to-layer="31" to-port="10" />
-    <edge from-layer="24" from-port="0" to-layer="31" to-port="11" />
-    <edge from-layer="25" from-port="0" to-layer="31" to-port="12" />
-    <edge from-layer="26" from-port="0" to-layer="31" to-port="13" />
-    <edge from-layer="27" from-port="0" to-layer="31" to-port="14" />
-    <edge from-layer="28" from-port="0" to-layer="31" to-port="15" />
-    <edge from-layer="29" from-port="0" to-layer="31" to-port="16" />
-    <edge from-layer="30" from-port="0" to-layer="31" to-port="17" />
-    <edge from-layer="31" from-port="19" to-layer="35" to-port="0" />
-    <edge from-layer="31" from-port="20" to-layer="40" to-port="2" />
-    <edge from-layer="31" from-port="19" to-layer="40" to-port="1" />
-    <edge from-layer="31" from-port="19" to-layer="36" to-port="0" />
-    <edge from-layer="31" from-port="18" to-layer="32" to-port="1" />
-    <edge from-layer="31" from-port="19" to-layer="32" to-port="0" />
-    <edge from-layer="32" from-port="2" to-layer="34" to-port="0" />
-    <edge from-layer="33" from-port="0" to-layer="34" to-port="1" />
-    <edge from-layer="34" from-port="2" to-layer="35" to-port="1" />
-    <edge from-layer="35" from-port="2" to-layer="36" to-port="1" />
-    <edge from-layer="35" from-port="2" to-layer="40" to-port="0" />
-    <edge from-layer="36" from-port="2" to-layer="38" to-port="0" />
-    <edge from-layer="37" from-port="0" to-layer="38" to-port="1" />
-    <edge from-layer="38" from-port="2" to-layer="40" to-port="3" />
-    <edge from-layer="39" from-port="0" to-layer="40" to-port="4" />
-    <edge from-layer="40" from-port="6" to-layer="41" to-port="0" />
-    <edge from-layer="40" from-port="5" to-layer="44" to-port="0" />
-    <edge from-layer="41" from-port="1" to-layer="42" to-port="0" />
-    <edge from-layer="42" from-port="1" to-layer="43" to-port="0" />
-    <edge from-layer="44" from-port="1" to-layer="45" to-port="0" />
-  </edges>
-  <rt_info>
-    <add_attention_mask value="True" />
-    <add_prefix_space />
-    <add_special_tokens value="True" />
-    <bos_token_id value="199999" />
-    <chat_template value="{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '&lt;|' + message['role'] + '|>' + message['content'] + '&lt;|tool|>' + message['tools'] + '&lt;|/tool|>' + '&lt;|end|>' }}{% else %}{{ '&lt;|' + message['role'] + '|>' + message['content'] + '&lt;|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '&lt;|assistant|>' }}{% else %}{{ eos_token }}{% endif %}" />
-    <clean_up_tokenization_spaces />
-    <detokenizer_input_type value="i64" />
-    <eos_token_id value="199999" />
-    <handle_special_tokens_with_re />
-    <max_length />
-    <number_of_inputs value="1" />
-    <openvino_tokenizers_version value="2025.1.0.0-523-710ddf14de8" />
-    <openvino_version value="2025.1.0-18503-6fec06580ab-releases/2025/1" />
-    <original_tokenizer_class value="&lt;class 'transformers.models.gpt2.tokenization_gpt2_fast.GPT2TokenizerFast'>" />
-    <pad_token_id value="199999" />
-    <sentencepiece_version value="0.2.0" />
-    <skip_special_tokens value="True" />
-    <streaming_detokenizer value="False" />
-    <tokenizer_output_type value="i64" />
-    <tokenizers_version value="0.21.1" />
-    <transformers_version value="4.51.3" />
-    <use_max_padding value="False" />
-    <use_sentencepiece_backend value="False" />
-    <utf8_replace_mode value="replace" />
-    <with_detokenizer value="True" />
-  </rt_info>
-</net>
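Note: both graphs above use custom `extension` ops (SpecialTokensSplit, BPETokenizer, VocabDecoder, ...), so importing openvino_tokenizers is required to register them before reading the IR. A minimal round-trip sketch, assuming a local copy of the folder:

```python
import openvino as ov
import openvino_tokenizers  # noqa: F401 -- registers the custom "extension" ops

core = ov.Core()
tokenizer = core.compile_model("int8_asym-ov/openvino_tokenizer.xml", "CPU")
detokenizer = core.compile_model("int8_asym-ov/openvino_detokenizer.xml", "CPU")

# Output names match the Result nodes above: input_ids / attention_mask / string_output.
ids = tokenizer(["Hello world"])["input_ids"]
print(detokenizer(ids)["string_output"])
```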
 
int8_asym-ov/special_tokens_map.json DELETED
@@ -1,30 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}
 
int8_asym-ov/tokenizer.json DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:382cc235b56c725945e149cc25f191da667c836655efd0857b004320e90e91ea
-size 15524095
 
int8_asym-ov/tokenizer_config.json DELETED
@@ -1,112 +0,0 @@
-{
-  "add_bos_token": false,
-  "add_eos_token": false,
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "199999": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "200018": {
-      "content": "<|endofprompt|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "200019": {
-      "content": "<|assistant|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "200020": {
-      "content": "<|end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "200021": {
-      "content": "<|user|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "200022": {
-      "content": "<|system|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    },
-    "200023": {
-      "content": "<|tool|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "200024": {
-      "content": "<|/tool|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "200025": {
-      "content": "<|tool_call|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "200026": {
-      "content": "<|/tool_call|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "200027": {
-      "content": "<|tool_response|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": false
-    },
-    "200028": {
-      "content": "<|tag|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": true,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
-  "extra_special_tokens": {},
-  "model_max_length": 131072,
-  "pad_token": "<|endoftext|>",
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "<|endoftext|>"
-}
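Note: the chat_template above wraps each message in role markers and appends `<|assistant|>` when a generation prompt is requested; a sketch of how it renders (assumes a local copy of the folder):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("int8_asym-ov")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi!"},
]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# -> <|system|>You are a helpful assistant.<|end|><|user|>Hi!<|end|><|assistant|>
```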
 
int8_asym-ov/vocab.json DELETED
The diff for this file is too large to render. See raw diff