pious-butterfly-170: remove layers from flan-t5-small and fine-tune
- config.json +39 -8
- generation_config.json +270 -0
- onnx/decoder_model.onnx +2 -2
- onnx/decoder_model_merged.onnx +2 -2
- onnx/decoder_model_merged_quantized.onnx +2 -2
- onnx/decoder_model_quantized.onnx +2 -2
- onnx/decoder_with_past_model.onnx +2 -2
- onnx/decoder_with_past_model_quantized.onnx +2 -2
- onnx/encoder_model.onnx +2 -2
- onnx/encoder_model_quantized.onnx +2 -2
- quantize_config.json +1 -1
- special_tokens_map.json +21 -3
- tokenizer.json +0 -0
- tokenizer_config.json +4 -1
config.json
CHANGED
@@ -1,29 +1,60 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "pious-butterfly-170",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
   "d_ff": 1024,
   "d_kv": 64,
-  "d_model":
+  "d_model": 512,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "
+  "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
-  "feed_forward_proj": "
+  "feed_forward_proj": "gated-gelu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
-  "is_gated_act":
+  "is_gated_act": true,
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers":
-  "num_heads":
-  "num_layers":
+  "num_decoder_layers": 3,
+  "num_heads": 6,
+  "num_layers": 3,
+  "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
   "transformers_version": "4.43.4",
   "use_cache": true,
   "vocab_size": 32128
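The new config keeps flan-t5-small's width (d_model 512, d_ff 1024, 6 heads) but cuts num_layers and num_decoder_layers down to 3 from the base model's 8 blocks. The commit does not ship the script that produced this, but one plausible way to get such a checkpoint is to truncate the encoder and decoder stacks in place and then fine-tune; the sketch below only illustrates that idea, with the keep-count taken from the config above and everything else assumed.

# Hypothetical sketch, not the author's actual training code: keep the first 3
# of flan-t5-small's encoder/decoder blocks, then fine-tune the smaller model.
import torch.nn as nn
from transformers import T5ForConditionalGeneration

model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")

keep = 3  # matches "num_layers": 3 and "num_decoder_layers": 3 above
model.encoder.block = nn.ModuleList(list(model.encoder.block)[:keep])
model.decoder.block = nn.ModuleList(list(model.decoder.block)[:keep])
model.config.num_layers = keep
model.config.num_decoder_layers = keep

# Fine-tuning (e.g. with Seq2SeqTrainer) would follow; the result is then saved:
model.save_pretrained("pious-butterfly-170")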
generation_config.json
CHANGED
@@ -1,5 +1,275 @@
 {
   "_from_model_config": true,
+  "bad_words_ids": [
+    [8581], [815], [1216], [3, 2781], [2886], [815], [3640], [3, 4994], [1216], [5781],
+    [1216], [2886], [3, 6855], [6949], [3, 7315], [7706], [7738], [7927], [815], [8759],
+    [3, 8578], [8581], [8759], [9529], [3, 9715], [3, 10008], [10203], [10490], [5781], [11112],
+    [3640], [11178], [7927], [2886], [3, 12030], [3, 12105], [13150], [13292], [13503], [7102, 208],
+    [13721], [3, 4994], [5781], [9858, 23], [15484], [7706], [24556], [16998], [17081], [17227],
+    [3, 2781], [28577, 23], [3, 17789], [3, 17789], [3640], [18364], [18754], [3, 19230], [19329], [19981],
+    [16998], [3, 26, 3142], [3, 10008], [1560, 1171], [22658], [23568], [13292], [24556], [9529], [24974],
+    [25039], [25164], [25547], [6949], [25723], [26113], [26806], [30320], [6949], [3, 31145],
+    [31648], [11178]
+  ],
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
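Most of the new generation_config.json is a bad_words_ids list, which generate() picks up by default and uses to block those token-id sequences during decoding. The commit does not say which strings these ids correspond to, so the example below uses made-up placeholder words purely to show how such a list is usually built from the tokenizer and passed to generation (the bare repo name stands in for the full hub id).

# Illustration only: the real banned words behind the ids above are not documented here.
from transformers import AutoTokenizer, T5ForConditionalGeneration

tok = AutoTokenizer.from_pretrained("pious-butterfly-170")
model = T5ForConditionalGeneration.from_pretrained("pious-butterfly-170")

banned = ["placeholder", "words"]  # hypothetical
bad_words_ids = [tok(w, add_special_tokens=False).input_ids for w in banned]

inputs = tok("summarize: some input text", return_tensors="pt")
out = model.generate(**inputs, bad_words_ids=bad_words_ids, max_new_tokens=64)
print(tok.decode(out[0], skip_special_tokens=True))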
onnx/decoder_model.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:568444304b19da5c4a8eb3c7282fe53e4c08c57991e5812ea19fbd846e25e9a6
+size 169472577
onnx/decoder_model_merged.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9fba6c06c12046fa4b9c7b680f9fc6093e1210bcaa7c345456f335d791dffb50
+size 169578954
onnx/decoder_model_merged_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fe83a419e30af7f28066f20c6178956963dae41cd9da67781d03398e18bad42e
+size 42895009
onnx/decoder_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b3aeaa9f321ebd94bf26537f423fe31a817d85d0d48378a1fe0751e18ca4adf6
+size 42753828
onnx/decoder_with_past_model.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:40a0ee7a12e84d445f2fd76f9aa779130d77391cf257f1df27a85519c4e814f9
+size 164738287
onnx/decoder_with_past_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:af0ab350e4137be691706d89bb9d5dea92ee871620361481be583afdb2dcd96e
+size 41540983
onnx/encoder_model.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:61c4716849d3d0fe971611531f33a1cef3e7cad9b839cf9cc90112ef6b62002f
+size 94183774
onnx/encoder_model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1dffba65048f543e7598826fe6c5a86f1d10cf41b13e1e03420995e3d03cf81b
+size 23688378
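All eight ONNX files are Git LFS pointers, so only their hashes and sizes change here. Since the file names follow the usual Optimum/transformers.js seq2seq export layout, a loading sketch with Optimum's ONNX Runtime backend might look like the following; the model id, subfolder, file choices, and keyword arguments are assumptions and may vary with the installed Optimum version.

# Hypothetical loading sketch; "pious-butterfly-170" stands in for the full hub id
# and the file names are taken from the onnx/ listing above.
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM

tok = AutoTokenizer.from_pretrained("pious-butterfly-170")
model = ORTModelForSeq2SeqLM.from_pretrained(
    "pious-butterfly-170",
    subfolder="onnx",
    encoder_file_name="encoder_model.onnx",
    decoder_file_name="decoder_model_merged.onnx",
)

inputs = tok("summarize: some input text", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)
print(tok.decode(out[0], skip_special_tokens=True))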
quantize_config.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "modes": [
-    "
+    "q8"
   ],
   "per_channel": true,
   "reduce_range": true,
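quantize_config.json records how the *_quantized.onnx variants were produced: 8-bit ("q8") weight quantization with per-channel scales and reduced range. Assuming dynamic (weight-only) quantization was used, a rough equivalent with onnxruntime's stock quantizer would be:

# Sketch matching "modes": ["q8"], "per_channel": true, "reduce_range": true;
# assumes dynamic quantization and uses illustrative file paths.
from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic(
    model_input="onnx/encoder_model.onnx",
    model_output="onnx/encoder_model_quantized.onnx",
    per_channel=True,
    reduce_range=True,
    weight_type=QuantType.QUInt8,  # 8-bit weights
)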
special_tokens_map.json
CHANGED
@@ -101,7 +101,25 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token":
-
-
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
tokenizer.json
CHANGED
The diff for this file is too large to render.
tokenizer_config.json
CHANGED
@@ -1,4 +1,5 @@
 {
+  "add_prefix_space": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<pad>",
@@ -930,8 +931,10 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
-  "
+  "legacy": true,
+  "model_max_length": 512,
   "pad_token": "<pad>",
+  "sp_model_kwargs": {},
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
 }
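The tokenizer_config.json update pins model_max_length to 512 (matching n_positions in config.json) and makes legacy, sp_model_kwargs, and add_prefix_space explicit. A quick usage check, again using the bare repo name as a stand-in for the full hub id:

# Hypothetical check that truncation now caps inputs at the new model_max_length.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("pious-butterfly-170")
ids = tok("some long input " * 400, truncation=True).input_ids
assert tok.model_max_length == 512
assert len(ids) <= tok.model_max_length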