Avanthika committed on
Commit
9ce941d
·
1 Parent(s): 0a3739d

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +270 -0
config.json ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Transformer": {
3
+ "encoder": {
4
+ "sentence_embedding": {
5
+ "embedding": {
6
+ "num_embeddings": 71,
7
+ "embedding_dim": 512
8
+ },
9
+ "position_encoder": {},
10
+ "dropout": 0.1
11
+ },
12
+ "layers": [
13
+ {
14
+ "attention": {
15
+ "qkv_layer": {
16
+ "in_features": 512,
17
+ "out_features": 1536,
18
+ "bias": true
19
+ },
20
+ "linear_layer": {
21
+ "in_features": 512,
22
+ "out_features": 512,
23
+ "bias": true
24
+ }
25
+ },
26
+ "norm1": {},
27
+ "dropout1": 0.1,
28
+ "ffn": {
29
+ "linear1": {
30
+ "in_features": 512,
31
+ "out_features": 2048,
32
+ "bias": true
33
+ },
34
+ "linear2": {
35
+ "in_features": 2048,
36
+ "out_features": 512,
37
+ "bias": true
38
+ }
39
+ },
40
+ "norm2": {},
41
+ "dropout2": 0.1
42
+ },
43
+ {
44
+ "attention": {
45
+ "qkv_layer": {
46
+ "in_features": 512,
47
+ "out_features": 1536,
48
+ "bias": true
49
+ },
50
+ "linear_layer": {
51
+ "in_features": 512,
52
+ "out_features": 512,
53
+ "bias": true
54
+ }
55
+ },
56
+ "norm1": {},
57
+ "dropout1": 0.1,
58
+ "ffn": {
59
+ "linear1": {
60
+ "in_features": 512,
61
+ "out_features": 2048,
62
+ "bias": true
63
+ },
64
+ "linear2": {
65
+ "in_features": 2048,
66
+ "out_features": 512,
67
+ "bias": true
68
+ }
69
+ },
70
+ "norm2": {},
71
+ "dropout2": 0.1
72
+ },
73
+ {
74
+ "attention": {
75
+ "qkv_layer": {
76
+ "in_features": 512,
77
+ "out_features": 1536,
78
+ "bias": true
79
+ },
80
+ "linear_layer": {
81
+ "in_features": 512,
82
+ "out_features": 512,
83
+ "bias": true
84
+ }
85
+ },
86
+ "norm1": {},
87
+ "dropout1": 0.1,
88
+ "ffn": {
89
+ "linear1": {
90
+ "in_features": 512,
91
+ "out_features": 2048,
92
+ "bias": true
93
+ },
94
+ "linear2": {
95
+ "in_features": 2048,
96
+ "out_features": 512,
97
+ "bias": true
98
+ }
99
+ },
100
+ "norm2": {},
101
+ "dropout2": 0.1
102
+ }
103
+ ]
104
+ },
105
+ "decoder": {
106
+ "sentence_embedding": {
107
+ "embedding": {
108
+ "num_embeddings": 125,
109
+ "embedding_dim": 512
110
+ },
111
+ "position_encoder": {},
112
+ "dropout": 0.1
113
+ },
114
+ "layers": [
115
+ {
116
+ "self_attention": {
117
+ "qkv_layer": {
118
+ "in_features": 512,
119
+ "out_features": 1536,
120
+ "bias": true
121
+ },
122
+ "linear_layer": {
123
+ "in_features": 512,
124
+ "out_features": 512,
125
+ "bias": true
126
+ }
127
+ },
128
+ "layer_norm1": {},
129
+ "dropout1": 0.1,
130
+ "encoder_decoder_attention": {
131
+ "kv_layer": {
132
+ "in_features": 512,
133
+ "out_features": 1024,
134
+ "bias": true
135
+ },
136
+ "q_layer": {
137
+ "in_features": 512,
138
+ "out_features": 512,
139
+ "bias": true
140
+ },
141
+ "linear_layer": {
142
+ "in_features": 512,
143
+ "out_features": 512,
144
+ "bias": true
145
+ }
146
+ },
147
+ "layer_norm2": {},
148
+ "dropout2": 0.1,
149
+ "ffn": {
150
+ "linear1": {
151
+ "in_features": 512,
152
+ "out_features": 2048,
153
+ "bias": true
154
+ },
155
+ "linear2": {
156
+ "in_features": 2048,
157
+ "out_features": 512,
158
+ "bias": true
159
+ }
160
+ },
161
+ "layer_norm3": {},
162
+ "dropout3": 0.1
163
+ },
164
+ {
165
+ "self_attention": {
166
+ "qkv_layer": {
167
+ "in_features": 512,
168
+ "out_features": 1536,
169
+ "bias": true
170
+ },
171
+ "linear_layer": {
172
+ "in_features": 512,
173
+ "out_features": 512,
174
+ "bias": true
175
+ }
176
+ },
177
+ "layer_norm1": {},
178
+ "dropout1": 0.1,
179
+ "encoder_decoder_attention": {
180
+ "kv_layer": {
181
+ "in_features": 512,
182
+ "out_features": 1024,
183
+ "bias": true
184
+ },
185
+ "q_layer": {
186
+ "in_features": 512,
187
+ "out_features": 512,
188
+ "bias": true
189
+ },
190
+ "linear_layer": {
191
+ "in_features": 512,
192
+ "out_features": 512,
193
+ "bias": true
194
+ }
195
+ },
196
+ "layer_norm2": {},
197
+ "dropout2": 0.1,
198
+ "ffn": {
199
+ "linear1": {
200
+ "in_features": 512,
201
+ "out_features": 2048,
202
+ "bias": true
203
+ },
204
+ "linear2": {
205
+ "in_features": 2048,
206
+ "out_features": 512,
207
+ "bias": true
208
+ }
209
+ },
210
+ "layer_norm3": {},
211
+ "dropout3": 0.1
212
+ },
213
+ {
214
+ "self_attention": {
215
+ "qkv_layer": {
216
+ "in_features": 512,
217
+ "out_features": 1536,
218
+ "bias": true
219
+ },
220
+ "linear_layer": {
221
+ "in_features": 512,
222
+ "out_features": 512,
223
+ "bias": true
224
+ }
225
+ },
226
+ "layer_norm1": {},
227
+ "dropout1": 0.1,
228
+ "encoder_decoder_attention": {
229
+ "kv_layer": {
230
+ "in_features": 512,
231
+ "out_features": 1024,
232
+ "bias": true
233
+ },
234
+ "q_layer": {
235
+ "in_features": 512,
236
+ "out_features": 512,
237
+ "bias": true
238
+ },
239
+ "linear_layer": {
240
+ "in_features": 512,
241
+ "out_features": 512,
242
+ "bias": true
243
+ }
244
+ },
245
+ "layer_norm2": {},
246
+ "dropout2": 0.1,
247
+ "ffn": {
248
+ "linear1": {
249
+ "in_features": 512,
250
+ "out_features": 2048,
251
+ "bias": true
252
+ },
253
+ "linear2": {
254
+ "in_features": 2048,
255
+ "out_features": 512,
256
+ "bias": true
257
+ }
258
+ },
259
+ "layer_norm3": {},
260
+ "dropout3": 0.1
261
+ }
262
+ ]
263
+ },
264
+ "linear": {
265
+ "in_features": 512,
266
+ "out_features": 125,
267
+ "bias": true
268
+ }
269
+ }
270
+ }