{
  "Transformer": {
    "encoder": {
      "sentence_embedding": {
        "embedding": {
          "num_embeddings": 71,
          "embedding_dim": 512
        },
        "position_encoder": {},
        "dropout": 0.1
      },
      "layers": [
        {
          "attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "norm1": {},
          "dropout1": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "norm2": {},
          "dropout2": 0.1
        },
        {
          "attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "norm1": {},
          "dropout1": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "norm2": {},
          "dropout2": 0.1
        },
        {
          "attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "norm1": {},
          "dropout1": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "norm2": {},
          "dropout2": 0.1
        }
      ]
    },
    "decoder": {
      "sentence_embedding": {
        "embedding": {
          "num_embeddings": 125,
          "embedding_dim": 512
        },
        "position_encoder": {},
        "dropout": 0.1
      },
      "layers": [
        {
          "self_attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm1": {},
          "dropout1": 0.1,
          "encoder_decoder_attention": {
            "kv_layer": {
              "in_features": 512,
              "out_features": 1024,
              "bias": true
            },
            "q_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm2": {},
          "dropout2": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm3": {},
          "dropout3": 0.1
        },
        {
          "self_attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm1": {},
          "dropout1": 0.1,
          "encoder_decoder_attention": {
            "kv_layer": {
              "in_features": 512,
              "out_features": 1024,
              "bias": true
            },
            "q_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm2": {},
          "dropout2": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm3": {},
          "dropout3": 0.1
        },
        {
          "self_attention": {
            "qkv_layer": {
              "in_features": 512,
              "out_features": 1536,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm1": {},
          "dropout1": 0.1,
          "encoder_decoder_attention": {
            "kv_layer": {
              "in_features": 512,
              "out_features": 1024,
              "bias": true
            },
            "q_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            },
            "linear_layer": {
              "in_features": 512,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm2": {},
          "dropout2": 0.1,
          "ffn": {
            "linear1": {
              "in_features": 512,
              "out_features": 2048,
              "bias": true
            },
            "linear2": {
              "in_features": 2048,
              "out_features": 512,
              "bias": true
            }
          },
          "layer_norm3": {},
          "dropout3": 0.1
        }
      ]
    },
    "linear": {
      "in_features": 512,
      "out_features": 125,
      "bias": true
    }
  }
}
|
|