Update modeling_motif.py
modeling_motif.py CHANGED: +3 -5
@@ -811,9 +811,8 @@ class MotifDecoderLayer(nn.Module):
         self.self_attn = MOTIF_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
         self.mlp = MotifMLP(config)
 
-
-        self.
-        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.input_layernorm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
 
     def forward(
@@ -1048,8 +1047,7 @@ class MotifModel(MotifPreTrainedModel):
         num_hidden_layers = config.num_hidden_layers
         self.layers = nn.ModuleList([MotifDecoderLayer(config = config, layer_idx=layer_idx) for layer_idx in range(num_hidden_layers)])
         self._attn_implementation = config._attn_implementation
-
-        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.norm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
         self.hidden_size = config.hidden_size
         self.num_heads = config.num_attention_heads
         self.head_dim = self.hidden_size // self.num_heads