Update modeling_motif.py
modeling_motif.py (+0 -25)
@@ -959,28 +959,6 @@ class MotifPreTrainedModel(PreTrainedModel):
     def _init_weights(self, module):
         module_std = self.config.initializer_range
         if isinstance(module, nn.Linear):
-            if getattr(module, "__do_scale_tager__", False):
-                module_std = module_std / self.config.init_scale_o
-
-            if getattr(module, "__do_scale_tager_mu_o__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std / math.sqrt(2*(self.config.hidden_size / self.config.dim_model_base_init)*self.config.num_hidden_layers)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_ffn__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std = module_std / math.sqrt(2*(self.config.hidden_size / self.config.dim_model_base_init)*self.config.num_hidden_layers)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_dim_model__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std / math.sqrt(self.config.hidden_size / self.config.dim_model_base_init)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_dim_base_model__", False):
-                module_std = module_std / math.sqrt(self.config.dim_model_base_lmh) ### lmhead.. 1
-            else:
-                module_std = module_std
             module.weight.data.normal_(mean=0.0, std=module_std)
             module.weight.data = torch.where(abs(module.weight.data) > module_std*3, 0, module.weight.data)
             #torch.nn.init.trunc_normal_(module.weight.data, mean=0.0, std=module_std, a=-3*module_std, b=3*module_std)
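The branches removed above scaled the per-module init std muP-style, e.g. dividing by math.sqrt(2*(hidden_size/dim_model_base_init)*num_hidden_layers); with illustrative values hidden_size=2048, dim_model_base_init=256 and num_hidden_layers=32 (assumptions, not values from this repository) that divisor would be sqrt(512) ≈ 22.6. What survives is a single std with a hard 3-sigma cutoff. A minimal sketch of that surviving path, not part of the commit, assuming a toy nn.Linear and an illustrative initializer_range of 0.02:

    import torch
    import torch.nn as nn

    module_std = 0.02          # assumed stand-in for config.initializer_range
    linear = nn.Linear(16, 16) # assumed toy layer, not a Motif module

    # Draw weights from N(0, std^2)...
    linear.weight.data.normal_(mean=0.0, std=module_std)
    # ...then zero any entry whose magnitude exceeds 3 standard deviations,
    # mirroring the torch.where line kept above.
    linear.weight.data = torch.where(
        linear.weight.data.abs() > module_std * 3,
        torch.zeros_like(linear.weight.data),
        linear.weight.data,
    )

Note that the kept code zeroes out-of-range entries rather than redrawing them; the commented-out torch.nn.init.trunc_normal_ call would instead resample values until they fall within ±3 std.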
@@ -1419,9 +1397,6 @@ class MotifForCausalLM(MotifPreTrainedModel, GenerationMixin):
         if getattr(config, "tie_word_embeddings", True):
             logger.info('tie embeddings')
             self.tie_weights()
-        else:
-            # <|_5_|>
-            self.lm_head.__do_scale_tager_mu_dim_base_model__ = False
 
     def get_input_embeddings(self):
         return self.model.embed_tokens
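With the else branch gone, the constructor only handles the tied-embedding case and delegates to PreTrainedModel.tie_weights(), which in Transformers assigns the output projection's weight to the input embedding's weight. A minimal standalone sketch of what that tying amounts to, using assumed toy modules rather than the Motif classes:

    import torch.nn as nn

    vocab_size, hidden_size = 100, 32   # assumed illustrative sizes
    embed_tokens = nn.Embedding(vocab_size, hidden_size)
    lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

    # Tie the parameters: both modules now reference the same tensor, so an
    # update made through one is visible through the other.
    lm_head.weight = embed_tokens.weight
    assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()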