Update modeling_motif.py
modeling_motif.py (+0 -25)
@@ -959,28 +959,6 @@ class MotifPreTrainedModel(PreTrainedModel):
     def _init_weights(self, module):
         module_std = self.config.initializer_range
         if isinstance(module, nn.Linear):
-            if getattr(module, "__do_scale_tager__", False):
-                module_std = module_std / self.config.init_scale_o
-
-            if getattr(module, "__do_scale_tager_mu_o__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std / math.sqrt(2*(self.config.hidden_size / self.config.dim_model_base_init)*self.config.num_hidden_layers)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_ffn__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std = module_std / math.sqrt(2*(self.config.hidden_size / self.config.dim_model_base_init)*self.config.num_hidden_layers)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_dim_model__", False):
-                if self.config.dim_model_base_init is not None:
-                    module_std = module_std / math.sqrt(self.config.hidden_size / self.config.dim_model_base_init)
-                else:
-                    module_std = module_std
-            elif getattr(module, "__do_scale_tager_mu_dim_base_model__", False):
-                module_std = module_std / math.sqrt(self.config.dim_model_base_lmh) ### lmhead.. 1
-            else:
-                module_std = module_std
             module.weight.data.normal_(mean=0.0, std=module_std)
             module.weight.data = torch.where(abs(module.weight.data) > module_std*3, 0, module.weight.data)
             #torch.nn.init.trunc_normal_(module.weight.data, mean=0.0, std=module_std, a=-3*module_std, b=3*module_std)
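The branches removed above scaled the per-module init std muP-style, e.g. dividing by math.sqrt(2*(hidden_size/dim_model_base_init)*num_hidden_layers); with illustrative values hidden_size=2048, dim_model_base_init=256 and num_hidden_layers=32 (assumptions, not values from this repository) that divisor would be sqrt(512) ≈ 22.6. What survives is a single std with a hard 3-sigma cutoff. A minimal sketch of that surviving path, not part of the commit, assuming a toy nn.Linear and an illustrative initializer_range of 0.02:

    import torch
    import torch.nn as nn

    module_std = 0.02          # assumed stand-in for config.initializer_range
    linear = nn.Linear(16, 16) # assumed toy layer, not a Motif module

    # Draw weights from N(0, std^2)...
    linear.weight.data.normal_(mean=0.0, std=module_std)
    # ...then zero any entry whose magnitude exceeds 3 standard deviations,
    # mirroring the torch.where line kept above.
    linear.weight.data = torch.where(
        linear.weight.data.abs() > module_std * 3,
        torch.zeros_like(linear.weight.data),
        linear.weight.data,
    )

Note that the kept code zeroes out-of-range entries rather than redrawing them; the commented-out torch.nn.init.trunc_normal_ call would instead resample values until they fall within ±3 std.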
@@ -1419,9 +1397,6 @@ class MotifForCausalLM(MotifPreTrainedModel, GenerationMixin):
         if getattr(config, "tie_word_embeddings", True):
             logger.info('tie embeddings')
             self.tie_weights()
-        else:
-            # <|_5_|>
-            self.lm_head.__do_scale_tager_mu_dim_base_model__ = False
 
     def get_input_embeddings(self):
         return self.model.embed_tokens
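With the else branch gone, the constructor only handles the tied-embedding case and delegates to PreTrainedModel.tie_weights(), which in Transformers assigns the output projection's weight to the input embedding's weight. A minimal standalone sketch of what that tying amounts to, using assumed toy modules rather than the Motif classes:

    import torch.nn as nn

    vocab_size, hidden_size = 100, 32   # assumed illustrative sizes
    embed_tokens = nn.Embedding(vocab_size, hidden_size)
    lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

    # Tie the parameters: both modules now reference the same tensor, so an
    # update made through one is visible through the other.
    lm_head.weight = embed_tokens.weight
    assert lm_head.weight.data_ptr() == embed_tokens.weight.data_ptr()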