FlashAttention support
configuration_gptbert.py  (+1, -80)
```diff
--- a/configuration_gptbert.py
+++ b/configuration_gptbert.py
@@ -14,55 +14,7 @@ class GptBertConfig(PretrainedConfig):
         **kwargs
     ):
         super().__init__(**kwargs)
-
-        self.model: str
-
-        # General information
-        self.model = "base"
-
-        # Vocabulary
-        self.vocab_size = 16384
-        self.max_sequence_length = 512
-
-        # Model dimensions
-        self.hidden_size = 768
-        self.intermediate_size = 2048
-        self.num_attention_heads = 12
-        self.num_layers = 12
-        self.d_qk = 64
-
-        # Dropout probabilities
-        self.embedding_dropout_p = 0.1
-        self.attention_probabilities_dropout_p = 0.1
-        self.attention_output_dropout_p = 0.1
-        self.feed_forward_dropout_p = 0.1
-        self.attention_dropout = 0.1
-        self.hidden_dropout_prob = 0.2
-
-        # Position Emebedding
-        self.rope_theta = 160_000
-
-        # Norms
-        self.word_norm_eps = 1e-7
-        self.word_norm_affine = False
-
-        self.attention_pre_norm_eps = 1e-7
-        self.attention_pre_norm_affine = False
-
-        self.attention_inter_norm_eps = 1e-7
-        self.attention_inter_norm_affine = True
-
-        self.feed_forward_pre_norm_eps = 1e-7
-        self.feed_forward_pre_norm_affine = False
-
-        self.feed_forward_inter_norm_eps = 1e-7
-        self.feed_forward_inter_norm_affine = False
-
-        self.classifier_pre_norm_eps = 1e-7
-        self.classifier_pre_norm_affine = False
-
-        self.classifier_post_norm_eps = 1e-7
-        self.classifier_post_norm_affine = False
+        self.model = "norbert4"
 
         if config_file is not None:
             if type(config_file) is str:
@@ -80,34 +32,3 @@ class GptBertConfig(PretrainedConfig):
             if isinstance(value, str):
                 value = value.lower()
             setattr(self, attr, value)
-
-    def __repr__(self) -> str:
-        return str(self.to_json_string())
-
-    def to_dict(self) -> dict:
-        """Serializes this instance to a Python dictionary."""
-        output: dict
-
-        output = copy.deepcopy(self.__dict__)
-        return output
-
-    def to_json_string(self) -> str:
-        """Serializes this instance to a JSON string."""
-        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
-
-    def to_json_file(self, json_file_path: Path | str) -> None:
-        """Save this instance to a json file."""
-        if isinstance(json_file_path, str):
-            json_file_path: Path = Path(json_file_path)
-        with json_file_path.open("w", encoding='utf-8') as writer:
-            writer.write(self.to_json_string())
-
-    @classmethod
-    def create_base_config(cls, json_file_path: Path | str | None = None) -> GptBertConfig:
-        config: GptBertConfig
-
-        config = GptBertConfig()
-        if json_file_path is not None:
-            config.to_json_file(json_file_path)
-
-        return config
```
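The deletions fall into two groups: hard-coded defaults that now come from the config file, and JSON helpers (`__repr__`, `to_dict`, `to_json_string`, `to_json_file`, `create_base_config`) that duplicated what `transformers.PretrainedConfig` already provides. A minimal usage sketch of how serialization works after this change, assuming a standard remote-code checkpoint (the checkpoint path is a placeholder, not taken from this diff):

```python
from transformers import AutoConfig

# Hypothetical usage sketch: with the custom JSON helpers deleted,
# serialization falls back to methods inherited from PretrainedConfig.
config = AutoConfig.from_pretrained(
    "path/to/gptbert-checkpoint",  # placeholder checkpoint name
    trust_remote_code=True,        # loads configuration_gptbert.py from the repo
)

print(config.to_json_string())      # inherited from PretrainedConfig
config.to_json_file("config.json")  # inherited replacement for the deleted helper
```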
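The commit title advertises FlashAttention support, but the attention changes themselves live in the modeling file, which this diff does not show. Assuming the modeling code wires up the standard transformers attention-backend switch, opting in would look roughly like this sketch (the checkpoint name is again a placeholder):

```python
import torch
from transformers import AutoModel

# Hypothetical opt-in sketch, assuming the (unshown) modeling code registers
# a FlashAttention path behind the standard attn_implementation switch.
model = AutoModel.from_pretrained(
    "path/to/gptbert-checkpoint",             # placeholder checkpoint name
    attn_implementation="flash_attention_2",  # standard transformers kwarg
    torch_dtype=torch.bfloat16,               # FlashAttention requires fp16/bf16
    trust_remote_code=True,
)
```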