davda54 committed on
Commit
40430cf
·
verified ·
1 Parent(s): a3cac50

FlashAttention support

Browse files
Files changed (1) hide show
  1. configuration_gptbert.py +1 -80
configuration_gptbert.py CHANGED
@@ -14,55 +14,7 @@ class GptBertConfig(PretrainedConfig):
14
  **kwargs
15
  ):
16
  super().__init__(**kwargs)
17
-
18
- self.model: str
19
-
20
- # General information
21
- self.model = "base"
22
-
23
- # Vocabulary
24
- self.vocab_size = 16384
25
- self.max_sequence_length = 512
26
-
27
- # Model dimensions
28
- self.hidden_size = 768
29
- self.intermediate_size = 2048
30
- self.num_attention_heads = 12
31
- self.num_layers = 12
32
- self.d_qk = 64
33
-
34
- # Dropout probabilities
35
- self.embedding_dropout_p = 0.1
36
- self.attention_probabilities_dropout_p = 0.1
37
- self.attention_output_dropout_p = 0.1
38
- self.feed_forward_dropout_p = 0.1
39
- self.attention_dropout = 0.1
40
- self.hidden_dropout_prob = 0.2
41
-
42
- # Position Embedding
43
- self.rope_theta = 160_000
44
-
45
- # Norms
46
- self.word_norm_eps = 1e-7
47
- self.word_norm_affine = False
48
-
49
- self.attention_pre_norm_eps = 1e-7
50
- self.attention_pre_norm_affine = False
51
-
52
- self.attention_inter_norm_eps = 1e-7
53
- self.attention_inter_norm_affine = True
54
-
55
- self.feed_forward_pre_norm_eps = 1e-7
56
- self.feed_forward_pre_norm_affine = False
57
-
58
- self.feed_forward_inter_norm_eps = 1e-7
59
- self.feed_forward_inter_norm_affine = False
60
-
61
- self.classifier_pre_norm_eps = 1e-7
62
- self.classifier_pre_norm_affine = False
63
-
64
- self.classifier_post_norm_eps = 1e-7
65
- self.classifier_post_norm_affine = False
66
 
67
  if config_file is not None:
68
  if type(config_file) is str:
@@ -80,34 +32,3 @@ class GptBertConfig(PretrainedConfig):
80
  if isinstance(value, str):
81
  value = value.lower()
82
  setattr(self, attr, value)
83
-
84
- def __repr__(self) -> str:
85
- return str(self.to_json_string())
86
-
87
- def to_dict(self) -> dict:
88
- """Serializes this instance to a Python dictionary."""
89
- output: dict
90
-
91
- output = copy.deepcopy(self.__dict__)
92
- return output
93
-
94
- def to_json_string(self) -> str:
95
- """Serializes this instance to a JSON string."""
96
- return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
97
-
98
- def to_json_file(self, json_file_path: Path | str) -> None:
99
- """Save this instance to a json file."""
100
- if isinstance(json_file_path, str):
101
- json_file_path: Path = Path(json_file_path)
102
- with json_file_path.open("w", encoding='utf-8') as writer:
103
- writer.write(self.to_json_string())
104
-
105
- @classmethod
106
- def create_base_config(cls, json_file_path: Path | str | None = None) -> GptBertConfig:
107
- config: GptBertConfig
108
-
109
- config = GptBertConfig()
110
- if json_file_path is not None:
111
- config.to_json_file(json_file_path)
112
-
113
- return config
 
14
  **kwargs
15
  ):
16
  super().__init__(**kwargs)
17
+ self.model = "norbert4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  if config_file is not None:
20
  if type(config_file) is str:
 
32
  if isinstance(value, str):
33
  value = value.lower()
34
  setattr(self, attr, value)