SVECTOR-OFFICIAL committed on
Commit
a708217
·
verified ·
1 Parent(s): ca136bc

Create configuration_theta.py python Copy Edit

Browse files
configuration_theta.py python Copy Edit ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import PretrainedConfig


class ThetaConfig(PretrainedConfig):
    """Configuration class for the Theta model.

    Stores the hyperparameters used to instantiate a Theta model. Inherits
    from :class:`~transformers.PretrainedConfig`, so it supports the usual
    serialization helpers (``from_pretrained`` / ``save_pretrained``) and
    accepts any standard ``PretrainedConfig`` keyword arguments (e.g.
    ``pad_token_id``, ``bos_token_id``) via ``**kwargs``.

    Args:
        vocab_size (int, defaults to 32000):
            Size of the token vocabulary (number of rows in the embedding
            matrix).
        hidden_size (int, defaults to 4096):
            Dimensionality of the hidden representations.
        intermediate_size (int, defaults to 11008):
            Dimensionality of the feed-forward (MLP) layer.
        num_hidden_layers (int, defaults to 32):
            Number of transformer decoder layers.
        num_attention_heads (int, defaults to 32):
            Number of attention heads per layer.
        hidden_act (str, defaults to "silu"):
            Activation function used in the feed-forward layer.
        max_position_embeddings (int, defaults to 2048):
            Maximum sequence length the model is configured for.
        initializer_range (float, defaults to 0.02):
            Standard deviation for weight initialization.
        rms_norm_eps (float, defaults to 1e-5):
            Epsilon used by the RMS normalization layers.
        use_cache (bool, defaults to True):
            Whether the model should return key/value caches for faster
            autoregressive decoding.
    """

    # Identifier used by the transformers auto-class machinery.
    model_type = "theta"

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        **kwargs,
    ):
        # Forward generic config options (token ids, tying flags, ...) to the
        # PretrainedConfig base class.
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.max_position_embeddings = max_position_embeddings
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache