Upload 2 files
- config.json +52 -51
- configuration_proteinglm.py +5 -5
config.json
CHANGED
@@ -1,52 +1,53 @@
 {
- … (previous lines 2-52 removed; old contents not shown in this view)
+  "_name_or_path": "proteinglm-3b-mlm",
+  "add_bias_linear": true,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": true,
+  "architectures": [
+    "ProteinGLMModel"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
+    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
+    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
+    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
+  },
+  "bias_dropout_fusion": true,
+  "deepnorm": true,
+  "experts_per_token": 0,
+  "ffn_hidden_size": 6832,
+  "fp32_residual_connection": false,
+  "glu_activation": "geglu",
+  "head_num": 1,
+  "hidden_dropout": 0.0,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "is_causal": true,
+  "kv_channels": 64,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "ProteinGLM",
+  "moe": false,
+  "multi_query_attention": false,
+  "multi_query_group_num": 1,
+  "num_attention_heads": 40,
+  "num_experts": 0,
+  "num_layers": 36,
+  "padded_vocab_size": 128,
+  "post_layer_norm": true,
+  "quantization_bit": 0,
+  "rmsnorm": false,
+  "rotary_embedding_2d": false,
+  "seq_length": 1024,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "untie_head": false,
+  "use_cache": true,
+  "use_pytorch_sdpa": true,
+  "vocab_size": 128
+}
+
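The auto_map entries above route the transformers Auto* classes to the custom configuration_proteinglm.py / modeling_proteinglm.py modules shipped in the repo, so loading this checkpoint needs trust_remote_code=True. A minimal loading sketch, not part of this commit; the path "proteinglm-3b-mlm" is an assumption (use the actual Hub repo id or a local checkout containing these files):

from transformers import AutoConfig, AutoModelForMaskedLM

path = "proteinglm-3b-mlm"  # assumed repo id or local directory

# trust_remote_code=True is required because auto_map points at the
# configuration_proteinglm / modeling_proteinglm modules inside the repo.
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
model = AutoModelForMaskedLM.from_pretrained(path, config=config, trust_remote_code=True)

print(config.num_layers, config.hidden_size, config.num_attention_heads)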
configuration_proteinglm.py
CHANGED
@@ -5,12 +5,12 @@ class ProteinGLMConfig(PretrainedConfig):
     model_type = "ProteinGLM"
     def __init__(
         self,
-        num_layers=…,
+        num_layers=36,
         padded_vocab_size=128,
-        hidden_size=…,
-        ffn_hidden_size=…,
+        hidden_size=2560,
+        ffn_hidden_size=6832,
         kv_channels=64,
-        num_attention_heads=…,
+        num_attention_heads=40,
         seq_length=1024,
         hidden_dropout=0.0,
         attention_dropout=0.0,
@@ -31,7 +31,7 @@ class ProteinGLMConfig(PretrainedConfig):
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
-        is_causal=…,
+        is_causal=True,
         use_cache=True,
         initializer_range=0.02,
         moe=False,
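The configuration_proteinglm.py change fills in the 3B-scale defaults to match config.json. A small sketch, assuming the file is importable from the working directory and that, as is conventional for PretrainedConfig subclasses, __init__ stores each argument under an attribute of the same name, showing how the new defaults fit together:

from configuration_proteinglm import ProteinGLMConfig

cfg = ProteinGLMConfig()  # defaults as of this commit

assert cfg.num_layers == 36
assert cfg.hidden_size == 2560
assert cfg.ffn_hidden_size == 6832
# 40 attention heads of width kv_channels=64 add up to the 2560-wide hidden state
assert cfg.num_attention_heads * cfg.kv_channels == cfg.hidden_size
assert cfg.is_causal is True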