Bo1015 committed on
Commit 4d6d2ec · verified
Parent: f114771

Upload 2 files

Files changed (2)
  1. config.json +52 -51
  2. configuration_proteinglm.py +5 -5
config.json CHANGED
@@ -1,52 +1,53 @@
 {
-  "_name_or_path": "proteinglm-1b-mlm",
-  "add_bias_linear": true,
-  "add_qkv_bias": true,
-  "apply_query_key_layer_scaling": true,
-  "apply_residual_connection_post_layernorm": true,
-  "architectures": [
-    "ProteinGLMModel"
-  ],
-  "attention_dropout": 0.0,
-  "attention_softmax_in_fp32": true,
-  "auto_map": {
-    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
-    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
-    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
-    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
-  },
-  "bias_dropout_fusion": true,
-  "deepnorm": true,
-  "experts_per_token": 0,
-  "ffn_hidden_size": 5461,
-  "fp32_residual_connection": false,
-  "glu_activation": "geglu",
-  "head_num": 1,
-  "hidden_dropout": 0.0,
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "is_causal": false,
-  "kv_channels": 64,
-  "layernorm_epsilon": 1e-05,
-  "model_type": "ProteinGLM",
-  "moe": false,
-  "multi_query_attention": false,
-  "multi_query_group_num": 1,
-  "num_attention_heads": 32,
-  "num_experts": 0,
-  "num_layers": 24,
-  "padded_vocab_size": 128,
-  "post_layer_norm": true,
-  "quantization_bit": 0,
-  "rmsnorm": false,
-  "rotary_embedding_2d": false,
-  "seq_length": 1024,
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.2",
-  "untie_head": false,
-  "use_cache": true,
-  "use_pytorch_sdpa": true,
-  "vocab_size": 128
-}
+  "_name_or_path": "proteinglm-3b-mlm",
+  "add_bias_linear": true,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": true,
+  "architectures": [
+    "ProteinGLMModel"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
+    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
+    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
+    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
+  },
+  "bias_dropout_fusion": true,
+  "deepnorm": true,
+  "experts_per_token": 0,
+  "ffn_hidden_size": 6832,
+  "fp32_residual_connection": false,
+  "glu_activation": "geglu",
+  "head_num": 1,
+  "hidden_dropout": 0.0,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "is_causal": true,
+  "kv_channels": 64,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "ProteinGLM",
+  "moe": false,
+  "multi_query_attention": false,
+  "multi_query_group_num": 1,
+  "num_attention_heads": 40,
+  "num_experts": 0,
+  "num_layers": 36,
+  "padded_vocab_size": 128,
+  "post_layer_norm": true,
+  "quantization_bit": 0,
+  "rmsnorm": false,
+  "rotary_embedding_2d": false,
+  "seq_length": 1024,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "untie_head": false,
+  "use_cache": true,
+  "use_pytorch_sdpa": true,
+  "vocab_size": 128
+}
+
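
For reference, the updated config.json values can be checked by loading the remote config. This is a minimal sketch, not part of the commit: the repository id is an assumption inferred from the commit author and the "_name_or_path" above, and trust_remote_code is needed because ProteinGLMConfig ships with the repository (see the auto_map entry).

# Minimal sketch; the repo id below is an assumption, adjust if the model
# lives under a different name.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "Bo1015/proteinglm-3b-mlm",  # assumed repository id
    trust_remote_code=True,      # config class is defined in configuration_proteinglm.py
)

# Values introduced by this commit (see config.json above)
print(config.hidden_size)          # 2560
print(config.num_layers)           # 36
print(config.num_attention_heads)  # 40
print(config.ffn_hidden_size)      # 6832
print(config.is_causal)            # True
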
configuration_proteinglm.py CHANGED
@@ -5,12 +5,12 @@ class ProteinGLMConfig(PretrainedConfig):
     model_type = "ProteinGLM"
     def __init__(
         self,
-        num_layers=24,
+        num_layers=36,
         padded_vocab_size=128,
-        hidden_size=2048,
-        ffn_hidden_size=5461,
+        hidden_size=2560,
+        ffn_hidden_size=6832,
         kv_channels=64,
-        num_attention_heads=32,
+        num_attention_heads=40,
         seq_length=1024,
         hidden_dropout=0.0,
         attention_dropout=0.0,
@@ -31,7 +31,7 @@ class ProteinGLMConfig(PretrainedConfig):
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
-        is_causal=False,
+        is_causal=True,
         use_cache=True,
         initializer_range=0.02,
         moe=False,
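
With this change the constructor defaults in configuration_proteinglm.py mirror the 3B values in config.json. A minimal sketch of checking that locally, assuming the file is importable from the working directory and that, as is standard for PretrainedConfig subclasses, each keyword argument is stored as an attribute of the same name:

# Minimal sketch, not part of the commit; assumes configuration_proteinglm.py
# (and transformers) are available locally.
from configuration_proteinglm import ProteinGLMConfig

cfg = ProteinGLMConfig()  # defaults updated by this commit

# Should match the 3B config.json above (assuming the usual
# attribute-per-keyword storage in __init__).
assert cfg.num_layers == 36
assert cfg.hidden_size == 2560
assert cfg.ffn_hidden_size == 6832
assert cfg.num_attention_heads == 40
assert cfg.is_causal is True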