Bo1015 committed on
Commit 4d6d2ec · verified
Parent: f114771

Upload 2 files

Files changed (2)
  1. config.json +52 -51
  2. configuration_proteinglm.py +5 -5
config.json CHANGED
@@ -1,52 +1,53 @@
 {
-  "_name_or_path": "proteinglm-1b-mlm",
-  "add_bias_linear": true,
-  "add_qkv_bias": true,
-  "apply_query_key_layer_scaling": true,
-  "apply_residual_connection_post_layernorm": true,
-  "architectures": [
-    "ProteinGLMModel"
-  ],
-  "attention_dropout": 0.0,
-  "attention_softmax_in_fp32": true,
-  "auto_map": {
-    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
-    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
-    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
-    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
-  },
-  "bias_dropout_fusion": true,
-  "deepnorm": true,
-  "experts_per_token": 0,
-  "ffn_hidden_size": 5461,
-  "fp32_residual_connection": false,
-  "glu_activation": "geglu",
-  "head_num": 1,
-  "hidden_dropout": 0.0,
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "is_causal": false,
-  "kv_channels": 64,
-  "layernorm_epsilon": 1e-05,
-  "model_type": "ProteinGLM",
-  "moe": false,
-  "multi_query_attention": false,
-  "multi_query_group_num": 1,
-  "num_attention_heads": 32,
-  "num_experts": 0,
-  "num_layers": 24,
-  "padded_vocab_size": 128,
-  "post_layer_norm": true,
-  "quantization_bit": 0,
-  "rmsnorm": false,
-  "rotary_embedding_2d": false,
-  "seq_length": 1024,
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.2",
-  "untie_head": false,
-  "use_cache": true,
-  "use_pytorch_sdpa": true,
-  "vocab_size": 128
-}
+  "_name_or_path": "proteinglm-3b-mlm",
+  "add_bias_linear": true,
+  "add_qkv_bias": true,
+  "apply_query_key_layer_scaling": true,
+  "apply_residual_connection_post_layernorm": true,
+  "architectures": [
+    "ProteinGLMModel"
+  ],
+  "attention_dropout": 0.0,
+  "attention_softmax_in_fp32": true,
+  "auto_map": {
+    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
+    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
+    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
+    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
+    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
+  },
+  "bias_dropout_fusion": true,
+  "deepnorm": true,
+  "experts_per_token": 0,
+  "ffn_hidden_size": 6832,
+  "fp32_residual_connection": false,
+  "glu_activation": "geglu",
+  "head_num": 1,
+  "hidden_dropout": 0.0,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "is_causal": true,
+  "kv_channels": 64,
+  "layernorm_epsilon": 1e-05,
+  "model_type": "ProteinGLM",
+  "moe": false,
+  "multi_query_attention": false,
+  "multi_query_group_num": 1,
+  "num_attention_heads": 40,
+  "num_experts": 0,
+  "num_layers": 36,
+  "padded_vocab_size": 128,
+  "post_layer_norm": true,
+  "quantization_bit": 0,
+  "rmsnorm": false,
+  "rotary_embedding_2d": false,
+  "seq_length": 1024,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "untie_head": false,
+  "use_cache": true,
+  "use_pytorch_sdpa": true,
+  "vocab_size": 128
+}
+
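
For reference, the updated config.json values can be checked by loading the remote config. This is a minimal sketch, not part of the commit: the repository id is an assumption inferred from the commit author and the "_name_or_path" above, and trust_remote_code is needed because ProteinGLMConfig ships with the repository (see the auto_map entry).

# Minimal sketch; the repo id below is an assumption, adjust if the model
# lives under a different name.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "Bo1015/proteinglm-3b-mlm",  # assumed repository id
    trust_remote_code=True,      # config class is defined in configuration_proteinglm.py
)

# Values introduced by this commit (see config.json above)
print(config.hidden_size)          # 2560
print(config.num_layers)           # 36
print(config.num_attention_heads)  # 40
print(config.ffn_hidden_size)      # 6832
print(config.is_causal)            # True
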
configuration_proteinglm.py CHANGED
@@ -5,12 +5,12 @@ class ProteinGLMConfig(PretrainedConfig):
     model_type = "ProteinGLM"
     def __init__(
         self,
-        num_layers=24,
+        num_layers=36,
         padded_vocab_size=128,
-        hidden_size=2048,
-        ffn_hidden_size=5461,
+        hidden_size=2560,
+        ffn_hidden_size=6832,
         kv_channels=64,
-        num_attention_heads=32,
+        num_attention_heads=40,
         seq_length=1024,
         hidden_dropout=0.0,
         attention_dropout=0.0,
@@ -31,7 +31,7 @@ class ProteinGLMConfig(PretrainedConfig):
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
-        is_causal=False,
+        is_causal=True,
         use_cache=True,
         initializer_range=0.02,
         moe=False,
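
With this change the constructor defaults in configuration_proteinglm.py mirror the 3B values in config.json. A minimal sketch of checking that locally, assuming the file is importable from the working directory and that, as is standard for PretrainedConfig subclasses, each keyword argument is stored as an attribute of the same name:

# Minimal sketch, not part of the commit; assumes configuration_proteinglm.py
# (and transformers) are available locally.
from configuration_proteinglm import ProteinGLMConfig

cfg = ProteinGLMConfig()  # defaults updated by this commit

# Should match the 3B config.json above (assuming the usual
# attribute-per-keyword storage in __init__).
assert cfg.num_layers == 36
assert cfg.hidden_size == 2560
assert cfg.ffn_hidden_size == 6832
assert cfg.num_attention_heads == 40
assert cfg.is_causal is True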