Upload InkubaLM multi-task with adapters
- config.json +5 -52
- machine-translation/adapter_config.json +2 -2
- machine-translation/head_config.json +2 -2
- machine-translation/pytorch_adapter.bin +2 -2
- machine-translation/pytorch_model_head.bin +2 -2
- model.safetensors +2 -2
- sentiment-analysis/adapter_config.json +1 -1
- sentiment-analysis/head_config.json +1 -1
- sentiment-analysis/pytorch_adapter.bin +2 -2
- sentiment-analysis/pytorch_model_head.bin +2 -2
- tokenizer.json +2 -16
- xnli/adapter_config.json +1 -1
- xnli/head_config.json +1 -1
- xnli/pytorch_adapter.bin +2 -2
- xnli/pytorch_model_head.bin +2 -2
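
The commit ships the shared backbone weights (model.safetensors) plus one adapter/head pair per task in the standard adapters-library layout, so a single task can be activated on top of the common base model. A minimal sketch, assuming a local checkout of this repository and the Hugging Face `adapters` package (the paths below are illustrative, not part of the commit):

from adapters import AutoAdapterModel

# Backbone named in the configs ("model_name": "lelapa/InkubaLM-0.4B").
model = AutoAdapterModel.from_pretrained("lelapa/InkubaLM-0.4B")

# Each task folder bundles adapter_config.json / pytorch_adapter.bin and a
# matching prediction head (head_config.json / pytorch_model_head.bin), so
# load_adapter restores both the seq_bn bottleneck weights and the head.
mt = model.load_adapter("./machine-translation", with_head=True)
model.set_active_adapters(mt)
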
config.json
CHANGED
@@ -2,46 +2,11 @@
   "_name_or_path": "lelapa/InkubaLM-0.4B",
   "adapters": {
     "adapters": {
-      "machine-translation": "
+      "machine-translation": "seq_bn",
       "sentiment-analysis": "seq_bn",
       "xnli": "seq_bn"
     },
-    "config_map": {
-      "26cd1b10db746518": {
-        "adapter_residual_before_ln": false,
-        "cross_adapter": false,
-        "dropout": 0.0,
-        "factorized_phm_W": true,
-        "factorized_phm_rule": false,
-        "hypercomplex_nonlinearity": "glorot-uniform",
-        "init_weights": "bert",
-        "inv_adapter": null,
-        "inv_adapter_reduction_factor": null,
-        "is_parallel": false,
-        "learn_phm": true,
-        "leave_out": [],
-        "ln_after": false,
-        "ln_before": false,
-        "mh_adapter": false,
-        "non_linearity": "relu",
-        "original_ln_after": true,
-        "original_ln_before": true,
-        "output_adapter": true,
-        "phm_bias": true,
-        "phm_c_init": "normal",
-        "phm_dim": 4,
-        "phm_init_range": 0.0001,
-        "phm_layer": false,
-        "phm_rank": 1,
-        "reduction_factor": 8,
-        "residual_before_ln": true,
-        "scaling": 1.0,
-        "shared_W_phm": false,
-        "shared_phm_rule": true,
-        "stochastic_depth": 0.0,
-        "use_gating": false
-      }
-    },
+    "config_map": {},
     "fusion_config_map": {},
     "fusion_name_map": {},
     "fusions": {}
@@ -58,7 +23,7 @@
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size":
+  "hidden_size": 1024,
   "id2label": null,
   "initializer_range": 0.02,
   "intermediate_size": 5632,
@@ -67,26 +32,14 @@
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers":
+  "num_hidden_layers": 6,
   "num_key_value_heads": 32,
   "prediction_heads": {
-    "default": {
-      "activation_function": null,
-      "bias": false,
-      "dropout_prob": null,
-      "embedding_size": 2048,
-      "head_type": "causal_lm",
-      "label2id": null,
-      "layer_norm": false,
-      "layers": 1,
-      "shift_labels": true,
-      "vocab_size": 61788
-    },
     "machine-translation": {
       "activation_function": "gelu",
       "bias": true,
       "dropout_prob": null,
-      "embedding_size":
+      "embedding_size": 1024,
       "head_type": "causal_lm",
       "label2id": null,
       "layer_norm": true,
machine-translation/adapter_config.json
CHANGED
@@ -25,7 +25,7 @@
   "phm_init_range": 0.0001,
   "phm_layer": false,
   "phm_rank": 1,
-  "reduction_factor":
+  "reduction_factor": 16,
   "residual_before_ln": true,
   "scaling": 1.0,
   "shared_W_phm": false,
@@ -33,7 +33,7 @@
   "stochastic_depth": 0.0,
   "use_gating": false
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
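
For reference, the "seq_bn" entry recorded in this adapter config describes a sequential bottleneck adapter; with "reduction_factor": 16 over "hidden_size": 1024 the bottleneck is 1024 / 16 = 64 units per adapted layer. A hedged sketch of recreating that configuration with the adapters library (variable names are illustrative, all unshown fields are left at library defaults):

from adapters import SeqBnConfig

# reduction_factor matches the value written to adapter_config.json above.
mt_config = SeqBnConfig(reduction_factor=16)
# model.add_adapter("machine-translation", config=mt_config)  # when training a fresh adapter
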
machine-translation/head_config.json
CHANGED
@@ -3,7 +3,7 @@
   "activation_function": "gelu",
   "bias": true,
   "dropout_prob": null,
-  "embedding_size":
+  "embedding_size": 1024,
   "head_type": "causal_lm",
   "label2id": null,
   "layer_norm": true,
@@ -11,7 +11,7 @@
   "shift_labels": true,
   "vocab_size": 61788
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
machine-translation/pytorch_adapter.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ee18b0f15d0bcf1f936629053274982e8671b72d06e622d3a28d023adc094518
+size 3181166
machine-translation/pytorch_model_head.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cac19dd58a7c1d8e6066cc0c6bf5d2ab794aa3d7bcfd7389f1f0e6e3f62ac400
+size 257540150
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f2cd0d9eb26677e7eccc79da54ef1b30584ed523c09399c830b2e8d5a8e11b87
+size 1145192072
sentiment-analysis/adapter_config.json
CHANGED
@@ -33,7 +33,7 @@
   "stochastic_depth": 0.0,
   "use_gating": false
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
sentiment-analysis/head_config.json
CHANGED
@@ -13,7 +13,7 @@
   "num_labels": 3,
   "use_pooler": false
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
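
Unlike the machine-translation head (a causal-LM head), the sentiment-analysis and xnli heads appear to be 3-way classification heads ("num_labels": 3, "use_pooler": false). If rebuilding such a head from scratch rather than loading the one shipped here, the adapters flex-head API would be used roughly as follows (a sketch, not taken from this commit):

# model.add_classification_head("sentiment-analysis", num_labels=3)
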
sentiment-analysis/pytorch_adapter.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:02d6151ed3ddb779cc74a1aa1eafc9cba0d79f809372c9f7a9383e4122c4b7b2
+size 3181102
sentiment-analysis/pytorch_model_head.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:86af309c93c9cfe180ab38bf6299400919848faf75a6e18fc42c834465a19181
+size 4212904
tokenizer.json
CHANGED
@@ -1,21 +1,7 @@
 {
   "version": "1.0",
-  "truncation":
-
-    "max_length": 128,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
-  "padding": {
-    "strategy": {
-      "Fixed": 128
-    },
-    "direction": "Left",
-    "pad_to_multiple_of": null,
-    "pad_id": 2,
-    "pad_type_id": 0,
-    "pad_token": "</s>"
-  },
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,
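
This change removes the truncation and padding settings baked into tokenizer.json (max length / fixed padding of 128, left-padding with "</s>"), setting both to null, so length handling moves to call time. A minimal sketch, assuming transformers and a local checkout of this repository (the path is a placeholder):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./InkubaLM-multitask")  # hypothetical local path
batch = tok(["example input"], max_length=128, truncation=True, padding="max_length", return_tensors="pt")
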
xnli/adapter_config.json
CHANGED
@@ -33,7 +33,7 @@
   "stochastic_depth": 0.0,
   "use_gating": false
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
xnli/head_config.json
CHANGED
@@ -13,7 +13,7 @@
   "num_labels": 3,
   "use_pooler": false
   },
-  "hidden_size":
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
xnli/pytorch_adapter.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2fb8af9fee7f72877e0dba67b1128ccc86e4030083fae7ce97e388797826c259
+size 3180782
xnli/pytorch_model_head.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:748270c96b63931a7ebd1c4d61b56f3a91a32b3016888dd2fead98d271f16572
+size 4212840