YvanCarre committed · verified
Commit d87e8b6 · 1 parent: 44b4ccf

Upload InkubaLM multi-task with adapters

config.json CHANGED
@@ -2,46 +2,11 @@
   "_name_or_path": "lelapa/InkubaLM-0.4B",
   "adapters": {
     "adapters": {
-      "machine-translation": "26cd1b10db746518",
+      "machine-translation": "seq_bn",
       "sentiment-analysis": "seq_bn",
       "xnli": "seq_bn"
     },
-    "config_map": {
-      "26cd1b10db746518": {
-        "adapter_residual_before_ln": false,
-        "cross_adapter": false,
-        "dropout": 0.0,
-        "factorized_phm_W": true,
-        "factorized_phm_rule": false,
-        "hypercomplex_nonlinearity": "glorot-uniform",
-        "init_weights": "bert",
-        "inv_adapter": null,
-        "inv_adapter_reduction_factor": null,
-        "is_parallel": false,
-        "learn_phm": true,
-        "leave_out": [],
-        "ln_after": false,
-        "ln_before": false,
-        "mh_adapter": false,
-        "non_linearity": "relu",
-        "original_ln_after": true,
-        "original_ln_before": true,
-        "output_adapter": true,
-        "phm_bias": true,
-        "phm_c_init": "normal",
-        "phm_dim": 4,
-        "phm_init_range": 0.0001,
-        "phm_layer": false,
-        "phm_rank": 1,
-        "reduction_factor": 8,
-        "residual_before_ln": true,
-        "scaling": 1.0,
-        "shared_W_phm": false,
-        "shared_phm_rule": true,
-        "stochastic_depth": 0.0,
-        "use_gating": false
-      }
-    },
+    "config_map": {},
     "fusion_config_map": {},
     "fusion_name_map": {},
     "fusions": {}
@@ -58,7 +23,7 @@
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "id2label": null,
   "initializer_range": 0.02,
   "intermediate_size": 5632,
@@ -67,26 +32,14 @@
   "mlp_bias": false,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 8,
+  "num_hidden_layers": 6,
   "num_key_value_heads": 32,
   "prediction_heads": {
-    "default": {
-      "activation_function": null,
-      "bias": false,
-      "dropout_prob": null,
-      "embedding_size": 2048,
-      "head_type": "causal_lm",
-      "label2id": null,
-      "layer_norm": false,
-      "layers": 1,
-      "shift_labels": true,
-      "vocab_size": 61788
-    },
     "machine-translation": {
       "activation_function": "gelu",
       "bias": true,
       "dropout_prob": null,
-      "embedding_size": 2048,
+      "embedding_size": 1024,
       "head_type": "causal_lm",
       "label2id": null,
       "layer_norm": true,
machine-translation/adapter_config.json CHANGED
@@ -25,7 +25,7 @@
     "phm_init_range": 0.0001,
     "phm_layer": false,
     "phm_rank": 1,
-    "reduction_factor": 8,
+    "reduction_factor": 16,
     "residual_before_ln": true,
     "scaling": 1.0,
     "shared_W_phm": false,
@@ -33,7 +33,7 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
machine-translation/head_config.json CHANGED
@@ -3,7 +3,7 @@
     "activation_function": "gelu",
     "bias": true,
     "dropout_prob": null,
-    "embedding_size": 2048,
+    "embedding_size": 1024,
     "head_type": "causal_lm",
     "label2id": null,
     "layer_norm": true,
@@ -11,7 +11,7 @@
     "shift_labels": true,
     "vocab_size": 61788
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
machine-translation/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05181a303bdf4633323da7cd7cc42c0dc42de0047093fd154bd3d3cf56475ccb
-size 33640406
+oid sha256:ee18b0f15d0bcf1f936629053274982e8671b72d06e622d3a28d023adc094518
+size 3181166
machine-translation/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11fd55df1c791afae000989604f2fefa0e12abd715657be35bf2a00a393874cd
-size 523218998
+oid sha256:cac19dd58a7c1d8e6066cc0c6bf5d2ab794aa3d7bcfd7389f1f0e6e3f62ac400
+size 257540150
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e02a8f87608cbc40d6a46096618ad785ab8dfff3f75d0ef496ede17c994c4901
-size 3280823264
+oid sha256:f2cd0d9eb26677e7eccc79da54ef1b30584ed523c09399c830b2e8d5a8e11b87
+size 1145192072
sentiment-analysis/adapter_config.json CHANGED
@@ -33,7 +33,7 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
sentiment-analysis/head_config.json CHANGED
@@ -13,7 +13,7 @@
     "num_labels": 3,
     "use_pooler": false
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
sentiment-analysis/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b67fe84a865c161706578ea8e295c8f158b246896b98e8ee767e455f0fe5fded
-size 16859030
+oid sha256:02d6151ed3ddb779cc74a1aa1eafc9cba0d79f809372c9f7a9383e4122c4b7b2
+size 3181102
sentiment-analysis/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf7eac6525f75894ce9e9adbed2e71322205cbd82d0619042b473d006ab9edbc
-size 16812200
+oid sha256:86af309c93c9cfe180ab38bf6299400919848faf75a6e18fc42c834465a19181
+size 4212904
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 128,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
-  "padding": {
-    "strategy": {
-      "Fixed": 128
-    },
-    "direction": "Left",
-    "pad_to_multiple_of": null,
-    "pad_id": 2,
-    "pad_type_id": 0,
-    "pad_token": "</s>"
-  },
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,
xnli/adapter_config.json CHANGED
@@ -33,7 +33,7 @@
     "stochastic_depth": 0.0,
     "use_gating": false
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
xnli/head_config.json CHANGED
@@ -13,7 +13,7 @@
     "num_labels": 3,
     "use_pooler": false
   },
-  "hidden_size": 2048,
+  "hidden_size": 1024,
   "model_class": "LlamaAdapterModel",
   "model_name": "lelapa/InkubaLM-0.4B",
   "model_type": "llama",
xnli/pytorch_adapter.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a584e75646d868757e70744f3fed1c83c25d66ff6cdf9994d6775d70874a806
-size 16858582
+oid sha256:2fb8af9fee7f72877e0dba67b1128ccc86e4030083fae7ce97e388797826c259
+size 3180782
xnli/pytorch_model_head.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1366e1b83a7b97a418f3f3053659d15a96ac13a53b4114249979ca6fce64fbd
-size 16812136
+oid sha256:748270c96b63931a7ebd1c4d61b56f3a91a32b3016888dd2fead98d271f16572
+size 4212840