jdorairaj committed
Commit 0c2ae73 · 1 Parent(s): 644c36e
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/args.json +34 -0
  2. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/logfile.log +213 -0
  3. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/adapter_config.json +41 -0
  4. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/all_results.json +1 -0
  5. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/all_results_val.json +1 -0
  6. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/eval_res.json +0 -0
  7. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/eval_res_val.json +0 -0
  8. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/gpu_stats.json +127 -0
  9. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/head_config.json +21 -0
  10. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_adapter.bin +3 -0
  11. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_model_head.bin +3 -0
  12. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/special_tokens_map.json +7 -0
  13. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/tokenizer.json +0 -0
  14. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/tokenizer_config.json +56 -0
  15. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/vocab.txt +0 -0
  16. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/adapter_config.json +41 -0
  17. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/all_results.json +1 -0
  18. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/all_results_val.json +1 -0
  19. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/eval_res.json +0 -0
  20. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/eval_res_val.json +0 -0
  21. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/gpu_stats.json +127 -0
  22. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/head_config.json +21 -0
  23. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_adapter.bin +3 -0
  24. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_model_head.bin +3 -0
  25. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/special_tokens_map.json +7 -0
  26. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/tokenizer.json +0 -0
  27. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/tokenizer_config.json +56 -0
  28. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/vocab.txt +0 -0
  29. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/adapter_config.json +41 -0
  30. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/all_results.json +1 -0
  31. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/all_results_val.json +1 -0
  32. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/eval_res.json +0 -0
  33. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/eval_res_val.json +0 -0
  34. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/gpu_stats.json +127 -0
  35. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/head_config.json +21 -0
  36. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_adapter.bin +3 -0
  37. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_model_head.bin +3 -0
  38. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/special_tokens_map.json +7 -0
  39. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/tokenizer.json +0 -0
  40. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/tokenizer_config.json +56 -0
  41. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/vocab.txt +0 -0
  42. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/adapter_config.json +41 -0
  43. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/all_results.json +1 -0
  44. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/all_results_val.json +1 -0
  45. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/eval_res.json +0 -0
  46. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/eval_res_val.json +0 -0
  47. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/gpu_stats.json +127 -0
  48. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/head_config.json +21 -0
  49. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_adapter.bin +3 -0
  50. reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_model_head.bin +3 -0
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/args.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "task_name": "cola",
+   "train_file": null,
+   "validation_file": null,
+   "max_length": 256,
+   "pad_to_max_length": false,
+   "model_name_or_path": "google-bert/bert-base-uncased",
+   "use_slow_tokenizer": false,
+   "per_device_train_batch_size": 32,
+   "per_device_eval_batch_size": 32,
+   "learning_rate": 0.0001,
+   "max_grad_norm": 0.5,
+   "weight_decay": 0.0,
+   "num_train_epochs": 5,
+   "max_train_steps": null,
+   "gradient_accumulation_steps": 1,
+   "lr_scheduler_type": "linear",
+   "num_warmup_steps": 0,
+   "output_dir": "./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16",
+   "seed": 12345,
+   "push_to_hub": false,
+   "hub_model_id": null,
+   "hub_token": null,
+   "checkpointing_steps": null,
+   "resume_from_checkpoint": null,
+   "with_tracking": false,
+   "report_to": "all",
+   "ignore_mismatched_sizes": true,
+   "save_train_results": false,
+   "testing_set": "train_val",
+   "lm_head": true,
+   "leave_out": null,
+   "reduction_factor": 16
+ }
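
The step counts reported in the training log below follow directly from these arguments. A minimal sketch of the arithmetic (not the original training script; the 6,840-example figure is taken from the log's "Num examples" line):

```python
import json
import math

# "args.json" is the file shown above; 6840 is the train-set size from the log.
with open("args.json") as f:
    args = json.load(f)

num_train_examples = 6840
steps_per_epoch = math.ceil(
    num_train_examples
    / args["per_device_train_batch_size"]
    / args["gradient_accumulation_steps"]
)
max_train_steps = args["num_train_epochs"] * steps_per_epoch

print(steps_per_epoch)   # 214  -> "num_update_steps_per_epoch = 214" in the log
print(max_train_steps)   # 1070 -> "Total optimization steps = 1070"
```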
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/logfile.log ADDED
@@ -0,0 +1,213 @@
+ 05/02/2024 13:44:03 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 05/02/2024 13:44:04 - INFO - __main__ - Number of labels detected = 2
+ 05/02/2024 13:44:04 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 05/02/2024 13:44:04 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+ 05/02/2024 13:44:04 - INFO - __main__ - ================================================================================
+ Name                 Architecture         #Param      %Param  Active   Train
+ --------------------------------------------------------------------------------
+ cola                 bottleneck        1,789,056       1.634       1       1
+ --------------------------------------------------------------------------------
+ Full model                          109,482,240     100.000               0
+ ================================================================================
+ 05/02/2024 13:44:04 - INFO - __main__ - printing model
+ 05/02/2024 13:44:04 - INFO - __main__ - BertAdapterModel(
+   (bert): BertModel(
+     (embeddings): BertEmbeddings(
+       (word_embeddings): Embedding(30522, 768, padding_idx=0)
+       (position_embeddings): Embedding(512, 768)
+       (token_type_embeddings): Embedding(2, 768)
+       (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+       (dropout): Dropout(p=0.1, inplace=False)
+     )
+     (encoder): BertEncoder(
+       (layer): ModuleList(
+         (0-11): 12 x BertLayer(
+           (attention): BertAttention(
+             (self): BertSelfAttentionWithAdapters(
+               (query): LoRALinearTorch(
+                 in_features=768, out_features=768, bias=True
+                 (loras): ModuleDict()
+               )
+               (key): LoRALinearTorch(
+                 in_features=768, out_features=768, bias=True
+                 (loras): ModuleDict()
+               )
+               (value): LoRALinearTorch(
+                 in_features=768, out_features=768, bias=True
+                 (loras): ModuleDict()
+               )
+               (dropout): Dropout(p=0.1, inplace=False)
+               (prefix_tuning): PrefixTuningLayer(
+                 (prefix_gates): ModuleDict()
+                 (pool): PrefixTuningPool(
+                   (prefix_tunings): ModuleDict()
+                 )
+               )
+             )
+             (output): BertSelfOutputWithAdapters(
+               (dense): Linear(in_features=768, out_features=768, bias=True)
+               (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+               (dropout): Dropout(p=0.1, inplace=False)
+               (adapters): ModuleDict(
+                 (cola): Adapter(
+                   (non_linearity): Activation_Function_Class(
+                     (f): SiLU()
+                   )
+                   (adapter_down): Sequential(
+                     (0): Linear(in_features=768, out_features=48, bias=True)
+                     (1): Activation_Function_Class(
+                       (f): SiLU()
+                     )
+                   )
+                   (adapter_up): Linear(in_features=48, out_features=768, bias=True)
+                   (dropout): Dropout(p=0.0, inplace=False)
+                 )
+               )
+               (adapter_fusion_layer): ModuleDict()
+             )
+           )
+           (intermediate): BertIntermediate(
+             (dense): LoRALinearTorch(
+               in_features=768, out_features=3072, bias=True
+               (loras): ModuleDict()
+             )
+             (intermediate_act_fn): GELUActivation()
+           )
+           (output): BertOutputWithAdapters(
+             (dense): LoRALinearTorch(
+               in_features=3072, out_features=768, bias=True
+               (loras): ModuleDict()
+             )
+             (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+             (dropout): Dropout(p=0.1, inplace=False)
+             (adapters): ModuleDict(
+               (cola): Adapter(
+                 (non_linearity): Activation_Function_Class(
+                   (f): SiLU()
+                 )
+                 (adapter_down): Sequential(
+                   (0): Linear(in_features=768, out_features=48, bias=True)
+                   (1): Activation_Function_Class(
+                     (f): SiLU()
+                   )
+                 )
+                 (adapter_up): Linear(in_features=48, out_features=768, bias=True)
+                 (dropout): Dropout(p=0.0, inplace=False)
+               )
+             )
+             (adapter_fusion_layer): ModuleDict()
+           )
+         )
+       )
+     )
+     (pooler): BertPooler(
+       (dense): Linear(in_features=768, out_features=768, bias=True)
+       (activation): Tanh()
+     )
+     (invertible_adapters): ModuleDict()
+     (shared_parameters): ModuleDict()
+     (prefix_tuning): PrefixTuningPool(
+       (prefix_tunings): ModuleDict()
+     )
+     (prompt_tuning): PromptTuningLayer(
+       (base_model_embeddings): Embedding(30522, 768, padding_idx=0)
+       (prompt_tunings): ModuleDict()
+     )
+   )
+   (heads): ModuleDict(
+     (default): BertStyleMaskedLMHead(
+       (0): Linear(in_features=768, out_features=768, bias=True)
+       (1): Activation_Function_Class(
+         (f): GELUActivation()
+       )
+       (2): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
+       (3): Linear(in_features=768, out_features=30522, bias=True)
+     )
+     (cola): ClassificationHead(
+       (0): Dropout(p=0.1, inplace=False)
+       (1): Linear(in_features=768, out_features=768, bias=True)
+       (2): Activation_Function_Class(
+         (f): Tanh()
+       )
+       (3): Dropout(p=0.1, inplace=False)
+       (4): Linear(in_features=768, out_features=2, bias=True)
+     )
+   )
+ )
+ 05/02/2024 13:44:05 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 05/02/2024 13:44:05 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 05/02/2024 13:44:05 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 05/02/2024 13:44:05 - INFO - __main__ - Max training steps before recalculation = None
+ 05/02/2024 13:44:05 - INFO - __main__ - num_update_steps_per_epoch initial = 214
+ 05/02/2024 13:44:05 - INFO - __main__ - num training epochs initial = 5
+ 05/02/2024 13:44:05 - INFO - __main__ - Adjusted num_train_epochs based on max_train_steps: 5
+ 05/02/2024 13:44:05 - INFO - __main__ - num_update_steps_per_epoch before recalculation = 214
+ 05/02/2024 13:44:05 - INFO - __main__ - num_update_steps_per_epoch after recalculation = 214
+ 05/02/2024 13:44:05 - INFO - __main__ - num training epochs before recalculation = 5
+ 05/02/2024 13:44:06 - INFO - __main__ - ***** Running training *****
+ 05/02/2024 13:44:06 - INFO - __main__ - Num examples = 6840
+ 05/02/2024 13:44:06 - INFO - __main__ - Num Epochs = 5
+ 05/02/2024 13:44:06 - INFO - __main__ - Instantaneous batch size per device = 32
+ 05/02/2024 13:44:06 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32
+ 05/02/2024 13:44:06 - INFO - __main__ - Gradient Accumulation steps = 1
+ 05/02/2024 13:44:06 - INFO - __main__ - Total optimization steps = 1070
+ 05/02/2024 13:44:08 - INFO - __main__ - epoch 0: {'matthews_correlation': -0.02929206145132745}
+ 05/02/2024 13:44:08 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/adapter_config.json
+ 05/02/2024 13:44:08 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_adapter.bin
+ 05/02/2024 13:44:08 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/head_config.json
+ 05/02/2024 13:44:09 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_model_head.bin
+ 05/02/2024 13:44:11 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.0}
+ 05/02/2024 13:44:11 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/adapter_config.json
+ 05/02/2024 13:44:11 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_adapter.bin
+ 05/02/2024 13:44:11 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/head_config.json
+ 05/02/2024 13:44:11 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_model_head.bin
+ 05/02/2024 13:44:32 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.3913530742857523}
+ 05/02/2024 13:44:32 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/adapter_config.json
+ 05/02/2024 13:44:32 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_adapter.bin
+ 05/02/2024 13:44:32 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/head_config.json
+ 05/02/2024 13:44:32 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_model_head.bin
+ 05/02/2024 13:44:35 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.43209766412198286}
+ 05/02/2024 13:44:35 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/adapter_config.json
+ 05/02/2024 13:44:35 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_adapter.bin
+ 05/02/2024 13:44:35 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/head_config.json
+ 05/02/2024 13:44:35 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_model_head.bin
+ 05/02/2024 13:44:56 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.40686271625095977}
+ 05/02/2024 13:44:56 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/adapter_config.json
+ 05/02/2024 13:44:57 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_adapter.bin
+ 05/02/2024 13:44:57 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/head_config.json
+ 05/02/2024 13:44:57 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_model_head.bin
+ 05/02/2024 13:44:59 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.43657984362111635}
+ 05/02/2024 13:44:59 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/adapter_config.json
+ 05/02/2024 13:44:59 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_adapter.bin
+ 05/02/2024 13:44:59 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/head_config.json
+ 05/02/2024 13:44:59 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_model_head.bin
+ 05/02/2024 13:45:21 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.4300679389836819}
+ 05/02/2024 13:45:21 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/adapter_config.json
+ 05/02/2024 13:45:21 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/pytorch_adapter.bin
+ 05/02/2024 13:45:21 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/head_config.json
+ 05/02/2024 13:45:21 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/pytorch_model_head.bin
+ 05/02/2024 13:45:23 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.46279932268480173}
+ 05/02/2024 13:45:23 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/adapter_config.json
+ 05/02/2024 13:45:23 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/pytorch_adapter.bin
+ 05/02/2024 13:45:23 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/head_config.json
+ 05/02/2024 13:45:23 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_641/pytorch_model_head.bin
+ 05/02/2024 13:45:45 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.4911444953794644}
+ 05/02/2024 13:45:45 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/adapter_config.json
+ 05/02/2024 13:45:45 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/pytorch_adapter.bin
+ 05/02/2024 13:45:45 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/head_config.json
+ 05/02/2024 13:45:45 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/pytorch_model_head.bin
+ 05/02/2024 13:45:48 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.4935488969659219}
+ 05/02/2024 13:45:48 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/adapter_config.json
+ 05/02/2024 13:45:48 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/pytorch_adapter.bin
+ 05/02/2024 13:45:48 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/head_config.json
+ 05/02/2024 13:45:48 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_855/pytorch_model_head.bin
+ 05/02/2024 13:46:10 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.49110677806667025}
+ 05/02/2024 13:46:10 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/adapter_config.json
+ 05/02/2024 13:46:10 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_adapter.bin
+ 05/02/2024 13:46:10 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/head_config.json
+ 05/02/2024 13:46:10 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_model_head.bin
+ 05/02/2024 13:46:12 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.5026187978800131}
+ 05/02/2024 13:46:12 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/adapter_config.json
+ 05/02/2024 13:46:12 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_adapter.bin
+ 05/02/2024 13:46:12 - INFO - adapters.loading - Configuration saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/head_config.json
+ 05/02/2024 13:46:12 - INFO - adapters.loading - Module weights saved in ./outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_model_head.bin
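
The metric tracked throughout the log is Matthews correlation (MCC) on CoLA, rising from roughly chance at step 0 to about 0.50 on the validation split by step 1069. A hedged sketch of the statistic itself, using scikit-learn's implementation for illustration rather than the GLUE metric script the run presumably used:

```python
from sklearn.metrics import matthews_corrcoef

# Toy labels: 1 = grammatically acceptable, 0 = unacceptable (CoLA's scheme)
y_true = [1, 0, 0, 1, 1, 0, 1, 0]
y_pred = [1, 0, 1, 1, 0, 0, 1, 0]
print(matthews_corrcoef(y_true, y_pred))  # in [-1, 1]; 0 is chance level
```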
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/adapter_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "config": {
+     "adapter_residual_before_ln": false,
+     "cross_adapter": false,
+     "dropout": 0.0,
+     "factorized_phm_W": true,
+     "factorized_phm_rule": false,
+     "hypercomplex_nonlinearity": "glorot-uniform",
+     "init_weights": "bert",
+     "inv_adapter": null,
+     "inv_adapter_reduction_factor": null,
+     "is_parallel": false,
+     "learn_phm": true,
+     "leave_out": [],
+     "ln_after": false,
+     "ln_before": false,
+     "mh_adapter": true,
+     "non_linearity": "swish",
+     "original_ln_after": true,
+     "original_ln_before": false,
+     "output_adapter": true,
+     "phm_bias": true,
+     "phm_c_init": "normal",
+     "phm_dim": 4,
+     "phm_init_range": 0.0001,
+     "phm_layer": false,
+     "phm_rank": 1,
+     "reduction_factor": 16,
+     "residual_before_ln": true,
+     "scaling": 1.0,
+     "shared_W_phm": false,
+     "shared_phm_rule": true,
+     "use_gating": false
+   },
+   "hidden_size": 768,
+   "model_class": "BertAdapterModel",
+   "model_name": "google-bert/bert-base-uncased",
+   "model_type": "bert",
+   "name": "cola",
+   "version": "0.2.0"
+ }
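
This config describes a Houlsby-style bottleneck adapter: adapters after both the attention block (`mh_adapter`) and the FFN output (`output_adapter`), swish non-linearity, and reduction factor 16 (768 hidden dims down to a 48-dim bottleneck, matching the module tree in the log). A minimal sketch, assuming the `adapters` library (version 0.2.0 per the config above), of how an equivalent setup could be constructed; this reconstructs the saved config and is not the authors' training script:

```python
from adapters import AutoAdapterModel, BnConfig

model = AutoAdapterModel.from_pretrained("google-bert/bert-base-uncased")
config = BnConfig(
    mh_adapter=True,        # adapter after multi-head attention
    output_adapter=True,    # adapter after the FFN output
    reduction_factor=16,    # 768 -> 48 bottleneck
    non_linearity="swish",
)
model.add_adapter("cola", config=config)
model.add_classification_head("cola", num_labels=2)
model.train_adapter("cola")  # freezes the base model; only the adapter trains
# Two adapters per layer x 12 layers gives the 1,789,056 trainable
# parameters (1.634% of the full model) reported in the log summary.
```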
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": -0.02929206145132745}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/all_results_val.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.0}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/eval_res.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/eval_res_val.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/gpu_stats.json ADDED
@@ -0,0 +1,127 @@
+ {
+   "memory_allocated": 459641856,
+   "max_memory_allocated": 508764160,
+   "memory_reserved": 593494016,
+   "max_memory_reserved": 593494016,
+   "memory_stats": {
+     "active.all.allocated": 64079,
+     "active.all.current": 318,
+     "active.all.freed": 63761,
+     "active.all.peak": 336,
+     "active.large_pool.allocated": 28390,
+     "active.large_pool.current": 78,
+     "active.large_pool.freed": 28312,
+     "active.large_pool.peak": 89,
+     "active.small_pool.allocated": 35689,
+     "active.small_pool.current": 240,
+     "active.small_pool.freed": 35449,
+     "active.small_pool.peak": 257,
+     "active_bytes.all.allocated": 88733824000,
+     "active_bytes.all.current": 459641856,
+     "active_bytes.all.freed": 88274182144,
+     "active_bytes.all.peak": 508764160,
+     "active_bytes.large_pool.allocated": 74593599488,
+     "active_bytes.large_pool.current": 451805184,
+     "active_bytes.large_pool.freed": 74141794304,
+     "active_bytes.large_pool.peak": 500891648,
+     "active_bytes.small_pool.allocated": 14140224512,
+     "active_bytes.small_pool.current": 7836672,
+     "active_bytes.small_pool.freed": 14132387840,
+     "active_bytes.small_pool.peak": 15428096,
+     "allocated_bytes.all.allocated": 88733824000,
+     "allocated_bytes.all.current": 459641856,
+     "allocated_bytes.all.freed": 88274182144,
+     "allocated_bytes.all.peak": 508764160,
+     "allocated_bytes.large_pool.allocated": 74593599488,
+     "allocated_bytes.large_pool.current": 451805184,
+     "allocated_bytes.large_pool.freed": 74141794304,
+     "allocated_bytes.large_pool.peak": 500891648,
+     "allocated_bytes.small_pool.allocated": 14140224512,
+     "allocated_bytes.small_pool.current": 7836672,
+     "allocated_bytes.small_pool.freed": 14132387840,
+     "allocated_bytes.small_pool.peak": 15428096,
+     "allocation.all.allocated": 64079,
+     "allocation.all.current": 318,
+     "allocation.all.freed": 63761,
+     "allocation.all.peak": 336,
+     "allocation.large_pool.allocated": 28390,
+     "allocation.large_pool.current": 78,
+     "allocation.large_pool.freed": 28312,
+     "allocation.large_pool.peak": 89,
+     "allocation.small_pool.allocated": 35689,
+     "allocation.small_pool.current": 240,
+     "allocation.small_pool.freed": 35449,
+     "allocation.small_pool.peak": 257,
+     "inactive_split.all.allocated": 51474,
+     "inactive_split.all.current": 22,
+     "inactive_split.all.freed": 51452,
+     "inactive_split.all.peak": 33,
+     "inactive_split.large_pool.allocated": 26239,
+     "inactive_split.large_pool.current": 18,
+     "inactive_split.large_pool.freed": 26221,
+     "inactive_split.large_pool.peak": 26,
+     "inactive_split.small_pool.allocated": 25235,
+     "inactive_split.small_pool.current": 4,
+     "inactive_split.small_pool.freed": 25231,
+     "inactive_split.small_pool.peak": 14,
+     "inactive_split_bytes.all.allocated": 90022852096,
+     "inactive_split_bytes.all.current": 41577472,
+     "inactive_split_bytes.all.freed": 89981274624,
+     "inactive_split_bytes.all.peak": 89366016,
+     "inactive_split_bytes.large_pool.allocated": 74234757120,
+     "inactive_split_bytes.large_pool.current": 41025536,
+     "inactive_split_bytes.large_pool.freed": 74193731584,
+     "inactive_split_bytes.large_pool.peak": 88866816,
+     "inactive_split_bytes.small_pool.allocated": 15788094976,
+     "inactive_split_bytes.small_pool.current": 551936,
+     "inactive_split_bytes.small_pool.freed": 15787543040,
+     "inactive_split_bytes.small_pool.peak": 6483456,
+     "max_split_size": -1,
+     "num_alloc_retries": 0,
+     "num_ooms": 0,
+     "oversize_allocations.allocated": 0,
+     "oversize_allocations.current": 0,
+     "oversize_allocations.freed": 0,
+     "oversize_allocations.peak": 0,
+     "oversize_segments.allocated": 0,
+     "oversize_segments.current": 0,
+     "oversize_segments.freed": 0,
+     "oversize_segments.peak": 0,
+     "requested_bytes.all.allocated": 80085666224,
+     "requested_bytes.all.current": 458497396,
+     "requested_bytes.all.freed": 79627168828,
+     "requested_bytes.all.peak": 506701940,
+     "requested_bytes.large_pool.allocated": 65951070208,
+     "requested_bytes.large_pool.current": 450672640,
+     "requested_bytes.large_pool.freed": 65500397568,
+     "requested_bytes.large_pool.peak": 498841600,
+     "requested_bytes.small_pool.allocated": 14134596016,
+     "requested_bytes.small_pool.current": 7824756,
+     "requested_bytes.small_pool.freed": 14126771260,
+     "requested_bytes.small_pool.peak": 15416648,
+     "reserved_bytes.all.allocated": 593494016,
+     "reserved_bytes.all.current": 593494016,
+     "reserved_bytes.all.freed": 0,
+     "reserved_bytes.all.peak": 593494016,
+     "reserved_bytes.large_pool.allocated": 574619648,
+     "reserved_bytes.large_pool.current": 574619648,
+     "reserved_bytes.large_pool.freed": 0,
+     "reserved_bytes.large_pool.peak": 574619648,
+     "reserved_bytes.small_pool.allocated": 18874368,
+     "reserved_bytes.small_pool.current": 18874368,
+     "reserved_bytes.small_pool.freed": 0,
+     "reserved_bytes.small_pool.peak": 18874368,
+     "segment.all.allocated": 34,
+     "segment.all.current": 34,
+     "segment.all.freed": 0,
+     "segment.all.peak": 34,
+     "segment.large_pool.allocated": 25,
+     "segment.large_pool.current": 25,
+     "segment.large_pool.freed": 0,
+     "segment.large_pool.peak": 25,
+     "segment.small_pool.allocated": 9,
+     "segment.small_pool.current": 9,
+     "segment.small_pool.freed": 0,
+     "segment.small_pool.peak": 9
+   }
+ }
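
A sketch of how a gpu_stats.json like this can be captured. The helper name and output path are illustrative assumptions, but the `torch.cuda` calls are the standard PyTorch allocator introspection APIs whose keys ("active.all.allocated", etc.) appear above:

```python
import json
import torch

def dump_gpu_stats(path: str) -> None:
    """Snapshot CUDA allocator state to a JSON file (illustrative helper)."""
    stats = {
        "memory_allocated": torch.cuda.memory_allocated(),
        "max_memory_allocated": torch.cuda.max_memory_allocated(),
        "memory_reserved": torch.cuda.memory_reserved(),
        "max_memory_reserved": torch.cuda.max_memory_reserved(),
        # memory_stats() returns the nested allocator counters seen above
        "memory_stats": torch.cuda.memory_stats(),
    }
    with open(path, "w") as f:
        json.dump(stats, f, indent=2)
```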
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/head_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "config": {
+     "activation_function": "tanh",
+     "bias": true,
+     "dropout_prob": null,
+     "head_type": "classification",
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layers": 2,
+     "num_labels": 2,
+     "use_pooler": false
+   },
+   "hidden_size": 768,
+   "model_class": "BertAdapterModel",
+   "model_name": "google-bert/bert-base-uncased",
+   "model_type": "bert",
+   "name": "cola",
+   "version": "0.2.0"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29b06f7b9617fec3c5bc63794ca9c89f4238d46db9edb1de32e4dd4e5086fae0
+ size 7191062
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:244a549f85f7dcaa9f5082c6ca9f115dff60bbcbbd8534f0a78f92ef9b70720d
+ size 2370664
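
Both .bin entries are Git LFS pointer files (spec version, oid, byte size) rather than the weights themselves; the ~7.2 MB adapter and ~2.4 MB head are stored in LFS. A hedged sketch of restoring such a checkpoint with the `adapters` library, assuming the step directory has been materialized locally; the path is illustrative:

```python
from adapters import AutoAdapterModel

model = AutoAdapterModel.from_pretrained("google-bert/bert-base-uncased")
# Reads adapter_config.json + pytorch_adapter.bin, and with_head=True also
# head_config.json + pytorch_model_head.bin from the same directory.
name = model.load_adapter("step_0", with_head=True)
model.set_active_adapters(name)
```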
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "padding_side": "left",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
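
With tokenizer.json, vocab.txt, tokenizer_config.json, and special_tokens_map.json saved alongside the weights, each step directory is a self-contained tokenizer checkpoint. A sketch of reloading it (the local path is illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("step_0")
enc = tok("The lemonade tasted sweet.", max_length=256, truncation=True)
# Produces input_ids/token_type_ids/attention_mask dicts like the
# training-set samples dumped in the log.
print(enc["input_ids"])
```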
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_0/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/adapter_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "config": {
+     "adapter_residual_before_ln": false,
+     "cross_adapter": false,
+     "dropout": 0.0,
+     "factorized_phm_W": true,
+     "factorized_phm_rule": false,
+     "hypercomplex_nonlinearity": "glorot-uniform",
+     "init_weights": "bert",
+     "inv_adapter": null,
+     "inv_adapter_reduction_factor": null,
+     "is_parallel": false,
+     "learn_phm": true,
+     "leave_out": [],
+     "ln_after": false,
+     "ln_before": false,
+     "mh_adapter": true,
+     "non_linearity": "swish",
+     "original_ln_after": true,
+     "original_ln_before": false,
+     "output_adapter": true,
+     "phm_bias": true,
+     "phm_c_init": "normal",
+     "phm_dim": 4,
+     "phm_init_range": 0.0001,
+     "phm_layer": false,
+     "phm_rank": 1,
+     "reduction_factor": 16,
+     "residual_before_ln": true,
+     "scaling": 1.0,
+     "shared_W_phm": false,
+     "shared_phm_rule": true,
+     "use_gating": false
+   },
+   "hidden_size": 768,
+   "model_class": "BertAdapterModel",
+   "model_name": "google-bert/bert-base-uncased",
+   "model_type": "bert",
+   "name": "cola",
+   "version": "0.2.0"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.49110677806667025}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/all_results_val.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.5026187978800131}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/eval_res.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/eval_res_val.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/gpu_stats.json ADDED
@@ -0,0 +1,127 @@
+ {
+   "memory_allocated": 487876608,
+   "max_memory_allocated": 1280701440,
+   "memory_reserved": 1432354816,
+   "max_memory_reserved": 1432354816,
+   "memory_stats": {
+     "active.all.allocated": 1785177,
+     "active.all.current": 519,
+     "active.all.freed": 1784658,
+     "active.all.peak": 777,
+     "active.large_pool.allocated": 869905,
+     "active.large_pool.current": 81,
+     "active.large_pool.freed": 869824,
+     "active.large_pool.peak": 228,
+     "active.small_pool.allocated": 915272,
+     "active.small_pool.current": 438,
+     "active.small_pool.freed": 914834,
+     "active.small_pool.peak": 603,
+     "active_bytes.all.allocated": 2795125268992,
+     "active_bytes.all.current": 487876608,
+     "active_bytes.all.freed": 2794637392384,
+     "active_bytes.all.peak": 1280701440,
+     "active_bytes.large_pool.allocated": 2448186030592,
+     "active_bytes.large_pool.current": 465698816,
+     "active_bytes.large_pool.freed": 2447720331776,
+     "active_bytes.large_pool.peak": 1235008000,
+     "active_bytes.small_pool.allocated": 346939238400,
+     "active_bytes.small_pool.current": 22177792,
+     "active_bytes.small_pool.freed": 346917060608,
+     "active_bytes.small_pool.peak": 71908864,
+     "allocated_bytes.all.allocated": 2795125268992,
+     "allocated_bytes.all.current": 487876608,
+     "allocated_bytes.all.freed": 2794637392384,
+     "allocated_bytes.all.peak": 1280701440,
+     "allocated_bytes.large_pool.allocated": 2448186030592,
+     "allocated_bytes.large_pool.current": 465698816,
+     "allocated_bytes.large_pool.freed": 2447720331776,
+     "allocated_bytes.large_pool.peak": 1235008000,
+     "allocated_bytes.small_pool.allocated": 346939238400,
+     "allocated_bytes.small_pool.current": 22177792,
+     "allocated_bytes.small_pool.freed": 346917060608,
+     "allocated_bytes.small_pool.peak": 71908864,
+     "allocation.all.allocated": 1785177,
+     "allocation.all.current": 519,
+     "allocation.all.freed": 1784658,
+     "allocation.all.peak": 777,
+     "allocation.large_pool.allocated": 869905,
+     "allocation.large_pool.current": 81,
+     "allocation.large_pool.freed": 869824,
+     "allocation.large_pool.peak": 228,
+     "allocation.small_pool.allocated": 915272,
+     "allocation.small_pool.current": 438,
+     "allocation.small_pool.freed": 914834,
+     "allocation.small_pool.peak": 603,
+     "inactive_split.all.allocated": 1069303,
+     "inactive_split.all.current": 51,
+     "inactive_split.all.freed": 1069252,
+     "inactive_split.all.peak": 111,
+     "inactive_split.large_pool.allocated": 730208,
+     "inactive_split.large_pool.current": 18,
+     "inactive_split.large_pool.freed": 730190,
+     "inactive_split.large_pool.peak": 46,
+     "inactive_split.small_pool.allocated": 339095,
+     "inactive_split.small_pool.current": 33,
+     "inactive_split.small_pool.freed": 339062,
+     "inactive_split.small_pool.peak": 90,
+     "inactive_split_bytes.all.allocated": 2833239371264,
+     "inactive_split_bytes.all.current": 57382912,
+     "inactive_split_bytes.all.freed": 2833181988352,
+     "inactive_split_bytes.all.peak": 166851072,
+     "inactive_split_bytes.large_pool.allocated": 2464762337280,
+     "inactive_split_bytes.large_pool.current": 48103424,
+     "inactive_split_bytes.large_pool.freed": 2464714233856,
+     "inactive_split_bytes.large_pool.peak": 162594816,
+     "inactive_split_bytes.small_pool.allocated": 368477033984,
+     "inactive_split_bytes.small_pool.current": 9279488,
+     "inactive_split_bytes.small_pool.freed": 368467754496,
+     "inactive_split_bytes.small_pool.peak": 41911808,
+     "max_split_size": -1,
+     "num_alloc_retries": 0,
+     "num_ooms": 0,
+     "oversize_allocations.allocated": 0,
+     "oversize_allocations.current": 0,
+     "oversize_allocations.freed": 0,
+     "oversize_allocations.peak": 0,
+     "oversize_segments.allocated": 0,
+     "oversize_segments.current": 0,
+     "oversize_segments.freed": 0,
+     "oversize_segments.peak": 0,
+     "requested_bytes.all.allocated": 2699496701188,
+     "requested_bytes.all.current": 486060548,
+     "requested_bytes.all.freed": 2699010640640,
+     "requested_bytes.all.peak": 1262243472,
+     "requested_bytes.large_pool.allocated": 2352632215040,
+     "requested_bytes.large_pool.current": 463910912,
+     "requested_bytes.large_pool.freed": 2352168304128,
+     "requested_bytes.large_pool.peak": 1216584704,
+     "requested_bytes.small_pool.allocated": 346864486148,
+     "requested_bytes.small_pool.current": 22149636,
+     "requested_bytes.small_pool.freed": 346842336512,
+     "requested_bytes.small_pool.peak": 71870096,
+     "reserved_bytes.all.allocated": 1432354816,
+     "reserved_bytes.all.current": 1432354816,
+     "reserved_bytes.all.freed": 0,
+     "reserved_bytes.all.peak": 1432354816,
+     "reserved_bytes.large_pool.allocated": 1354760192,
+     "reserved_bytes.large_pool.current": 1354760192,
+     "reserved_bytes.large_pool.freed": 0,
+     "reserved_bytes.large_pool.peak": 1354760192,
+     "reserved_bytes.small_pool.allocated": 77594624,
+     "reserved_bytes.small_pool.current": 77594624,
+     "reserved_bytes.small_pool.freed": 0,
+     "reserved_bytes.small_pool.peak": 77594624,
+     "segment.all.allocated": 104,
+     "segment.all.current": 104,
+     "segment.all.freed": 0,
+     "segment.all.peak": 104,
+     "segment.large_pool.allocated": 67,
+     "segment.large_pool.current": 67,
+     "segment.large_pool.freed": 0,
+     "segment.large_pool.peak": 67,
+     "segment.small_pool.allocated": 37,
+     "segment.small_pool.current": 37,
+     "segment.small_pool.freed": 0,
+     "segment.small_pool.peak": 37
+   }
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/head_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "config": {
+     "activation_function": "tanh",
+     "bias": true,
+     "dropout_prob": null,
+     "head_type": "classification",
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layers": 2,
+     "num_labels": 2,
+     "use_pooler": false
+   },
+   "hidden_size": 768,
+   "model_class": "BertAdapterModel",
+   "model_name": "google-bert/bert-base-uncased",
+   "model_type": "bert",
+   "name": "cola",
+   "version": "0.2.0"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:594579be0de914d421f8d8c6a9957d397ae71059bd10629c23a0810b67278982
+ size 7191062
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db21e52d43d22f16af233d57438c41adcd7682abda660068c8dc752afab4b9f4
+ size 2370664
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "padding_side": "left",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_1069/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/adapter_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "config": {
+     "adapter_residual_before_ln": false,
+     "cross_adapter": false,
+     "dropout": 0.0,
+     "factorized_phm_W": true,
+     "factorized_phm_rule": false,
+     "hypercomplex_nonlinearity": "glorot-uniform",
+     "init_weights": "bert",
+     "inv_adapter": null,
+     "inv_adapter_reduction_factor": null,
+     "is_parallel": false,
+     "learn_phm": true,
+     "leave_out": [],
+     "ln_after": false,
+     "ln_before": false,
+     "mh_adapter": true,
+     "non_linearity": "swish",
+     "original_ln_after": true,
+     "original_ln_before": false,
+     "output_adapter": true,
+     "phm_bias": true,
+     "phm_c_init": "normal",
+     "phm_dim": 4,
+     "phm_init_range": 0.0001,
+     "phm_layer": false,
+     "phm_rank": 1,
+     "reduction_factor": 16,
+     "residual_before_ln": true,
+     "scaling": 1.0,
+     "shared_W_phm": false,
+     "shared_phm_rule": true,
+     "use_gating": false
+   },
+   "hidden_size": 768,
+   "model_class": "BertAdapterModel",
+   "model_name": "google-bert/bert-base-uncased",
+   "model_type": "bert",
+   "name": "cola",
+   "version": "0.2.0"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.3913530742857523}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/all_results_val.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.43209766412198286}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/eval_res.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/eval_res_val.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/gpu_stats.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 487881216,
3
+ "max_memory_allocated": 1280701440,
4
+ "memory_reserved": 1432354816,
5
+ "max_memory_reserved": 1432354816,
6
+ "memory_stats": {
7
+ "active.all.allocated": 407393,
8
+ "active.all.current": 519,
9
+ "active.all.freed": 406874,
10
+ "active.all.peak": 777,
11
+ "active.large_pool.allocated": 195997,
12
+ "active.large_pool.current": 81,
13
+ "active.large_pool.freed": 195916,
14
+ "active.large_pool.peak": 228,
15
+ "active.small_pool.allocated": 211396,
16
+ "active.small_pool.current": 438,
17
+ "active.small_pool.freed": 210958,
18
+ "active.small_pool.peak": 603,
19
+ "active_bytes.all.allocated": 627316937216,
20
+ "active_bytes.all.current": 487881216,
21
+ "active_bytes.all.freed": 626829056000,
22
+ "active_bytes.all.peak": 1280701440,
23
+ "active_bytes.large_pool.allocated": 546686381056,
24
+ "active_bytes.large_pool.current": 465698816,
25
+ "active_bytes.large_pool.freed": 546220682240,
26
+ "active_bytes.large_pool.peak": 1235008000,
27
+ "active_bytes.small_pool.allocated": 80630556160,
28
+ "active_bytes.small_pool.current": 22182400,
29
+ "active_bytes.small_pool.freed": 80608373760,
30
+ "active_bytes.small_pool.peak": 71908864,
31
+ "allocated_bytes.all.allocated": 627316937216,
32
+ "allocated_bytes.all.current": 487881216,
33
+ "allocated_bytes.all.freed": 626829056000,
34
+ "allocated_bytes.all.peak": 1280701440,
35
+ "allocated_bytes.large_pool.allocated": 546686381056,
36
+ "allocated_bytes.large_pool.current": 465698816,
37
+ "allocated_bytes.large_pool.freed": 546220682240,
38
+ "allocated_bytes.large_pool.peak": 1235008000,
39
+ "allocated_bytes.small_pool.allocated": 80630556160,
40
+ "allocated_bytes.small_pool.current": 22182400,
41
+ "allocated_bytes.small_pool.freed": 80608373760,
42
+ "allocated_bytes.small_pool.peak": 71908864,
43
+ "allocation.all.allocated": 407393,
44
+ "allocation.all.current": 519,
45
+ "allocation.all.freed": 406874,
46
+ "allocation.all.peak": 777,
47
+ "allocation.large_pool.allocated": 195997,
48
+ "allocation.large_pool.current": 81,
49
+ "allocation.large_pool.freed": 195916,
50
+ "allocation.large_pool.peak": 228,
51
+ "allocation.small_pool.allocated": 211396,
52
+ "allocation.small_pool.current": 438,
53
+ "allocation.small_pool.freed": 210958,
54
+ "allocation.small_pool.peak": 603,
55
+ "inactive_split.all.allocated": 254024,
56
+ "inactive_split.all.current": 51,
57
+ "inactive_split.all.freed": 253973,
58
+ "inactive_split.all.peak": 109,
59
+ "inactive_split.large_pool.allocated": 166367,
60
+ "inactive_split.large_pool.current": 18,
61
+ "inactive_split.large_pool.freed": 166349,
62
+ "inactive_split.large_pool.peak": 46,
63
+ "inactive_split.small_pool.allocated": 87657,
64
+ "inactive_split.small_pool.current": 33,
65
+ "inactive_split.small_pool.freed": 87624,
66
+ "inactive_split.small_pool.peak": 88,
67
+ "inactive_split_bytes.all.allocated": 637194957312,
68
+ "inactive_split_bytes.all.current": 57378304,
69
+ "inactive_split_bytes.all.freed": 637137579008,
70
+ "inactive_split_bytes.all.peak": 166851072,
71
+ "inactive_split_bytes.large_pool.allocated": 550988233216,
72
+ "inactive_split_bytes.large_pool.current": 48103424,
73
+ "inactive_split_bytes.large_pool.freed": 550940129792,
74
+ "inactive_split_bytes.large_pool.peak": 162594816,
75
+ "inactive_split_bytes.small_pool.allocated": 86206724096,
76
+ "inactive_split_bytes.small_pool.current": 9274880,
77
+ "inactive_split_bytes.small_pool.freed": 86197449216,
78
+ "inactive_split_bytes.small_pool.peak": 41911808,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_ooms": 0,
82
+ "oversize_allocations.allocated": 0,
83
+ "oversize_allocations.current": 0,
84
+ "oversize_allocations.freed": 0,
85
+ "oversize_allocations.peak": 0,
86
+ "oversize_segments.allocated": 0,
87
+ "oversize_segments.current": 0,
88
+ "oversize_segments.freed": 0,
89
+ "oversize_segments.peak": 0,
90
+ "requested_bytes.all.allocated": 601292072388,
91
+ "requested_bytes.all.current": 486065732,
92
+ "requested_bytes.all.freed": 600806006656,
93
+ "requested_bytes.all.peak": 1262243472,
94
+ "requested_bytes.large_pool.allocated": 520680939008,
95
+ "requested_bytes.large_pool.current": 463910912,
96
+ "requested_bytes.large_pool.freed": 520217028096,
97
+ "requested_bytes.large_pool.peak": 1216584704,
98
+ "requested_bytes.small_pool.allocated": 80611133380,
99
+ "requested_bytes.small_pool.current": 22154820,
100
+ "requested_bytes.small_pool.freed": 80588978560,
101
+ "requested_bytes.small_pool.peak": 71870096,
102
+ "reserved_bytes.all.allocated": 1432354816,
103
+ "reserved_bytes.all.current": 1432354816,
104
+ "reserved_bytes.all.freed": 0,
105
+ "reserved_bytes.all.peak": 1432354816,
106
+ "reserved_bytes.large_pool.allocated": 1354760192,
107
+ "reserved_bytes.large_pool.current": 1354760192,
108
+ "reserved_bytes.large_pool.freed": 0,
109
+ "reserved_bytes.large_pool.peak": 1354760192,
110
+ "reserved_bytes.small_pool.allocated": 77594624,
111
+ "reserved_bytes.small_pool.current": 77594624,
112
+ "reserved_bytes.small_pool.freed": 0,
113
+ "reserved_bytes.small_pool.peak": 77594624,
114
+ "segment.all.allocated": 104,
115
+ "segment.all.current": 104,
116
+ "segment.all.freed": 0,
117
+ "segment.all.peak": 104,
118
+ "segment.large_pool.allocated": 67,
119
+ "segment.large_pool.current": 67,
120
+ "segment.large_pool.freed": 0,
121
+ "segment.large_pool.peak": 67,
122
+ "segment.small_pool.allocated": 37,
123
+ "segment.small_pool.current": 37,
124
+ "segment.small_pool.freed": 0,
125
+ "segment.small_pool.peak": 37
126
+ }
127
+ }
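The counters above follow the key schema of PyTorch's CUDA caching-allocator statistics. As a minimal sketch (not the repo's actual logging code), a gpu_stats.json with this shape can be produced as below, assuming a CUDA-capable PyTorch install; the output path is illustrative:

```python
import json
import torch

def dump_gpu_stats(path="gpu_stats.json", device=0):
    """Serialize CUDA allocator statistics in the shape recorded above."""
    stats = {
        "memory_allocated": torch.cuda.memory_allocated(device),
        "max_memory_allocated": torch.cuda.max_memory_allocated(device),
        "memory_reserved": torch.cuda.memory_reserved(device),
        "max_memory_reserved": torch.cuda.max_memory_reserved(device),
        # memory_stats() returns a flat mapping with dotted keys such as
        # "allocation.all.current" and "reserved_bytes.large_pool.peak".
        "memory_stats": dict(torch.cuda.memory_stats(device)),
    }
    with open(path, "w") as f:
        json.dump(stats, f, indent=4, sort_keys=True)
```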
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/head_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "config": {
+ "activation_function": "tanh",
+ "bias": true,
+ "dropout_prob": null,
+ "head_type": "classification",
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layers": 2,
+ "num_labels": 2,
+ "use_pooler": false
+ },
+ "hidden_size": 768,
+ "model_class": "BertAdapterModel",
+ "model_name": "google-bert/bert-base-uncased",
+ "model_type": "bert",
+ "name": "cola",
+ "version": "0.2.0"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:482fc0834d1df5adce6bcf5146619aa13ecc1ab3ae7b12594fa52d245dc14955
+ size 7191062
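As with all the .bin weights in this commit, what is checked in is a Git LFS pointer rather than the tensor data itself: three lines giving the pointer spec version, the SHA-256 of the real payload, and its size in bytes (here ~7.2 MB of adapter weights). A small sketch of reading such a pointer, with an illustrative file name:

```python
def parse_lfs_pointer(path):
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])
    return fields

# e.g. parse_lfs_pointer("pytorch_adapter.bin")["size"] -> 7191062
```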
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:787ad3070eee0372b81b7af80aa69bb557487fb3af342c6227d4d2ff523a3daa
+ size 2370664
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "100": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "101": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "102": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "103": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "pad_token": "[PAD]",
+ "padding_side": "left",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
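Together with tokenizer.json, vocab.txt, and special_tokens_map.json above, this tokenizer_config.json makes each step_* directory a self-contained tokenizer checkpoint. A minimal reload sketch; the directory path is a placeholder:

```python
from transformers import AutoTokenizer

ckpt_dir = "step_213"  # placeholder: any step_* directory from this run
tok = AutoTokenizer.from_pretrained(ckpt_dir)  # resolves to BertTokenizer
batch = tok(["The boy quickly ran."], padding=True, truncation=True,
            max_length=512, return_tensors="pt")
```

Note that the config pins padding_side to "left", which is unusual for BERT classification but is preserved on reload.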
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_213/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/adapter_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+ "config": {
+ "adapter_residual_before_ln": false,
+ "cross_adapter": false,
+ "dropout": 0.0,
+ "factorized_phm_W": true,
+ "factorized_phm_rule": false,
+ "hypercomplex_nonlinearity": "glorot-uniform",
+ "init_weights": "bert",
+ "inv_adapter": null,
+ "inv_adapter_reduction_factor": null,
+ "is_parallel": false,
+ "learn_phm": true,
+ "leave_out": [],
+ "ln_after": false,
+ "ln_before": false,
+ "mh_adapter": true,
+ "non_linearity": "swish",
+ "original_ln_after": true,
+ "original_ln_before": false,
+ "output_adapter": true,
+ "phm_bias": true,
+ "phm_c_init": "normal",
+ "phm_dim": 4,
+ "phm_init_range": 0.0001,
+ "phm_layer": false,
+ "phm_rank": 1,
+ "reduction_factor": 16,
+ "residual_before_ln": true,
+ "scaling": 1.0,
+ "shared_W_phm": false,
+ "shared_phm_rule": true,
+ "use_gating": false
+ },
+ "hidden_size": 768,
+ "model_class": "BertAdapterModel",
+ "model_name": "google-bert/bert-base-uncased",
+ "model_type": "bert",
+ "name": "cola",
+ "version": "0.2.0"
+ }
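With mh_adapter and output_adapter both true, swish non-linearity, and reduction_factor 16 (a 768/16 = 48-dimensional bottleneck), this is the classic Houlsby-style bottleneck adapter. A hedged sketch of building an equivalent setup with the AdapterHub adapters package, which the BertAdapterModel class name and config version suggest, though the diff does not show the training script:

```python
from adapters import BertAdapterModel, HoulsbyConfig

model = BertAdapterModel.from_pretrained("google-bert/bert-base-uncased")
# Adapters after both the attention and feed-forward sublayers,
# matching mh_adapter=true / output_adapter=true above.
model.add_adapter("cola", config=HoulsbyConfig(reduction_factor=16,
                                               non_linearity="swish"))
model.add_classification_head("cola", num_labels=2,
                              activation_function="tanh")
model.train_adapter("cola")  # freeze the backbone, train only the adapter
```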
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/all_results.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.40686271625095977}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/all_results_val.json ADDED
@@ -0,0 +1 @@
+ {"eval_matthews_correlation": 0.43657984362111635}
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/eval_res.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/eval_res_val.json ADDED
The diff for this file is too large to render. See raw diff
 
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/gpu_stats.json ADDED
@@ -0,0 +1,127 @@
+ {
+ "memory_allocated": 487876608,
+ "max_memory_allocated": 1280701440,
+ "memory_reserved": 1432354816,
+ "max_memory_reserved": 1432354816,
+ "memory_stats": {
+ "active.all.allocated": 751815,
+ "active.all.current": 519,
+ "active.all.freed": 751296,
+ "active.all.peak": 777,
+ "active.large_pool.allocated": 364577,
+ "active.large_pool.current": 81,
+ "active.large_pool.freed": 364496,
+ "active.large_pool.peak": 228,
+ "active.small_pool.allocated": 387238,
+ "active.small_pool.current": 438,
+ "active.small_pool.freed": 386800,
+ "active.small_pool.peak": 603,
+ "active_bytes.all.allocated": 1171049445376,
+ "active_bytes.all.current": 487876608,
+ "active_bytes.all.freed": 1170561568768,
+ "active_bytes.all.peak": 1280701440,
+ "active_bytes.large_pool.allocated": 1023755623424,
+ "active_bytes.large_pool.current": 465698816,
+ "active_bytes.large_pool.freed": 1023289924608,
+ "active_bytes.large_pool.peak": 1235008000,
+ "active_bytes.small_pool.allocated": 147293821952,
+ "active_bytes.small_pool.current": 22177792,
+ "active_bytes.small_pool.freed": 147271644160,
+ "active_bytes.small_pool.peak": 71908864,
+ "allocated_bytes.all.allocated": 1171049445376,
+ "allocated_bytes.all.current": 487876608,
+ "allocated_bytes.all.freed": 1170561568768,
+ "allocated_bytes.all.peak": 1280701440,
+ "allocated_bytes.large_pool.allocated": 1023755623424,
+ "allocated_bytes.large_pool.current": 465698816,
+ "allocated_bytes.large_pool.freed": 1023289924608,
+ "allocated_bytes.large_pool.peak": 1235008000,
+ "allocated_bytes.small_pool.allocated": 147293821952,
+ "allocated_bytes.small_pool.current": 22177792,
+ "allocated_bytes.small_pool.freed": 147271644160,
+ "allocated_bytes.small_pool.peak": 71908864,
+ "allocation.all.allocated": 751815,
+ "allocation.all.current": 519,
+ "allocation.all.freed": 751296,
+ "allocation.all.peak": 777,
+ "allocation.large_pool.allocated": 364577,
+ "allocation.large_pool.current": 81,
+ "allocation.large_pool.freed": 364496,
+ "allocation.large_pool.peak": 228,
+ "allocation.small_pool.allocated": 387238,
+ "allocation.small_pool.current": 438,
+ "allocation.small_pool.freed": 386800,
+ "allocation.small_pool.peak": 603,
+ "inactive_split.all.allocated": 459348,
+ "inactive_split.all.current": 51,
+ "inactive_split.all.freed": 459297,
+ "inactive_split.all.peak": 109,
+ "inactive_split.large_pool.allocated": 307153,
+ "inactive_split.large_pool.current": 18,
+ "inactive_split.large_pool.freed": 307135,
+ "inactive_split.large_pool.peak": 46,
+ "inactive_split.small_pool.allocated": 152195,
+ "inactive_split.small_pool.current": 33,
+ "inactive_split.small_pool.freed": 152162,
+ "inactive_split.small_pool.peak": 88,
+ "inactive_split_bytes.all.allocated": 1188144974848,
+ "inactive_split_bytes.all.current": 57382912,
+ "inactive_split_bytes.all.freed": 1188087591936,
+ "inactive_split_bytes.all.peak": 166851072,
+ "inactive_split_bytes.large_pool.allocated": 1031282970112,
+ "inactive_split_bytes.large_pool.current": 48103424,
+ "inactive_split_bytes.large_pool.freed": 1031234866688,
+ "inactive_split_bytes.large_pool.peak": 162594816,
+ "inactive_split_bytes.small_pool.allocated": 156862004736,
+ "inactive_split_bytes.small_pool.current": 9279488,
+ "inactive_split_bytes.small_pool.freed": 156852725248,
+ "inactive_split_bytes.small_pool.peak": 41911808,
+ "max_split_size": -1,
+ "num_alloc_retries": 0,
+ "num_ooms": 0,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 1127665463740,
+ "requested_bytes.all.current": 486061124,
+ "requested_bytes.all.freed": 1127179402616,
+ "requested_bytes.all.peak": 1262243472,
+ "requested_bytes.large_pool.allocated": 980404817408,
+ "requested_bytes.large_pool.current": 463910912,
+ "requested_bytes.large_pool.freed": 979940906496,
+ "requested_bytes.large_pool.peak": 1216584704,
+ "requested_bytes.small_pool.allocated": 147260646332,
+ "requested_bytes.small_pool.current": 22150212,
+ "requested_bytes.small_pool.freed": 147238496120,
+ "requested_bytes.small_pool.peak": 71870096,
+ "reserved_bytes.all.allocated": 1432354816,
+ "reserved_bytes.all.current": 1432354816,
+ "reserved_bytes.all.freed": 0,
+ "reserved_bytes.all.peak": 1432354816,
+ "reserved_bytes.large_pool.allocated": 1354760192,
+ "reserved_bytes.large_pool.current": 1354760192,
+ "reserved_bytes.large_pool.freed": 0,
+ "reserved_bytes.large_pool.peak": 1354760192,
+ "reserved_bytes.small_pool.allocated": 77594624,
+ "reserved_bytes.small_pool.current": 77594624,
+ "reserved_bytes.small_pool.freed": 0,
+ "reserved_bytes.small_pool.peak": 77594624,
+ "segment.all.allocated": 104,
+ "segment.all.current": 104,
+ "segment.all.freed": 0,
+ "segment.all.peak": 104,
+ "segment.large_pool.allocated": 67,
+ "segment.large_pool.current": 67,
+ "segment.large_pool.freed": 0,
+ "segment.large_pool.peak": 67,
+ "segment.small_pool.allocated": 37,
+ "segment.small_pool.current": 37,
+ "segment.small_pool.freed": 0,
+ "segment.small_pool.peak": 37
+ }
+ }
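A few things this snapshot shows directly: num_ooms and num_alloc_retries are both 0, reserved_bytes never shrinks (current == peak == 1432354816, freed == 0), and current allocations (~488 MB) sit well under the ~1.28 GB peak, i.e. the caching allocator grabbed ~1.43 GB once and served the whole run from it. A small sketch of pulling such derived figures back out of the file; the path is illustrative:

```python
import json

with open("gpu_stats.json") as f:  # e.g. the step_427 file above
    stats = json.load(f)["memory_stats"]

# Headroom the caching allocator holds beyond live allocations.
headroom = (stats["reserved_bytes.all.current"]
            - stats["allocated_bytes.all.current"])
assert stats["num_ooms"] == 0 and stats["num_alloc_retries"] == 0
print(f"allocator headroom: {headroom / 2**20:.1f} MiB")  # ~900 MiB here
```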
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/head_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "config": {
+ "activation_function": "tanh",
+ "bias": true,
+ "dropout_prob": null,
+ "head_type": "classification",
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1
+ },
+ "layers": 2,
+ "num_labels": 2,
+ "use_pooler": false
+ },
+ "hidden_size": 768,
+ "model_class": "BertAdapterModel",
+ "model_name": "google-bert/bert-base-uncased",
+ "model_type": "bert",
+ "name": "cola",
+ "version": "0.2.0"
+ }
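Each step_* directory pairs adapter_config.json/pytorch_adapter.bin with this head_config.json and pytorch_model_head.bin, the layout produced when an adapter is saved together with its prediction head. A hedged inference-reload sketch, again assuming the AdapterHub adapters API (including its with_head semantics) and a placeholder path:

```python
import torch
from adapters import BertAdapterModel
from transformers import AutoTokenizer

ckpt_dir = "step_427"  # placeholder: any step_* directory from this run
model = BertAdapterModel.from_pretrained("google-bert/bert-base-uncased")
name = model.load_adapter(ckpt_dir, with_head=True)  # restores the head too
model.set_active_adapters(name)
model.eval()

tok = AutoTokenizer.from_pretrained(ckpt_dir)
with torch.no_grad():
    out = model(**tok("They drank the pub dry.", return_tensors="pt"))
print(out.logits.argmax(-1))  # LABEL_0 vs LABEL_1, per label2id above
```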
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:771222b26db5d88895bc699fb79e70c9d1da824caccdf0a75260af1dfbfadf09
+ size 7191062
reduction_factor/outputs/cola/google-bert/bert-base-uncased_adapterstrain_val_0.0001_12345_16/step_427/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21e3cb68f6a3745c4ff986922eaa19c1cafa7f6161ddb71bc904ddf864243706
+ size 2370664