almost completed LA runs, SST-2 remains
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log +846 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la.log +846 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json +130 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log +846 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json +130 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json +130 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json +130 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
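The result filenames above encode the Laplace-approximation settings evaluated at each checkpoint. As a rough illustration only, here is a minimal sketch of such an evaluation, assuming the `laplace-torch` library and toy stand-ins for the real BERT+adapter classifier and CoLA data; the `kron`, `all`, `mc`, and `1000` parts of the `la_kron_all_homo_mc_corr_1000` suffix map naturally onto its options, while the `homo` and `corr` parts are left uninterpreted here:

```python
# Minimal sketch, not the repository's actual script: assumes the laplace-torch
# library; the toy model and random data below are hypothetical stand-ins for
# the fine-tuned BERT+adapter classifier and the CoLA loaders.
import torch
from torch.utils.data import DataLoader, TensorDataset
from laplace import Laplace

# Toy two-class classifier (the log reports "Number of labels detected = 2").
model = torch.nn.Sequential(
    torch.nn.Linear(16, 32), torch.nn.Tanh(), torch.nn.Linear(32, 2)
)
X, y = torch.randn(64, 16), torch.randint(0, 2, (64,))
train_loader = DataLoader(TensorDataset(X, y), batch_size=8)

la = Laplace(
    model,
    "classification",
    subset_of_weights="all",    # "all" in the result-file suffix
    hessian_structure="kron",   # "kron" in the result-file suffix
)
la.fit(train_loader)  # fit the Kronecker-factored Gaussian posterior

# Monte-Carlo predictive with 1000 samples ("mc" / "1000" in the suffix).
# Internally this draws from the GLM predictive N(f_mu, f_var), whose shapes
# the log prints as [N, 2] and [N, 2, 2].
probs = la(X, pred_type="glm", link_approx="mc", n_samples=1000)
```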
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log
ADDED
@@ -0,0 +1,846 @@
1 |
+
06/01/2024 23:31:05 - INFO - __main__ - Number of labels detected = 2
|
2 |
+
06/01/2024 23:31:06 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
|
3 |
+
06/01/2024 23:31:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/adapter_config.json
|
4 |
+
06/01/2024 23:31:07 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
|
5 |
+
06/01/2024 23:31:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_adapter.bin
|
6 |
+
06/01/2024 23:31:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/head_config.json
|
7 |
+
06/01/2024 23:31:07 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
|
8 |
+
06/01/2024 23:31:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_model_head.bin
|
9 |
+
06/01/2024 23:31:07 - INFO - __main__ - Adapter Name = cola
|
10 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
|
11 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
|
12 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
|
13 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
|
14 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
|
15 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
|
16 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
|
17 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
|
18 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
|
19 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
|
20 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
|
21 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
|
22 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
|
23 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
|
24 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
|
25 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
|
26 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
|
27 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
|
28 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
|
29 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
|
30 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
|
31 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
|
32 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
|
33 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
|
34 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
|
35 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
|
36 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
|
37 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
|
38 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
|
39 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
|
40 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
|
41 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
|
42 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
|
43 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
|
44 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
|
45 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
|
46 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
|
47 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
|
48 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
|
49 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
|
50 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
|
51 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
|
52 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
|
53 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
|
54 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
|
55 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
|
56 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
|
57 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
|
58 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
|
59 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
|
60 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
|
61 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
|
62 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
|
63 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
|
64 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
|
65 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
|
66 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
|
67 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
|
68 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
|
69 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
|
70 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
|
71 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
|
72 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
|
73 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
|
74 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
|
75 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
|
76 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
|
77 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
|
78 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
|
79 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
|
80 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
|
81 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
|
82 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
|
83 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
|
84 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
|
85 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
|
86 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
|
87 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
|
88 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
|
89 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
|
90 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
|
91 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
|
92 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
|
93 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
|
94 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
|
95 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
|
96 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
|
97 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
|
98 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
|
99 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
|
100 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
|
101 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
|
102 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
|
103 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
|
104 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
|
105 |
+
06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
|
106 |
+
06/01/2024 23:31:07 - INFO - __main__ - heads.cola.1.weight
|
107 |
+
06/01/2024 23:31:07 - INFO - __main__ - heads.cola.1.bias
|
108 |
+
06/01/2024 23:31:07 - INFO - __main__ - heads.cola.4.weight
|
109 |
+
06/01/2024 23:31:07 - INFO - __main__ - heads.cola.4.bias
|
110 |
+
06/01/2024 23:31:07 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
111 |
+
06/01/2024 23:31:07 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
112 |
+
06/01/2024 23:31:07 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
113 |
+
06/01/2024 23:35:56 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
|
114 |
+
06/01/2024 23:35:56 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
|
115 |
+
06/01/2024 23:35:56 - INFO - __main__ - tensor([[-0.1079, 0.3158],
|
116 |
+
[-0.1277, 0.1944],
|
117 |
+
[-0.1159, 0.2506],
|
118 |
+
...,
|
119 |
+
[-0.1310, 0.2133],
|
120 |
+
[-0.1701, 0.2358],
|
121 |
+
[-0.1486, 0.1628]], device='cuda:0')
|
122 |
+
06/01/2024 23:35:56 - INFO - __main__ - tensor([[[12.4738, 12.2974],
|
123 |
+
[12.2974, 12.4902]],
|
124 |
+
|
125 |
+
[[11.5270, 11.1531],
|
126 |
+
[11.1531, 11.4932]],
|
127 |
+
|
128 |
+
[[11.6347, 11.4029],
|
129 |
+
[11.4029, 11.6472]],
|
130 |
+
|
131 |
+
...,
|
132 |
+
|
133 |
+
[[13.0798, 12.7824],
|
134 |
+
[12.7824, 13.1277]],
|
135 |
+
|
136 |
+
[[11.2542, 11.0789],
|
137 |
+
[11.0788, 11.2619]],
|
138 |
+
|
139 |
+
[[11.7360, 11.4998],
|
140 |
+
[11.4998, 11.7346]]], device='cuda:0')
|
141 |
+
06/01/2024 23:35:56 - INFO - __main__ - ***** Completed training *****
|
142 |
+
06/01/2024 23:35:59 - INFO - __main__ - Number of labels detected = 2
|
143 |
+
06/01/2024 23:36:00 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
|
144 |
+
06/01/2024 23:36:00 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/adapter_config.json
|
145 |
+
06/01/2024 23:36:00 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
|
146 |
+
06/01/2024 23:36:01 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_adapter.bin
|
147 |
+
06/01/2024 23:36:01 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/head_config.json
|
148 |
+
06/01/2024 23:36:01 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
|
149 |
+
06/01/2024 23:36:01 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_model_head.bin
|
150 |
+
06/01/2024 23:36:01 - INFO - __main__ - Adapter Name = cola
|
151 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
|
152 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
|
153 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
|
154 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
|
155 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
|
156 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
|
157 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
|
158 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
|
159 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
|
160 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
|
161 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
|
162 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
|
163 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
|
164 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
|
165 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
|
166 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
|
167 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
|
168 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
|
169 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
|
170 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
|
171 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
|
172 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
|
173 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
|
174 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
|
175 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
|
176 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
|
177 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
|
178 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
|
179 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
|
180 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
|
181 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
|
182 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
|
183 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
|
184 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
|
185 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
|
186 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
|
187 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
|
188 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
|
189 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
|
190 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
|
191 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
|
192 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
|
193 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
|
194 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
|
195 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
|
196 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
|
197 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
|
198 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
|
199 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
|
200 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
|
201 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
|
202 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
|
203 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
|
204 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
|
205 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
|
206 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
|
207 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
|
208 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
|
209 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
|
210 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
|
211 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
|
212 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
|
213 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
|
214 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
|
215 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
|
216 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
|
217 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
|
218 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
|
219 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
|
220 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
|
221 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
|
222 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
|
223 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
|
224 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
|
225 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
|
226 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
|
227 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
|
228 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
|
229 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
|
230 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
|
231 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
|
232 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
|
233 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
|
234 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
|
235 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
|
236 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
|
237 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
|
238 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
|
239 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
|
240 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
|
241 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
|
242 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
|
243 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
|
244 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
|
245 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
|
246 |
+
06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
|
247 |
+
06/01/2024 23:36:01 - INFO - __main__ - heads.cola.1.weight
|
248 |
+
06/01/2024 23:36:01 - INFO - __main__ - heads.cola.1.bias
|
249 |
+
06/01/2024 23:36:01 - INFO - __main__ - heads.cola.4.weight
|
250 |
+
06/01/2024 23:36:01 - INFO - __main__ - heads.cola.4.bias
|
251 |
+
06/01/2024 23:36:02 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
252 |
+
06/01/2024 23:36:02 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
253 |
+
06/01/2024 23:36:02 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
254 |
+
06/01/2024 23:40:56 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
|
255 |
+
06/01/2024 23:40:56 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
|
256 |
+
06/01/2024 23:40:56 - INFO - __main__ - tensor([[-2.1297, 2.2213],
|
257 |
+
[-1.8189, 1.7128],
|
258 |
+
[-1.5682, 1.5394],
|
259 |
+
...,
|
260 |
+
[-2.5910, 2.4892],
|
261 |
+
[-0.3724, 0.3888],
|
262 |
+
[-0.4022, 0.3224]], device='cuda:0')
|
263 |
+
06/01/2024 23:40:56 - INFO - __main__ - tensor([[[4.5618, 2.1410],
|
264 |
+
[2.1410, 4.4561]],
|
265 |
+
|
266 |
+
[[3.3290, 2.0678],
|
267 |
+
[2.0678, 3.3150]],
|
268 |
+
|
269 |
+
[[3.4761, 1.5668],
|
270 |
+
[1.5668, 3.3424]],
|
271 |
+
|
272 |
+
...,
|
273 |
+
|
274 |
+
[[4.5626, 3.5440],
|
275 |
+
[3.5440, 4.6095]],
|
276 |
+
|
277 |
+
[[3.2903, 0.2714],
|
278 |
+
[0.2714, 3.0593]],
|
279 |
+
|
280 |
+
[[3.2947, 0.1413],
|
281 |
+
[0.1413, 2.9931]]], device='cuda:0')
|
282 |
+
06/01/2024 23:40:56 - INFO - __main__ - ***** Completed training *****
|
283 |
+
06/01/2024 23:40:58 - INFO - __main__ - Number of labels detected = 2
|
284 |
+
06/01/2024 23:40:59 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
|
285 |
+
06/01/2024 23:40:59 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/adapter_config.json
|
286 |
+
06/01/2024 23:40:59 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
|
287 |
+
06/01/2024 23:40:59 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_adapter.bin
|
288 |
+
06/01/2024 23:40:59 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/head_config.json
|
289 |
+
06/01/2024 23:40:59 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
|
290 |
+
06/01/2024 23:40:59 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_model_head.bin
|
291 |
+
06/01/2024 23:40:59 - INFO - __main__ - Adapter Name = cola
|
292 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
|
293 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
|
294 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
|
295 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
|
296 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
|
297 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
|
298 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
|
299 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
|
300 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
|
301 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
|
302 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
|
303 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
|
304 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
|
305 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
|
306 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
|
307 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
|
308 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
|
309 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
|
310 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
|
311 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
|
312 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
|
313 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
|
314 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
|
315 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
|
316 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
|
317 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
|
318 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
|
319 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
|
320 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
|
321 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
|
322 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
|
323 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
|
324 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
|
325 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
|
326 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
|
327 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
|
328 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
|
329 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
|
330 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
|
331 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
|
332 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
|
333 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
|
334 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
|
335 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
|
336 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
|
337 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
|
338 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
|
339 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
|
340 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
|
341 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
|
342 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
|
343 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
|
344 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
|
345 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
|
346 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
|
347 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
|
348 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
|
349 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
|
350 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
|
351 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
|
352 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
|
353 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
|
354 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
|
355 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
|
356 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
|
357 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
|
358 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
|
359 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
|
360 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
|
361 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
|
362 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
|
363 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
|
364 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
|
365 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
|
366 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
|
367 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
|
368 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
|
369 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
|
370 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
|
371 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
|
372 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
|
373 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
|
374 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
|
375 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
|
376 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
|
377 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
|
378 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
|
379 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
|
380 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
|
381 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
|
382 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
|
383 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
|
384 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
|
385 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
|
386 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
|
387 |
+
06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
|
388 |
+
06/01/2024 23:40:59 - INFO - __main__ - heads.cola.1.weight
|
389 |
+
06/01/2024 23:40:59 - INFO - __main__ - heads.cola.1.bias
|
390 |
+
06/01/2024 23:40:59 - INFO - __main__ - heads.cola.4.weight
|
391 |
+
06/01/2024 23:40:59 - INFO - __main__ - heads.cola.4.bias
|
392 |
+
06/01/2024 23:41:00 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
393 |
+
06/01/2024 23:41:00 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
394 |
+
06/01/2024 23:41:00 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
395 |
+
06/01/2024 23:46:03 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
|
396 |
+
06/01/2024 23:46:03 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
|
397 |
+
06/01/2024 23:46:03 - INFO - __main__ - tensor([[-2.6237, 2.6967],
|
398 |
+
[-2.0123, 1.8970],
|
399 |
+
[-1.4105, 1.4059],
|
400 |
+
...,
|
401 |
+
[-3.3720, 3.2471],
|
402 |
+
[-0.4277, 0.4479],
|
403 |
+
[-0.6115, 0.5214]], device='cuda:0')
|
404 |
+
06/01/2024 23:46:03 - INFO - __main__ - tensor([[[ 5.0017, 1.7107],
|
405 |
+
[ 1.7107, 4.7907]],
|
406 |
+
|
407 |
+
[[ 3.6648, 1.0633],
|
408 |
+
[ 1.0633, 3.5681]],
|
409 |
+
|
410 |
+
[[ 3.6117, 0.3526],
|
411 |
+
[ 0.3526, 3.3652]],
|
412 |
+
|
413 |
+
...,
|
414 |
+
|
415 |
+
[[ 4.8242, 3.7225],
|
416 |
+
[ 3.7225, 4.8405]],
|
417 |
+
|
418 |
+
[[ 4.8466, -1.7092],
|
419 |
+
[-1.7092, 4.2847]],
|
420 |
+
|
421 |
+
[[ 4.6424, -1.6387],
|
422 |
+
[-1.6387, 4.0598]]], device='cuda:0')
|
423 |
+
06/01/2024 23:46:03 - INFO - __main__ - ***** Completed training *****
|
+06/01/2024 23:46:06 - INFO - __main__ - Number of labels detected = 2
+06/01/2024 23:46:06 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/01/2024 23:46:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/adapter_config.json
+06/01/2024 23:46:07 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/01/2024 23:46:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_adapter.bin
+06/01/2024 23:46:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/head_config.json
+06/01/2024 23:46:07 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/01/2024 23:46:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_model_head.bin
+06/01/2024 23:46:07 - INFO - __main__ - Adapter Name = cola
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/01/2024 23:46:07 - INFO - __main__ - heads.cola.1.weight
+06/01/2024 23:46:07 - INFO - __main__ - heads.cola.1.bias
+06/01/2024 23:46:07 - INFO - __main__ - heads.cola.4.weight
+06/01/2024 23:46:07 - INFO - __main__ - heads.cola.4.bias
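The adapters.loading messages and the parameter list above correspond to restoring a saved bottleneck adapter (adapter_down / adapter_up projections) plus its classification head. A sketch of the equivalent calls with the adapters library; this is illustrative, since the training script itself is not included in this diff:

```python
from adapters import AutoAdapterModel

# Restore the checkpoint whose files appear in the log above
# (adapter_config.json, pytorch_adapter.bin, head_config.json,
# pytorch_model_head.bin) from one of this repo's output directories.
ckpt = "./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999"
model = AutoAdapterModel.from_pretrained("bert-base-uncased")
name = model.load_adapter(ckpt)   # loads the 'cola' adapter and its saved head
model.set_active_adapters(name)   # route the forward pass through the adapter
```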
+06/01/2024 23:46:08 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/01/2024 23:46:08 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/01/2024 23:46:08 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
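The logged samples are tokenized CoLA sentences. For reference, they can be decoded back to text with the model's tokenizer (101 and 102 are the [CLS] and [SEP] special tokens of bert-base-uncased):

```python
from transformers import AutoTokenizer

# Decode the first logged sample back into the original sentence.
tok = AutoTokenizer.from_pretrained("bert-base-uncased")
print(tok.decode([101, 1045, 12781, 1996, 7427, 1012, 102]))
```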
+06/01/2024 23:51:11 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/01/2024 23:51:11 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/01/2024 23:51:11 - INFO - __main__ - tensor([[-2.9662, 2.9773],
+        [-2.7168, 2.5565],
+        [-1.7817, 1.7464],
+        ...,
+        [-3.3487, 3.2080],
+        [-1.8054, 1.7300],
+        [-1.0835, 0.9648]], device='cuda:0')
+06/01/2024 23:51:11 - INFO - __main__ - tensor([[[ 4.7834, 2.8649],
+         [ 2.8649, 4.6725]],
+
+        [[ 3.9393, 2.2446],
+         [ 2.2446, 3.9447]],
+
+        [[ 3.9373, 0.4859],
+         [ 0.4859, 3.6285]],
+
+        ...,
+
+        [[ 5.0232, 3.7086],
+         [ 3.7086, 5.0201]],
+
+        [[ 8.5577, -3.8952],
+         [-3.8952, 7.9227]],
+
+        [[ 5.8607, -2.4746],
+         [-2.4746, 5.3347]]], device='cuda:0')
+06/01/2024 23:51:11 - INFO - __main__ - ***** Completed training *****
+06/01/2024 23:51:14 - INFO - __main__ - Number of labels detected = 2
+06/01/2024 23:51:15 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/01/2024 23:51:15 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/adapter_config.json
+06/01/2024 23:51:15 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/01/2024 23:51:15 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_adapter.bin
+06/01/2024 23:51:15 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/head_config.json
+06/01/2024 23:51:15 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/01/2024 23:51:15 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_model_head.bin
+06/01/2024 23:51:15 - INFO - __main__ - Adapter Name = cola
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/01/2024 23:51:15 - INFO - __main__ - heads.cola.1.weight
+06/01/2024 23:51:15 - INFO - __main__ - heads.cola.1.bias
+06/01/2024 23:51:15 - INFO - __main__ - heads.cola.4.weight
+06/01/2024 23:51:15 - INFO - __main__ - heads.cola.4.bias
+06/01/2024 23:51:16 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/01/2024 23:51:16 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/01/2024 23:51:16 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/01/2024 23:56:23 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/01/2024 23:56:23 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/01/2024 23:56:23 - INFO - __main__ - tensor([[-3.3699, 3.3674],
+        [-3.1780, 3.0025],
+        [-2.2889, 2.2263],
+        ...,
+        [-4.2645, 4.0783],
+        [-2.1805, 2.0611],
+        [-1.5461, 1.3975]], device='cuda:0')
+06/01/2024 23:56:23 - INFO - __main__ - tensor([[[ 5.1090, 2.1602],
+         [ 2.1602, 4.9487]],
+
+        [[ 4.3270, 1.8304],
+         [ 1.8304, 4.3619]],
+
+        [[ 5.2377, -0.5261],
+         [ -0.5261, 4.6952]],
+
+        ...,
+
+        [[ 5.2573, 4.1918],
+         [ 4.1918, 5.2629]],
+
+        [[ 17.0288, -12.2766],
+         [-12.2766, 16.0319]],
+
+        [[ 14.0171, -10.2656],
+         [-10.2656, 12.9278]]], device='cuda:0')
+06/01/2024 23:56:23 - INFO - __main__ - ***** Completed training *****
+06/01/2024 23:56:26 - INFO - __main__ - Number of labels detected = 2
+06/01/2024 23:56:27 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/01/2024 23:56:27 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/adapter_config.json
+06/01/2024 23:56:27 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/01/2024 23:56:27 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_adapter.bin
+06/01/2024 23:56:27 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/head_config.json
+06/01/2024 23:56:27 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/01/2024 23:56:27 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_model_head.bin
+06/01/2024 23:56:27 - INFO - __main__ - Adapter Name = cola
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/01/2024 23:56:28 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/01/2024 23:56:28 - INFO - __main__ - heads.cola.1.weight
+06/01/2024 23:56:28 - INFO - __main__ - heads.cola.1.bias
+06/01/2024 23:56:28 - INFO - __main__ - heads.cola.4.weight
+06/01/2024 23:56:28 - INFO - __main__ - heads.cola.4.bias
+06/01/2024 23:56:28 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/01/2024 23:56:28 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/01/2024 23:56:28 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 00:01:37 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/02/2024 00:01:37 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/02/2024 00:01:37 - INFO - __main__ - tensor([[-3.2560, 3.2816],
+        [-3.1414, 2.9828],
+        [-2.0300, 2.0107],
+        ...,
+        [-4.2195, 4.0517],
+        [-2.9542, 2.8213],
+        [-1.6776, 1.5290]], device='cuda:0')
+06/02/2024 00:01:37 - INFO - __main__ - tensor([[[ 4.9482, 1.9264],
+         [ 1.9264, 4.7548]],
+
+        [[ 4.2033, 1.7314],
+         [ 1.7314, 4.2234]],
+
+        [[ 5.0032, -0.7315],
+         [ -0.7315, 4.4748]],
+
+        ...,
+
+        [[ 5.1036, 3.9530],
+         [ 3.9530, 5.1016]],
+
+        [[ 11.9591, -6.0827],
+         [ -6.0827, 11.4617]],
+
+        [[ 14.0977, -10.2441],
+         [-10.2441, 12.9932]]], device='cuda:0')
+06/02/2024 00:01:37 - INFO - __main__ - ***** Completed training *****
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": -0.02929206145132745}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff.
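Each all_results_*.json file holds a single Matthews correlation coefficient for its checkpoint. For reference, a sketch of how such a value can be computed with scikit-learn; the labels below are stand-ins, since the repo's evaluation code is not shown in this diff:

```python
from sklearn.metrics import matthews_corrcoef

# Stand-in labels; in the runs above, y_pred would be the argmax over the
# (Laplace-averaged) class probabilities on the CoLA validation split.
y_true = [1, 0, 0, 1, 1, 0]
y_pred = [1, 0, 1, 1, 0, 0]
print(matthews_corrcoef(y_true, y_pred))
```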
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 969647616,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8908701696,
+    "max_memory_reserved": 8908701696,
+    "memory_stats": {
+        "active.all.allocated": 2905439,
+        "active.all.current": 1234,
+        "active.all.freed": 2904205,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 360903,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 360619,
+        "active.large_pool.peak": 384,
+        "active.small_pool.allocated": 2544536,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 2543586,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 2036389039616,
+        "active_bytes.all.current": 969647616,
+        "active_bytes.all.freed": 2035419392000,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 1464350304256,
+        "active_bytes.large_pool.current": 952457216,
+        "active_bytes.large_pool.freed": 1463397847040,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 572038735360,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 572021544960,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 2036389039616,
+        "allocated_bytes.all.current": 969647616,
+        "allocated_bytes.all.freed": 2035419392000,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 1464350304256,
+        "allocated_bytes.large_pool.current": 952457216,
+        "allocated_bytes.large_pool.freed": 1463397847040,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 572038735360,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 572021544960,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 2905439,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 2904205,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 360903,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 360619,
+        "allocation.large_pool.peak": 384,
+        "allocation.small_pool.allocated": 2544536,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 2543586,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 1420248,
+        "inactive_split.all.current": 170,
+        "inactive_split.all.freed": 1420078,
+        "inactive_split.all.peak": 220,
+        "inactive_split.large_pool.allocated": 166957,
+        "inactive_split.large_pool.current": 43,
+        "inactive_split.large_pool.freed": 166914,
+        "inactive_split.large_pool.peak": 96,
+        "inactive_split.small_pool.allocated": 1253291,
+        "inactive_split.small_pool.current": 127,
+        "inactive_split.small_pool.freed": 1253164,
+        "inactive_split.small_pool.peak": 144,
+        "inactive_split_bytes.all.allocated": 2042948382208,
+        "inactive_split_bytes.all.current": 148134400,
+        "inactive_split_bytes.all.freed": 2042800247808,
+        "inactive_split_bytes.all.peak": 896064512,
+        "inactive_split_bytes.large_pool.allocated": 1450380810240,
+        "inactive_split_bytes.large_pool.current": 106604544,
+        "inactive_split_bytes.large_pool.freed": 1450274205696,
+        "inactive_split_bytes.large_pool.peak": 859522048,
+        "inactive_split_bytes.small_pool.allocated": 592567571968,
+        "inactive_split_bytes.small_pool.current": 41529856,
+        "inactive_split_bytes.small_pool.freed": 592526042112,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 389,
+        "num_device_free": 47,
+        "num_ooms": 0,
+        "num_sync_all_streams": 1,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 1994872523059,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 1993905476347,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 1423070574288,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 1422120560840,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 571801948771,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 571784915507,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 9214885888,
+        "reserved_bytes.all.current": 8908701696,
+        "reserved_bytes.all.freed": 306184192,
+        "reserved_bytes.all.peak": 8908701696,
+        "reserved_bytes.large_pool.allocated": 9003073536,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 230686720,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 211812352,
+        "reserved_bytes.small_pool.current": 136314880,
+        "reserved_bytes.small_pool.freed": 75497472,
+        "reserved_bytes.small_pool.peak": 136314880,
+        "segment.all.allocated": 389,
+        "segment.all.current": 342,
+        "segment.all.freed": 47,
+        "segment.all.peak": 342,
+        "segment.large_pool.allocated": 288,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 11,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 101,
+        "segment.small_pool.current": 65,
+        "segment.small_pool.freed": 36,
+        "segment.small_pool.peak": 65
+    }
+}
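The keys in gpu_stats_la.json match what PyTorch's CUDA memory instrumentation reports, so a file like the one above can plausibly be produced as follows (a sketch; the actual logging code is not part of this diff):

```python
import json
import torch

# memory_allocated / memory_reserved report current usage, the max_*
# variants are high-water marks, and memory_stats() returns the dotted
# per-pool counters seen in the JSON above.
stats = {
    "memory_allocated": torch.cuda.memory_allocated(),
    "max_memory_allocated": torch.cuda.max_memory_allocated(),
    "memory_reserved": torch.cuda.memory_reserved(),
    "max_memory_reserved": torch.cuda.max_memory_reserved(),
    "memory_stats": dict(torch.cuda.memory_stats()),
}
with open("gpu_stats_la.json", "w") as f:
    json.dump(stats, f, indent=4)
```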
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": 0.47194522204020767}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff.
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 968467968,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8910798848,
+    "max_memory_reserved": 8910798848,
+    "memory_stats": {
+        "active.all.allocated": 5810926,
+        "active.all.current": 1234,
+        "active.all.freed": 5809692,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 721804,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 721520,
+        "active.large_pool.peak": 384,
+        "active.small_pool.allocated": 5089122,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 5088172,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 4071282991616,
+        "active_bytes.all.current": 968467968,
+        "active_bytes.all.freed": 4070314523648,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 2927205494272,
+        "active_bytes.large_pool.current": 951277568,
+        "active_bytes.large_pool.freed": 2926254216704,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 1144077497344,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 1144060306944,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 4071282991616,
+        "allocated_bytes.all.current": 968467968,
+        "allocated_bytes.all.freed": 4070314523648,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 2927205494272,
+        "allocated_bytes.large_pool.current": 951277568,
+        "allocated_bytes.large_pool.freed": 2926254216704,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 1144077497344,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 1144060306944,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 5810926,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 5809692,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 721804,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 721520,
+        "allocation.large_pool.peak": 384,
+        "allocation.small_pool.allocated": 5089122,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 5088172,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 2786921,
+        "inactive_split.all.current": 161,
+        "inactive_split.all.freed": 2786760,
+        "inactive_split.all.peak": 228,
+        "inactive_split.large_pool.allocated": 334121,
+        "inactive_split.large_pool.current": 45,
+        "inactive_split.large_pool.freed": 334076,
+        "inactive_split.large_pool.peak": 98,
+        "inactive_split.small_pool.allocated": 2452800,
+        "inactive_split.small_pool.current": 116,
+        "inactive_split.small_pool.freed": 2452684,
+        "inactive_split.small_pool.peak": 193,
+        "inactive_split_bytes.all.allocated": 4080610900992,
+        "inactive_split_bytes.all.current": 147216896,
+        "inactive_split_bytes.all.freed": 4080463684096,
+        "inactive_split_bytes.all.peak": 897244160,
+        "inactive_split_bytes.large_pool.allocated": 2895166959616,
+        "inactive_split_bytes.large_pool.current": 107784192,
+        "inactive_split_bytes.large_pool.freed": 2895059175424,
+        "inactive_split_bytes.large_pool.peak": 860701696,
+        "inactive_split_bytes.small_pool.allocated": 1185443941376,
+        "inactive_split_bytes.small_pool.current": 39432704,
+        "inactive_split_bytes.small_pool.freed": 1185404508672,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 725,
+        "num_device_free": 382,
+        "num_ooms": 0,
+        "num_sync_all_streams": 3,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 3989728009794,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 3988760963082,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 2846124109216,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 2845174095768,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 1143603900578,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 1143586867314,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 17678991360,
+        "reserved_bytes.all.current": 8910798848,
+        "reserved_bytes.all.freed": 8768192512,
+        "reserved_bytes.all.peak": 8910798848,
+        "reserved_bytes.large_pool.allocated": 17303601152,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 8531214336,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 375390208,
+        "reserved_bytes.small_pool.current": 138412032,
+        "reserved_bytes.small_pool.freed": 236978176,
+        "reserved_bytes.small_pool.peak": 138412032,
+        "segment.all.allocated": 725,
+        "segment.all.current": 343,
+        "segment.all.freed": 382,
+        "segment.all.peak": 343,
+        "segment.large_pool.allocated": 546,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 269,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 179,
+        "segment.small_pool.current": 66,
+        "segment.small_pool.freed": 113,
+        "segment.small_pool.peak": 66
+    }
+}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": 0.5399503104637741}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff.
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 969647616,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8910798848,
+    "max_memory_reserved": 8910798848,
+    "memory_stats": {
+        "active.all.allocated": 8716463,
+        "active.all.current": 1234,
+        "active.all.freed": 8715229,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 1082705,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 1082421,
+        "active.large_pool.peak": 482,
+        "active.small_pool.allocated": 7633758,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 7632808,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 6107360731136,
+        "active_bytes.all.current": 969647616,
+        "active_bytes.all.freed": 6106391083520,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 4391244445184,
+        "active_bytes.large_pool.current": 952457216,
+        "active_bytes.large_pool.freed": 4390291987968,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 1716116285952,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 1716099095552,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 6107360731136,
+        "allocated_bytes.all.current": 969647616,
+        "allocated_bytes.all.freed": 6106391083520,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 4391244445184,
+        "allocated_bytes.large_pool.current": 952457216,
+        "allocated_bytes.large_pool.freed": 4390291987968,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 1716116285952,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 1716099095552,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 8716463,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 8715229,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 1082705,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 1082421,
+        "allocation.large_pool.peak": 482,
+        "allocation.small_pool.allocated": 7633758,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 7632808,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 4205221,
+        "inactive_split.all.current": 173,
+        "inactive_split.all.freed": 4205048,
+        "inactive_split.all.peak": 228,
+        "inactive_split.large_pool.allocated": 505158,
+        "inactive_split.large_pool.current": 43,
+        "inactive_split.large_pool.freed": 505115,
+        "inactive_split.large_pool.peak": 98,
+        "inactive_split.small_pool.allocated": 3700063,
+        "inactive_split.small_pool.current": 130,
+        "inactive_split.small_pool.freed": 3699933,
+        "inactive_split.small_pool.peak": 193,
+        "inactive_split_bytes.all.allocated": 6091702189568,
+        "inactive_split_bytes.all.current": 150231552,
+        "inactive_split_bytes.all.freed": 6091551958016,
+        "inactive_split_bytes.all.peak": 898161664,
+        "inactive_split_bytes.large_pool.allocated": 4323643853824,
+        "inactive_split_bytes.large_pool.current": 106604544,
+        "inactive_split_bytes.large_pool.freed": 4323537249280,
+        "inactive_split_bytes.large_pool.peak": 860701696,
+        "inactive_split_bytes.small_pool.allocated": 1768058335744,
+        "inactive_split_bytes.small_pool.current": 43627008,
+        "inactive_split_bytes.small_pool.freed": 1768014708736,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 1087,
+        "num_device_free": 744,
+        "num_ooms": 0,
+        "num_sync_all_streams": 5,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 5984583499565,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 5983616452853,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 4269177644144,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 4268227630696,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 1715405855421,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 1715388822157,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 26742882304,
+        "reserved_bytes.all.current": 8910798848,
+        "reserved_bytes.all.freed": 17832083456,
+        "reserved_bytes.all.peak": 8910798848,
+        "reserved_bytes.large_pool.allocated": 26201817088,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 17429430272,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 541065216,
+        "reserved_bytes.small_pool.current": 138412032,
+        "reserved_bytes.small_pool.freed": 402653184,
+        "reserved_bytes.small_pool.peak": 138412032,
+        "segment.all.allocated": 1087,
+        "segment.all.current": 343,
+        "segment.all.freed": 744,
+        "segment.all.peak": 343,
+        "segment.large_pool.allocated": 829,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 552,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 258,
+        "segment.small_pool.current": 66,
+        "segment.small_pool.freed": 192,
+        "segment.small_pool.peak": 66
+    }
+}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": 0.5127103010689016}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 969735680,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8914993152,
+    "max_memory_reserved": 8914993152,
+    "memory_stats": {
+        "active.all.allocated": 11622050,
+        "active.all.current": 1234,
+        "active.all.freed": 11620816,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 1443606,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 1443322,
+        "active.large_pool.peak": 482,
+        "active.small_pool.allocated": 10178444,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 10177494,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 8142390843904,
+        "active_bytes.all.current": 969735680,
+        "active_bytes.all.freed": 8141421108224,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 5854235742720,
+        "active_bytes.large_pool.current": 952545280,
+        "active_bytes.large_pool.freed": 5853283197440,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 2288155101184,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 2288137910784,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 8142390843904,
+        "allocated_bytes.all.current": 969735680,
+        "allocated_bytes.all.freed": 8141421108224,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 5854235742720,
+        "allocated_bytes.large_pool.current": 952545280,
+        "allocated_bytes.large_pool.freed": 5853283197440,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 2288155101184,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 2288137910784,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 11622050,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 11620816,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 1443606,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 1443322,
+        "allocation.large_pool.peak": 482,
+        "allocation.small_pool.allocated": 10178444,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 10177494,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 5657934,
+        "inactive_split.all.current": 179,
+        "inactive_split.all.freed": 5657755,
+        "inactive_split.all.peak": 260,
+        "inactive_split.large_pool.allocated": 678637,
+        "inactive_split.large_pool.current": 43,
+        "inactive_split.large_pool.freed": 678594,
+        "inactive_split.large_pool.peak": 98,
+        "inactive_split.small_pool.allocated": 4979297,
+        "inactive_split.small_pool.current": 136,
+        "inactive_split.small_pool.freed": 4979161,
+        "inactive_split.small_pool.peak": 223,
+        "inactive_split_bytes.all.allocated": 8082423126528,
+        "inactive_split_bytes.all.current": 148046336,
+        "inactive_split_bytes.all.freed": 8082275080192,
+        "inactive_split_bytes.all.peak": 900170752,
+        "inactive_split_bytes.large_pool.allocated": 5729011654656,
+        "inactive_split_bytes.large_pool.current": 106516480,
+        "inactive_split_bytes.large_pool.freed": 5728905138176,
+        "inactive_split_bytes.large_pool.peak": 860701696,
+        "inactive_split_bytes.small_pool.allocated": 2353411471872,
+        "inactive_split_bytes.small_pool.current": 41529856,
+        "inactive_split_bytes.small_pool.freed": 2353369942016,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 1459,
+        "num_device_free": 1114,
+        "num_ooms": 0,
+        "num_sync_all_streams": 7,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 7979438992372,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 7978471945660,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 5692231179072,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 5691281165624,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 2287207813300,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 2287190780036,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 35846619136,
+        "reserved_bytes.all.current": 8914993152,
+        "reserved_bytes.all.freed": 26931625984,
+        "reserved_bytes.all.peak": 8914993152,
+        "reserved_bytes.large_pool.allocated": 35121004544,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 26348617728,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 725614592,
+        "reserved_bytes.small_pool.current": 142606336,
+        "reserved_bytes.small_pool.freed": 583008256,
+        "reserved_bytes.small_pool.peak": 142606336,
+        "segment.all.allocated": 1459,
+        "segment.all.current": 345,
+        "segment.all.freed": 1114,
+        "segment.all.peak": 345,
+        "segment.large_pool.allocated": 1113,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 836,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 346,
+        "segment.small_pool.current": 68,
+        "segment.small_pool.freed": 278,
+        "segment.small_pool.peak": 68
+    }
+}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": 0.5327637463001902}
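The result file names encode the Laplace setup: la = Laplace approximation, kron = Kronecker-factored curvature, all = all (adapter) weights, and mc ... 1000 = a Monte-Carlo predictive with 1000 samples ("homo" and "corr" are run-specific tags not documented in this diff). A minimal sketch of such an MC predictive under the usual Gaussian-over-logits assumption, with a logit mean f_mu of shape (N, C) and covariance f_var of shape (N, C, C) as logged in logfile_la.log below (mc_predictive is a hypothetical helper, not the repo's function):

import torch

def mc_predictive(f_mu, f_var, n_samples=1000):
    # sample logits from N(f_mu, f_var), push each sample through softmax,
    # and average: the MC estimate of the posterior predictive
    dist = torch.distributions.MultivariateNormal(f_mu, covariance_matrix=f_var)
    logits = dist.sample((n_samples,))           # (S, N, C)
    return logits.softmax(dim=-1).mean(dim=0)    # (N, C) class probabilities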
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 968467968,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8910798848,
+    "max_memory_reserved": 8914993152,
+    "memory_stats": {
+        "active.all.allocated": 14527687,
+        "active.all.current": 1234,
+        "active.all.freed": 14526453,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 1804507,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 1804223,
+        "active.large_pool.peak": 482,
+        "active.small_pool.allocated": 12723180,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 12722230,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 10177284875776,
+        "active_bytes.all.current": 968467968,
+        "active_bytes.all.freed": 10176316407808,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 7317090932736,
+        "active_bytes.large_pool.current": 951277568,
+        "active_bytes.large_pool.freed": 7316139655168,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 2860193943040,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 2860176752640,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 10177284875776,
+        "allocated_bytes.all.current": 968467968,
+        "allocated_bytes.all.freed": 10176316407808,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 7317090932736,
+        "allocated_bytes.large_pool.current": 951277568,
+        "allocated_bytes.large_pool.freed": 7316139655168,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 2860193943040,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 2860176752640,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 14527687,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 14526453,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 1804507,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 1804223,
+        "allocation.large_pool.peak": 482,
+        "allocation.small_pool.allocated": 12723180,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 12722230,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 7000582,
+        "inactive_split.all.current": 163,
+        "inactive_split.all.freed": 7000419,
+        "inactive_split.all.peak": 286,
+        "inactive_split.large_pool.allocated": 845816,
+        "inactive_split.large_pool.current": 45,
+        "inactive_split.large_pool.freed": 845771,
+        "inactive_split.large_pool.peak": 98,
+        "inactive_split.small_pool.allocated": 6154766,
+        "inactive_split.small_pool.current": 118,
+        "inactive_split.small_pool.freed": 6154648,
+        "inactive_split.small_pool.peak": 247,
+        "inactive_split_bytes.all.allocated": 10120438529536,
+        "inactive_split_bytes.all.current": 147216896,
+        "inactive_split_bytes.all.freed": 10120291312640,
+        "inactive_split_bytes.all.peak": 900170752,
+        "inactive_split_bytes.large_pool.allocated": 7173797017600,
+        "inactive_split_bytes.large_pool.current": 107784192,
+        "inactive_split_bytes.large_pool.freed": 7173689233408,
+        "inactive_split_bytes.large_pool.peak": 860701696,
+        "inactive_split_bytes.small_pool.allocated": 2946641511936,
+        "inactive_split_bytes.small_pool.current": 39432704,
+        "inactive_split_bytes.small_pool.freed": 2946602079232,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 1798,
+        "num_device_free": 1455,
+        "num_ooms": 0,
+        "num_sync_all_streams": 9,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 9974294488215,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 9973327441503,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 7115284714000,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 7114334700552,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 2859009774215,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 2858992740951,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 44317016064,
+        "reserved_bytes.all.current": 8910798848,
+        "reserved_bytes.all.freed": 35406217216,
+        "reserved_bytes.all.peak": 8914993152,
+        "reserved_bytes.large_pool.allocated": 43421532160,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 34649145344,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 895483904,
+        "reserved_bytes.small_pool.current": 138412032,
+        "reserved_bytes.small_pool.freed": 757071872,
+        "reserved_bytes.small_pool.peak": 142606336,
+        "segment.all.allocated": 1798,
+        "segment.all.current": 343,
+        "segment.all.freed": 1455,
+        "segment.all.peak": 345,
+        "segment.large_pool.allocated": 1371,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 1094,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 427,
+        "segment.small_pool.current": 66,
+        "segment.small_pool.freed": 361,
+        "segment.small_pool.peak": 68
+    }
+}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
+{"eval_matthews_correlation": 0.5226700639354173}
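With all checkpoints of this run now committed (step_3999 0.5400, step_5999 0.5127, step_7999 0.5328, step_9999 0.5227), a small hypothetical helper can collect the per-checkpoint scores; only the path layout below is taken from this commit:

import glob, json, os, re

run = "outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000"
paths = glob.glob(os.path.join(run, "step_*", "all_results_la_kron_all_homo_mc_corr_1000.json"))
for p in sorted(paths, key=lambda p: int(re.search(r"step_(\d+)", p).group(1))):
    with open(p) as f:
        print(p, json.load(f)["eval_matthews_correlation"])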
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
+{
+    "memory_allocated": 969647616,
+    "max_memory_allocated": 3324437504,
+    "memory_reserved": 8914993152,
+    "max_memory_reserved": 8914993152,
+    "memory_stats": {
+        "active.all.allocated": 17433374,
+        "active.all.current": 1234,
+        "active.all.freed": 17432140,
+        "active.all.peak": 1487,
+        "active.large_pool.allocated": 2165408,
+        "active.large_pool.current": 284,
+        "active.large_pool.freed": 2165124,
+        "active.large_pool.peak": 482,
+        "active.small_pool.allocated": 15267966,
+        "active.small_pool.current": 950,
+        "active.small_pool.freed": 15267016,
+        "active.small_pool.peak": 1200,
+        "active_bytes.all.allocated": 12213042263040,
+        "active_bytes.all.current": 969647616,
+        "active_bytes.all.freed": 12212072615424,
+        "active_bytes.all.peak": 3324437504,
+        "active_bytes.large_pool.allocated": 8780809451520,
+        "active_bytes.large_pool.current": 952457216,
+        "active_bytes.large_pool.freed": 8779856994304,
+        "active_bytes.large_pool.peak": 3240482816,
+        "active_bytes.small_pool.allocated": 3432232811520,
+        "active_bytes.small_pool.current": 17190400,
+        "active_bytes.small_pool.freed": 3432215621120,
+        "active_bytes.small_pool.peak": 114983424,
+        "allocated_bytes.all.allocated": 12213042263040,
+        "allocated_bytes.all.current": 969647616,
+        "allocated_bytes.all.freed": 12212072615424,
+        "allocated_bytes.all.peak": 3324437504,
+        "allocated_bytes.large_pool.allocated": 8780809451520,
+        "allocated_bytes.large_pool.current": 952457216,
+        "allocated_bytes.large_pool.freed": 8779856994304,
+        "allocated_bytes.large_pool.peak": 3240482816,
+        "allocated_bytes.small_pool.allocated": 3432232811520,
+        "allocated_bytes.small_pool.current": 17190400,
+        "allocated_bytes.small_pool.freed": 3432215621120,
+        "allocated_bytes.small_pool.peak": 114983424,
+        "allocation.all.allocated": 17433374,
+        "allocation.all.current": 1234,
+        "allocation.all.freed": 17432140,
+        "allocation.all.peak": 1487,
+        "allocation.large_pool.allocated": 2165408,
+        "allocation.large_pool.current": 284,
+        "allocation.large_pool.freed": 2165124,
+        "allocation.large_pool.peak": 482,
+        "allocation.small_pool.allocated": 15267966,
+        "allocation.small_pool.current": 950,
+        "allocation.small_pool.freed": 15267016,
+        "allocation.small_pool.peak": 1200,
+        "inactive_split.all.allocated": 8443535,
+        "inactive_split.all.current": 173,
+        "inactive_split.all.freed": 8443362,
+        "inactive_split.all.peak": 286,
+        "inactive_split.large_pool.allocated": 1015117,
+        "inactive_split.large_pool.current": 43,
+        "inactive_split.large_pool.freed": 1015074,
+        "inactive_split.large_pool.peak": 102,
+        "inactive_split.small_pool.allocated": 7428418,
+        "inactive_split.small_pool.current": 130,
+        "inactive_split.small_pool.freed": 7428288,
+        "inactive_split.small_pool.peak": 247,
+        "inactive_split_bytes.all.allocated": 12134537098240,
+        "inactive_split_bytes.all.current": 152328704,
+        "inactive_split_bytes.all.freed": 12134384769536,
+        "inactive_split_bytes.all.peak": 900170752,
+        "inactive_split_bytes.large_pool.allocated": 8605136866304,
+        "inactive_split_bytes.large_pool.current": 106604544,
+        "inactive_split_bytes.large_pool.freed": 8605030261760,
+        "inactive_split_bytes.large_pool.peak": 860701696,
+        "inactive_split_bytes.small_pool.allocated": 3529400231936,
+        "inactive_split_bytes.small_pool.current": 45724160,
+        "inactive_split_bytes.small_pool.freed": 3529354507776,
+        "inactive_split_bytes.small_pool.peak": 77619712,
+        "max_split_size": -1,
+        "num_alloc_retries": 0,
+        "num_device_alloc": 2166,
+        "num_device_free": 1821,
+        "num_ooms": 0,
+        "num_sync_all_streams": 11,
+        "oversize_allocations.allocated": 0,
+        "oversize_allocations.current": 0,
+        "oversize_allocations.freed": 0,
+        "oversize_allocations.peak": 0,
+        "oversize_segments.allocated": 0,
+        "oversize_segments.current": 0,
+        "oversize_segments.freed": 0,
+        "oversize_segments.peak": 0,
+        "requested_bytes.all.allocated": 11969149987094,
+        "requested_bytes.all.current": 967046712,
+        "requested_bytes.all.freed": 11968182940382,
+        "requested_bytes.all.peak": 3263085376,
+        "requested_bytes.large_pool.allocated": 8538338248928,
+        "requested_bytes.large_pool.current": 950013448,
+        "requested_bytes.large_pool.freed": 8537388235480,
+        "requested_bytes.large_pool.peak": 3179241096,
+        "requested_bytes.small_pool.allocated": 3430811738166,
+        "requested_bytes.small_pool.current": 17033264,
+        "requested_bytes.small_pool.freed": 3430794704902,
+        "requested_bytes.small_pool.peak": 114852048,
+        "reserved_bytes.all.allocated": 53393489920,
+        "reserved_bytes.all.current": 8914993152,
+        "reserved_bytes.all.freed": 44478496768,
+        "reserved_bytes.all.peak": 8914993152,
+        "reserved_bytes.large_pool.allocated": 52319748096,
+        "reserved_bytes.large_pool.current": 8772386816,
+        "reserved_bytes.large_pool.freed": 43547361280,
+        "reserved_bytes.large_pool.peak": 8772386816,
+        "reserved_bytes.small_pool.allocated": 1073741824,
+        "reserved_bytes.small_pool.current": 142606336,
+        "reserved_bytes.small_pool.freed": 931135488,
+        "reserved_bytes.small_pool.peak": 142606336,
+        "segment.all.allocated": 2166,
+        "segment.all.current": 345,
+        "segment.all.freed": 1821,
+        "segment.all.peak": 345,
+        "segment.large_pool.allocated": 1654,
+        "segment.large_pool.current": 277,
+        "segment.large_pool.freed": 1377,
+        "segment.large_pool.peak": 277,
+        "segment.small_pool.allocated": 512,
+        "segment.small_pool.current": 68,
+        "segment.small_pool.freed": 444,
+        "segment.small_pool.peak": 68
+    }
+}
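The log below records, per checkpoint, the adapter and classification head being restored before the Laplace evaluation. As a minimal sketch of those loading calls with the adapters library (the checkpoint path is copied from the log; this illustrates the API, not the repo's actual evaluation script):

from adapters import AutoAdapterModel

model = AutoAdapterModel.from_pretrained("bert-base-uncased")
# reads adapter_config.json / pytorch_adapter.bin and, if present,
# head_config.json / pytorch_model_head.bin from the checkpoint directory
name = model.load_adapter(
    "./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0"
)
model.set_active_adapters(name)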
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la.log
ADDED
@@ -0,0 +1,846 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
06/02/2024 00:01:55 - INFO - __main__ - Number of labels detected = 2
|
2 |
+
06/02/2024 00:01:56 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
|
3 |
+
06/02/2024 00:01:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/adapter_config.json
|
4 |
+
06/02/2024 00:01:57 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
|
5 |
+
06/02/2024 00:01:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_adapter.bin
|
6 |
+
06/02/2024 00:01:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/head_config.json
|
7 |
+
06/02/2024 00:01:57 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
|
8 |
+
06/02/2024 00:01:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_model_head.bin
|
9 |
+
06/02/2024 00:01:57 - INFO - __main__ - Adapter Name = cola
|
10 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
|
11 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
|
12 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
|
13 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
|
14 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
|
15 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
|
16 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
|
17 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
|
18 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
|
19 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
|
20 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
|
21 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
|
22 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
|
23 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
|
24 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
|
25 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
|
26 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
|
27 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
|
28 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
|
29 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
|
30 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
|
31 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
|
32 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
|
33 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
|
34 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
|
35 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
|
36 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
|
37 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
|
38 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
|
39 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
|
40 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
|
41 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
|
42 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
|
43 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
|
44 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
|
45 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
|
46 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
|
47 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
|
48 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
|
49 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
|
50 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
|
51 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
|
52 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
|
53 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
|
54 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
|
55 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
|
56 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
|
57 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
|
58 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
|
59 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
|
60 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
|
61 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
|
62 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
|
63 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
|
64 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
|
65 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
|
66 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
|
67 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
|
68 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
|
69 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
|
70 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
|
71 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
|
72 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
|
73 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
|
74 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
|
75 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
|
76 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
|
77 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
|
78 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
|
79 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
|
80 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
|
81 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
|
82 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
|
83 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
|
84 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
|
85 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
|
86 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
|
87 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
|
88 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
|
89 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
|
90 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
|
91 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
|
92 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
|
93 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
|
94 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
|
95 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
|
96 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
|
97 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
|
98 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
|
99 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
|
100 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
|
101 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
|
102 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
|
103 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
|
104 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
|
105 |
+
06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
|
106 |
+
06/02/2024 00:01:57 - INFO - __main__ - heads.cola.1.weight
|
107 |
+
06/02/2024 00:01:57 - INFO - __main__ - heads.cola.1.bias
|
108 |
+
06/02/2024 00:01:57 - INFO - __main__ - heads.cola.4.weight
|
109 |
+
06/02/2024 00:01:57 - INFO - __main__ - heads.cola.4.bias
|
110 |
+
06/02/2024 00:01:58 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
111 |
+
06/02/2024 00:01:58 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
112 |
+
06/02/2024 00:01:58 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
113 |
+
06/02/2024 00:07:01 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
|
114 |
+
06/02/2024 00:07:01 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
|
115 |
+
06/02/2024 00:07:01 - INFO - __main__ - tensor([[ 0.0687, 0.1107],
|
116 |
+
[ 0.1065, 0.1546],
|
117 |
+
[ 0.1252, 0.1252],
|
118 |
+
...,
|
119 |
+
[ 0.0203, 0.1087],
|
120 |
+
[ 0.0962, 0.1235],
|
121 |
+
[ 0.0511, -0.0174]], device='cuda:0')
|
122 |
+
06/02/2024 00:07:01 - INFO - __main__ - tensor([[[12.5866, 12.3973],
|
123 |
+
[12.3973, 12.5533]],
|
124 |
+
|
125 |
+
[[11.7669, 11.4376],
|
126 |
+
[11.4376, 11.7918]],
|
127 |
+
|
128 |
+
[[11.8492, 11.6148],
|
129 |
+
[11.6148, 11.8600]],
|
130 |
+
|
131 |
+
...,
|
132 |
+
|
133 |
+
[[13.4014, 13.0880],
|
134 |
+
[13.0880, 13.4134]],
|
135 |
+
|
136 |
+
[[11.4929, 11.3232],
|
137 |
+
[11.3232, 11.5001]],
|
138 |
+
|
139 |
+
[[11.7616, 11.5308],
|
140 |
+
[11.5308, 11.7491]]], device='cuda:0')
|
141 |
+
06/02/2024 00:07:01 - INFO - __main__ - ***** Completed training *****
|
142 |
+
06/02/2024 00:07:05 - INFO - __main__ - Number of labels detected = 2
|
143 |
+
06/02/2024 00:07:05 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
|
144 |
+
06/02/2024 00:07:06 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/adapter_config.json
|
145 |
+
06/02/2024 00:07:06 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
|
146 |
+
06/02/2024 00:07:06 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_adapter.bin
|
147 |
+
06/02/2024 00:07:06 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/head_config.json
|
148 |
+
06/02/2024 00:07:06 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
|
149 |
+
06/02/2024 00:07:06 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_model_head.bin
|
150 |
+
06/02/2024 00:07:06 - INFO - __main__ - Adapter Name = cola
|
151 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
|
152 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
|
153 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
|
154 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
|
155 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
|
156 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
|
157 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
|
158 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
|
159 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
|
160 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
|
161 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
|
162 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
|
163 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
|
164 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
|
165 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
|
166 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
|
167 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
|
168 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
|
169 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
|
170 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
|
171 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
|
172 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
|
173 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
|
174 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
|
175 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
|
176 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
|
177 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
|
178 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
|
179 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
|
180 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
|
181 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
|
182 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
|
183 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
|
184 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
|
185 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
|
186 |
+
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 00:07:06 - INFO - __main__ - heads.cola.1.weight
06/02/2024 00:07:06 - INFO - __main__ - heads.cola.1.bias
06/02/2024 00:07:06 - INFO - __main__ - heads.cola.4.weight
06/02/2024 00:07:06 - INFO - __main__ - heads.cola.4.bias
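The parameter names logged above are the weights left trainable after adapter setup: two bottleneck adapters per encoder layer plus the `heads.cola` classifier. A listing like this is typically produced by a loop such as the sketch below (a reconstruction, not code recovered from this run; the `logger` name and function name are illustrative):

```python
import logging

logger = logging.getLogger(__name__)

def log_trainable_parameters(model):
    """Log every parameter that still requires gradients, i.e. the
    adapter bottlenecks and the task head once the backbone is frozen."""
    for name, param in model.named_parameters():
        if param.requires_grad:
            logger.info(name)
```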
06/02/2024 00:07:07 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 00:07:07 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:07:07 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:12:15 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 00:12:15 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 00:12:15 - INFO - __main__ - tensor([[-1.6485, 2.0073],
        [-1.4895, 1.9543],
        [-0.8011, 1.1930],
        ...,
        [-2.4070, 2.6930],
        [ 0.0670, 0.1763],
        [-0.8631, 1.0701]], device='cuda:0')
06/02/2024 00:12:15 - INFO - __main__ - tensor([[[ 4.6435, 0.9781],
         [ 0.9781, 4.6696]],

        [[ 3.1061, 1.7478],
         [ 1.7478, 3.1230]],

        [[ 2.7134, 1.0829],
         [ 1.0829, 2.8030]],

        ...,

        [[ 4.3186, 3.3896],
         [ 3.3896, 4.2989]],

        [[ 2.5481, 0.3680],
         [ 0.3680, 2.8497]],

        [[ 3.6499, -0.1064],
         [-0.1064, 3.7341]]], device='cuda:0')
06/02/2024 00:12:15 - INFO - __main__ - ***** Completed training *****
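The `f_mu` ([1043, 2]) and `f_var` ([1043, 2, 2]) tensors above are the mean and per-example logit covariance of the Gaussian (GLM) predictive over CoLA's 1043 validation sentences. Read against the result filenames (`la_kron_all_homo_mc_corr_1000`), the setup plausibly maps onto laplace-torch options as in this sketch; this is an assumption about the script, with illustrative variable names:

```python
from laplace import Laplace

# 'kron' and 'all' in the run name suggest a Kronecker-factored Laplace
# approximation over all trainable (adapter + head) weights.
la = Laplace(model, 'classification',
             subset_of_weights='all',
             hessian_structure='kron')
la.fit(train_loader)  # fit curvature on the CoLA training data

# Internally the GLM predictive is N(f_mu, f_var) with the logged shapes;
# 'mc' with 1000 samples would correspond to the call below.
probs = la(eval_batch, pred_type='glm', link_approx='mc', n_samples=1000)
```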
06/02/2024 00:12:17 - INFO - __main__ - Number of labels detected = 2
06/02/2024 00:12:18 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 00:12:18 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/adapter_config.json
06/02/2024 00:12:18 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 00:12:18 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_adapter.bin
06/02/2024 00:12:18 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/head_config.json
06/02/2024 00:12:18 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 00:12:18 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_model_head.bin
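These `adapters.loading` lines correspond to reloading the step_3999 checkpoint: the adapter weights and the `cola` classification head are restored on top of a fresh `bert-base-uncased`. A minimal sketch of that reload, assuming the `adapters` library API (the exact call site in the script is not shown in this log):

```python
from adapters import AutoAdapterModel

ckpt = "./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999"
model = AutoAdapterModel.from_pretrained("bert-base-uncased")
# with_head=True also restores head_config.json / pytorch_model_head.bin
name = model.load_adapter(ckpt, with_head=True)  # logs "Adding adapter 'cola'."
model.set_active_adapters(name)
```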
06/02/2024 00:12:18 - INFO - __main__ - Adapter Name = cola
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 00:12:18 - INFO - __main__ - heads.cola.1.weight
06/02/2024 00:12:18 - INFO - __main__ - heads.cola.1.bias
06/02/2024 00:12:18 - INFO - __main__ - heads.cola.4.weight
06/02/2024 00:12:18 - INFO - __main__ - heads.cola.4.bias
06/02/2024 00:12:19 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 00:12:19 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:12:19 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:17:29 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 00:17:29 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 00:17:29 - INFO - __main__ - tensor([[-2.6441, 3.0048],
        [-2.1385, 2.6015],
        [-0.8923, 1.2743],
        ...,
        [-3.1286, 3.4216],
        [-1.2865, 1.6362],
        [-2.3139, 2.5438]], device='cuda:0')
06/02/2024 00:17:29 - INFO - __main__ - tensor([[[ 4.6644, 2.6618],
         [ 2.6618, 4.6231]],

        [[ 3.5755, 1.9887],
         [ 1.9887, 3.5351]],

        [[ 2.7346, 0.4514],
         [ 0.4514, 2.9041]],

        ...,

        [[ 4.7327, 3.9928],
         [ 3.9928, 4.7332]],

        [[ 5.0674, -1.8297],
         [-1.8297, 5.7372]],

        [[ 5.9379, -0.2734],
         [-0.2734, 5.8899]]], device='cuda:0')
06/02/2024 00:17:29 - INFO - __main__ - ***** Completed training *****
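Given `f_mu` and `f_var`, the `mc` / `1000` part of the result filenames points to a Monte Carlo link approximation: sample correlated logits from N(f_mu, f_var) and average the softmax. A self-contained sketch of that step (my reading of `mc_corr_1000`; function and variable names are illustrative):

```python
import torch

def mc_predictive(f_mu: torch.Tensor, f_var: torch.Tensor, n_samples: int = 1000):
    """f_mu: [N, C] mean logits; f_var: [N, C, C] per-example logit covariance."""
    scale = torch.linalg.cholesky(f_var)                           # [N, C, C]
    eps = torch.randn(n_samples, *f_mu.shape, device=f_mu.device)  # [S, N, C]
    logits = f_mu + torch.einsum('nij,snj->sni', scale, eps)       # [S, N, C]
    return logits.softmax(dim=-1).mean(dim=0)                      # probs [N, C]
```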
06/02/2024 00:17:33 - INFO - __main__ - Number of labels detected = 2
06/02/2024 00:17:33 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 00:17:34 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/adapter_config.json
06/02/2024 00:17:34 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 00:17:34 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_adapter.bin
06/02/2024 00:17:34 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/head_config.json
06/02/2024 00:17:34 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 00:17:34 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_model_head.bin
06/02/2024 00:17:34 - INFO - __main__ - Adapter Name = cola
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 00:17:34 - INFO - __main__ - heads.cola.1.weight
06/02/2024 00:17:34 - INFO - __main__ - heads.cola.1.bias
06/02/2024 00:17:34 - INFO - __main__ - heads.cola.4.weight
06/02/2024 00:17:34 - INFO - __main__ - heads.cola.4.bias
06/02/2024 00:17:35 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 00:17:35 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:17:35 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:22:46 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 00:22:46 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 00:22:46 - INFO - __main__ - tensor([[-2.6790, 3.0428],
        [-1.9493, 2.4252],
        [-0.6514, 1.0621],
        ...,
        [-3.0951, 3.3807],
        [-1.4203, 1.8088],
        [-1.5409, 1.7612]], device='cuda:0')
06/02/2024 00:22:46 - INFO - __main__ - tensor([[[ 4.6626, 2.1988],
         [ 2.1988, 4.6641]],

        [[ 3.3556, 1.3597],
         [ 1.3597, 3.3220]],

        [[ 2.4772, 0.2199],
         [ 0.2199, 2.6351]],

        ...,

        [[ 4.4411, 3.4198],
         [ 3.4198, 4.4294]],

        [[ 5.6441, -2.3624],
         [-2.3624, 6.0773]],

        [[ 6.1997, -2.6992],
         [-2.6992, 6.2934]]], device='cuda:0')
06/02/2024 00:22:46 - INFO - __main__ - ***** Completed training *****
06/02/2024 00:22:48 - INFO - __main__ - Number of labels detected = 2
06/02/2024 00:22:49 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 00:22:49 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/adapter_config.json
06/02/2024 00:22:49 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 00:22:49 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_adapter.bin
06/02/2024 00:22:49 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/head_config.json
06/02/2024 00:22:49 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 00:22:49 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_model_head.bin
06/02/2024 00:22:49 - INFO - __main__ - Adapter Name = cola
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 00:22:49 - INFO - __main__ - heads.cola.1.weight
06/02/2024 00:22:49 - INFO - __main__ - heads.cola.1.bias
06/02/2024 00:22:49 - INFO - __main__ - heads.cola.4.weight
06/02/2024 00:22:49 - INFO - __main__ - heads.cola.4.bias
06/02/2024 00:22:51 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 00:22:51 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:22:51 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 00:27:58 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 00:27:58 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 00:27:58 - INFO - __main__ - tensor([[-3.3962, 3.7076],
        [-2.5677, 2.9764],
        [-0.9388, 1.3018],
        ...,
        [-3.7986, 4.0254],
        [-2.4808, 2.8059],
        [-2.0593, 2.2397]], device='cuda:0')
06/02/2024 00:27:58 - INFO - __main__ - tensor([[[ 5.1139, 2.8399],
         [ 2.8399, 5.1270]],

        [[ 4.0497, 1.4560],
         [ 1.4560, 3.9944]],

        [[ 3.5599, -0.8490],
         [-0.8490, 3.7326]],

        ...,

        [[ 4.9693, 3.9614],
         [ 3.9614, 4.9527]],

        [[ 7.3436, -2.3866],
         [-2.3866, 7.4845]],

        [[11.3763, -7.5061],
         [-7.5062, 11.6565]]], device='cuda:0')
06/02/2024 00:27:58 - INFO - __main__ - ***** Completed training *****
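The same load / list-parameters / fit / predict cycle repeats once per saved checkpoint (step_3999 through step_9999 in this stretch of the log), so the driver is presumably a loop over checkpoint directories, roughly as below (an assumption about the script; only the paths and step numbers come from the log, and both helpers are the hypothetical ones sketched earlier):

```python
root = "./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000"
for step in (3999, 5999, 7999, 9999):  # earlier checkpoints are handled the same way
    ckpt = f"{root}/step_{step}"
    model = load_adapter_checkpoint(ckpt)  # hypothetical: reload adapter + head
    run_laplace_eval(model, ckpt)          # hypothetical: Laplace fit + MC predictive
```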
06/02/2024 00:28:00 - INFO - __main__ - Number of labels detected = 2
06/02/2024 00:28:01 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 00:28:02 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/adapter_config.json
06/02/2024 00:28:02 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 00:28:02 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_adapter.bin
06/02/2024 00:28:02 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/head_config.json
06/02/2024 00:28:02 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 00:28:02 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_model_head.bin
06/02/2024 00:28:02 - INFO - __main__ - Adapter Name = cola
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
|
777 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
|
778 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
|
779 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
|
780 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
|
781 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
|
782 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
|
783 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
|
784 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
|
785 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
|
786 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
|
787 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
|
788 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
|
789 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
|
790 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
|
791 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
|
792 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
|
793 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
|
794 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
|
795 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
|
796 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
|
797 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
|
798 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
|
799 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
|
800 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
|
801 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
|
802 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
|
803 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
|
804 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
|
805 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
|
806 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
|
807 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
|
808 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
|
809 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
|
810 |
+
06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
|
811 |
+
06/02/2024 00:28:02 - INFO - __main__ - heads.cola.1.weight
|
812 |
+
06/02/2024 00:28:02 - INFO - __main__ - heads.cola.1.bias
|
813 |
+
06/02/2024 00:28:02 - INFO - __main__ - heads.cola.4.weight
|
814 |
+
06/02/2024 00:28:02 - INFO - __main__ - heads.cola.4.bias
|
815 |
+
06/02/2024 00:28:03 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
816 |
+
06/02/2024 00:28:03 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
817 |
+
06/02/2024 00:28:03 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
818 |
+
06/02/2024 00:33:06 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
|
819 |
+
06/02/2024 00:33:06 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
|
820 |
+
06/02/2024 00:33:06 - INFO - __main__ - tensor([[-3.6432, 3.9375],
|
821 |
+
[-2.6919, 3.0975],
|
822 |
+
[-1.0813, 1.4512],
|
823 |
+
...,
|
824 |
+
[-3.9697, 4.1863],
|
825 |
+
[-2.6218, 2.9444],
|
826 |
+
[-2.3552, 2.5516]], device='cuda:0')
|
827 |
+
06/02/2024 00:33:06 - INFO - __main__ - tensor([[[ 5.2339, 3.0798],
|
828 |
+
[ 3.0798, 5.2397]],
|
829 |
+
|
830 |
+
[[ 4.3019, 1.2904],
|
831 |
+
[ 1.2904, 4.2205]],
|
832 |
+
|
833 |
+
[[ 4.0779, -1.2783],
|
834 |
+
[-1.2783, 4.2286]],
|
835 |
+
|
836 |
+
...,
|
837 |
+
|
838 |
+
[[ 5.0681, 3.9858],
|
839 |
+
[ 3.9858, 5.0388]],
|
840 |
+
|
841 |
+
[[ 7.9208, -2.8210],
|
842 |
+
[-2.8210, 8.0162]],
|
843 |
+
|
844 |
+
[[12.2069, -7.8169],
|
845 |
+
[-7.8169, 12.2902]]], device='cuda:0')
|
846 |
+
06/02/2024 00:33:06 - INFO - __main__ - ***** Completed training *****
|
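The f_mu / f_var tensors logged above are the Laplace predictive mean and covariance over the two CoLA logits for the 1043 validation sentences. As a hedged illustration (not the repository's own evaluation code), the Monte Carlo predictive suggested by the "mc_corr_1000" suffix in the result filenames could be computed from them roughly like this; the function name and the 1000-sample default are assumptions:

import torch

def mc_predictive(f_mu: torch.Tensor, f_var: torch.Tensor, n_samples: int = 1000) -> torch.Tensor:
    # f_mu: [N, C] logit means, f_var: [N, C, C] logit covariances, matching
    # the torch.Size([1043, 2]) / torch.Size([1043, 2, 2]) shapes in the log.
    dist = torch.distributions.MultivariateNormal(f_mu, covariance_matrix=f_var)
    logits = dist.sample((n_samples,))           # [n_samples, N, C] sampled logits
    return logits.softmax(dim=-1).mean(dim=0)    # [N, C] MC-averaged class probabilities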
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.013232794083812355}
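Each all_results_*.json stores a single Matthews correlation coefficient for the CoLA validation set at that checkpoint. A minimal sketch of how such a file could be written, assuming standard scikit-learn MCC (the label arrays below are placeholders, not data from this run):

import json
from sklearn.metrics import matthews_corrcoef

gold = [1, 0, 1, 1, 0]   # illustrative gold CoLA labels
pred = [1, 0, 0, 1, 0]   # illustrative argmax of the MC-averaged probabilities
with open("all_results_la_kron_all_homo_mc_corr_1000.json", "w") as f:
    json.dump({"eval_matthews_correlation": matthews_corrcoef(gold, pred)}, f)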
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 969647616,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8908701696,
    "max_memory_reserved": 8908701696,
    "memory_stats": {
        "active.all.allocated": 2905439,
        "active.all.current": 1234,
        "active.all.freed": 2904205,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 360975,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 360691,
        "active.large_pool.peak": 384,
        "active.small_pool.allocated": 2544464,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 2543514,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 2033417876480,
        "active_bytes.all.current": 969647616,
        "active_bytes.all.freed": 2032448228864,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 1464489492480,
        "active_bytes.large_pool.current": 952457216,
        "active_bytes.large_pool.freed": 1463537035264,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 568928384000,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 568911193600,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 2033417876480,
        "allocated_bytes.all.current": 969647616,
        "allocated_bytes.all.freed": 2032448228864,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 1464489492480,
        "allocated_bytes.large_pool.current": 952457216,
        "allocated_bytes.large_pool.freed": 1463537035264,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 568928384000,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 568911193600,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 2905439,
        "allocation.all.current": 1234,
        "allocation.all.freed": 2904205,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 360975,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 360691,
        "allocation.large_pool.peak": 384,
        "allocation.small_pool.allocated": 2544464,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 2543514,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 1330181,
        "inactive_split.all.current": 168,
        "inactive_split.all.freed": 1330013,
        "inactive_split.all.peak": 224,
        "inactive_split.large_pool.allocated": 167153,
        "inactive_split.large_pool.current": 43,
        "inactive_split.large_pool.freed": 167110,
        "inactive_split.large_pool.peak": 96,
        "inactive_split.small_pool.allocated": 1163028,
        "inactive_split.small_pool.current": 125,
        "inactive_split.small_pool.freed": 1162903,
        "inactive_split.small_pool.peak": 149,
        "inactive_split_bytes.all.allocated": 2039758622720,
        "inactive_split_bytes.all.current": 146037248,
        "inactive_split_bytes.all.freed": 2039612585472,
        "inactive_split_bytes.all.peak": 893967360,
        "inactive_split_bytes.large_pool.allocated": 1450436033536,
        "inactive_split_bytes.large_pool.current": 106604544,
        "inactive_split_bytes.large_pool.freed": 1450329428992,
        "inactive_split_bytes.large_pool.peak": 859522048,
        "inactive_split_bytes.small_pool.allocated": 589322589184,
        "inactive_split_bytes.small_pool.current": 39432704,
        "inactive_split_bytes.small_pool.freed": 589283156480,
        "inactive_split_bytes.small_pool.peak": 78419456,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 394,
        "num_device_free": 52,
        "num_ooms": 0,
        "num_sync_all_streams": 1,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 1990795460851,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 1989828414139,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 1422103262928,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 1421153249480,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 568692197923,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 568675164659,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 9225371648,
        "reserved_bytes.all.current": 8908701696,
        "reserved_bytes.all.freed": 316669952,
        "reserved_bytes.all.peak": 8908701696,
        "reserved_bytes.large_pool.allocated": 9003073536,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 230686720,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 222298112,
        "reserved_bytes.small_pool.current": 136314880,
        "reserved_bytes.small_pool.freed": 85983232,
        "reserved_bytes.small_pool.peak": 136314880,
        "segment.all.allocated": 394,
        "segment.all.current": 342,
        "segment.all.freed": 52,
        "segment.all.peak": 342,
        "segment.large_pool.allocated": 288,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 11,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 106,
        "segment.small_pool.current": 65,
        "segment.small_pool.freed": 41,
        "segment.small_pool.peak": 65
    }
}
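The key names in these gpu_stats_la.json files match PyTorch's CUDA allocator counters, so a plausible way to produce them (an assumption; the repository's actual logging code is not part of this diff) is:

import json
import torch

stats = {
    "memory_allocated": torch.cuda.memory_allocated(),
    "max_memory_allocated": torch.cuda.max_memory_allocated(),
    "memory_reserved": torch.cuda.memory_reserved(),
    "max_memory_reserved": torch.cuda.max_memory_reserved(),
    # flat dict of counters such as "active.all.allocated" / "segment.small_pool.peak"
    "memory_stats": dict(torch.cuda.memory_stats()),
}
with open("gpu_stats_la.json", "w") as f:
    json.dump(stats, f, indent=4)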
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.500854588319398}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 968467968,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8908701696,
    "max_memory_reserved": 8908701696,
    "memory_stats": {
        "active.all.allocated": 5810926,
        "active.all.current": 1234,
        "active.all.freed": 5809692,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 721948,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 721664,
        "active.large_pool.peak": 384,
        "active.small_pool.allocated": 5088978,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 5088028,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 4065322237440,
        "active_bytes.all.current": 968467968,
        "active_bytes.all.freed": 4064353769472,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 2927465442816,
        "active_bytes.large_pool.current": 951277568,
        "active_bytes.large_pool.freed": 2926514165248,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 1137856794624,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 1137839604224,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 4065322237440,
        "allocated_bytes.all.current": 968467968,
        "allocated_bytes.all.freed": 4064353769472,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 2927465442816,
        "allocated_bytes.large_pool.current": 951277568,
        "allocated_bytes.large_pool.freed": 2926514165248,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 1137856794624,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 1137839604224,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 5810926,
        "allocation.all.current": 1234,
        "allocation.all.freed": 5809692,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 721948,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 721664,
        "allocation.large_pool.peak": 384,
        "allocation.small_pool.allocated": 5088978,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 5088028,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 2713249,
        "inactive_split.all.current": 170,
        "inactive_split.all.freed": 2713079,
        "inactive_split.all.peak": 227,
        "inactive_split.large_pool.allocated": 334447,
        "inactive_split.large_pool.current": 45,
        "inactive_split.large_pool.freed": 334402,
        "inactive_split.large_pool.peak": 98,
        "inactive_split.small_pool.allocated": 2378802,
        "inactive_split.small_pool.current": 125,
        "inactive_split.small_pool.freed": 2378677,
        "inactive_split.small_pool.peak": 186,
        "inactive_split_bytes.all.allocated": 4074440010240,
        "inactive_split_bytes.all.current": 149314048,
        "inactive_split_bytes.all.freed": 4074290696192,
        "inactive_split_bytes.all.peak": 899341312,
        "inactive_split_bytes.large_pool.allocated": 2895554385920,
        "inactive_split_bytes.large_pool.current": 107784192,
        "inactive_split_bytes.large_pool.freed": 2895446601728,
        "inactive_split_bytes.large_pool.peak": 860701696,
        "inactive_split_bytes.small_pool.allocated": 1178885624320,
        "inactive_split_bytes.small_pool.current": 41529856,
        "inactive_split_bytes.small_pool.freed": 1178844094464,
        "inactive_split_bytes.small_pool.peak": 79716864,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 735,
        "num_device_free": 393,
        "num_ooms": 0,
        "num_sync_all_streams": 3,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 3981573885378,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 3980606838666,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 2844189486496,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 2843239473048,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 1137384398882,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 1137367365618,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 17718837248,
        "reserved_bytes.all.current": 8908701696,
        "reserved_bytes.all.freed": 8810135552,
        "reserved_bytes.all.peak": 8908701696,
        "reserved_bytes.large_pool.allocated": 17324572672,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 8552185856,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 394264576,
        "reserved_bytes.small_pool.current": 136314880,
        "reserved_bytes.small_pool.freed": 257949696,
        "reserved_bytes.small_pool.peak": 136314880,
        "segment.all.allocated": 735,
        "segment.all.current": 342,
        "segment.all.freed": 393,
        "segment.all.peak": 342,
        "segment.large_pool.allocated": 547,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 270,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 188,
        "segment.small_pool.current": 65,
        "segment.small_pool.freed": 123,
        "segment.small_pool.peak": 65
    }
}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.5186267566332291}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 969647616,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8910798848,
    "max_memory_reserved": 8910798848,
    "memory_stats": {
        "active.all.allocated": 8716463,
        "active.all.current": 1234,
        "active.all.freed": 8715229,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 1082921,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 1082637,
        "active.large_pool.peak": 482,
        "active.small_pool.allocated": 7633542,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 7632592,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 6098303822336,
        "active_bytes.all.current": 969647616,
        "active_bytes.all.freed": 6097334174720,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 4391518590464,
        "active_bytes.large_pool.current": 952457216,
        "active_bytes.large_pool.freed": 4390566133248,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 1706785231872,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 1706768041472,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 6098303822336,
        "allocated_bytes.all.current": 969647616,
        "allocated_bytes.all.freed": 6097334174720,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 4391518590464,
        "allocated_bytes.large_pool.current": 952457216,
        "allocated_bytes.large_pool.freed": 4390566133248,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 1706785231872,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 1706768041472,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 8716463,
        "allocation.all.current": 1234,
        "allocation.all.freed": 8715229,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 1082921,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 1082637,
        "allocation.large_pool.peak": 482,
        "allocation.small_pool.allocated": 7633542,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 7632592,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 4132126,
        "inactive_split.all.current": 181,
        "inactive_split.all.freed": 4131945,
        "inactive_split.all.peak": 227,
        "inactive_split.large_pool.allocated": 500912,
        "inactive_split.large_pool.current": 43,
        "inactive_split.large_pool.freed": 500869,
        "inactive_split.large_pool.peak": 98,
        "inactive_split.small_pool.allocated": 3631214,
        "inactive_split.small_pool.current": 138,
        "inactive_split.small_pool.freed": 3631076,
        "inactive_split.small_pool.peak": 194,
        "inactive_split_bytes.all.allocated": 6074756495872,
        "inactive_split_bytes.all.current": 152328704,
        "inactive_split_bytes.all.freed": 6074604167168,
        "inactive_split_bytes.all.peak": 904453120,
        "inactive_split_bytes.large_pool.allocated": 4316633251840,
        "inactive_split_bytes.large_pool.current": 106604544,
        "inactive_split_bytes.large_pool.freed": 4316526647296,
        "inactive_split_bytes.large_pool.peak": 860701696,
        "inactive_split_bytes.small_pool.allocated": 1758123244032,
        "inactive_split_bytes.small_pool.current": 45724160,
        "inactive_split_bytes.small_pool.freed": 1758077519872,
        "inactive_split_bytes.small_pool.peak": 79716864,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 1097,
        "num_device_free": 754,
        "num_ooms": 0,
        "num_sync_all_streams": 5,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 5972352312941,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 5971385266229,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 4266275710064,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 4265325696616,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 1706076602877,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 1706059569613,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 26782728192,
        "reserved_bytes.all.current": 8910798848,
        "reserved_bytes.all.freed": 17871929344,
        "reserved_bytes.all.peak": 8910798848,
        "reserved_bytes.large_pool.allocated": 26222788608,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 17450401792,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 559939584,
        "reserved_bytes.small_pool.current": 138412032,
        "reserved_bytes.small_pool.freed": 421527552,
        "reserved_bytes.small_pool.peak": 138412032,
        "segment.all.allocated": 1097,
        "segment.all.current": 343,
        "segment.all.freed": 754,
        "segment.all.peak": 343,
        "segment.large_pool.allocated": 830,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 553,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 267,
        "segment.small_pool.current": 66,
        "segment.small_pool.freed": 201,
        "segment.small_pool.peak": 66
    }
}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.5611975320184954}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 969647616,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8917090304,
    "max_memory_reserved": 8917090304,
    "memory_stats": {
        "active.all.allocated": 11622050,
        "active.all.current": 1234,
        "active.all.freed": 11620816,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 1443894,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 1443610,
        "active.large_pool.peak": 482,
        "active.small_pool.allocated": 10178156,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 10177206,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 8130957157888,
        "active_bytes.all.current": 969647616,
        "active_bytes.all.freed": 8129987510272,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 5855243462144,
        "active_bytes.large_pool.current": 952457216,
        "active_bytes.large_pool.freed": 5854291004928,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 2275713695744,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 2275696505344,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 8130957157888,
        "allocated_bytes.all.current": 969647616,
        "allocated_bytes.all.freed": 8129987510272,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 5855243462144,
        "allocated_bytes.large_pool.current": 952457216,
        "allocated_bytes.large_pool.freed": 5854291004928,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 2275713695744,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 2275696505344,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 11622050,
        "allocation.all.current": 1234,
        "allocation.all.freed": 11620816,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 1443894,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 1443610,
        "allocation.large_pool.peak": 482,
        "allocation.small_pool.allocated": 10178156,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 10177206,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 5588000,
        "inactive_split.all.current": 178,
        "inactive_split.all.freed": 5587822,
        "inactive_split.all.peak": 247,
        "inactive_split.large_pool.allocated": 667566,
        "inactive_split.large_pool.current": 43,
        "inactive_split.large_pool.freed": 667523,
        "inactive_split.large_pool.peak": 98,
        "inactive_split.small_pool.allocated": 4920434,
        "inactive_split.small_pool.current": 135,
        "inactive_split.small_pool.freed": 4920299,
        "inactive_split.small_pool.peak": 221,
        "inactive_split_bytes.all.allocated": 8074710071808,
        "inactive_split_bytes.all.current": 150231552,
        "inactive_split_bytes.all.freed": 8074559840256,
        "inactive_split_bytes.all.peak": 906550272,
        "inactive_split_bytes.large_pool.allocated": 5736156622848,
        "inactive_split_bytes.large_pool.current": 106604544,
        "inactive_split_bytes.large_pool.freed": 5736050018304,
        "inactive_split_bytes.large_pool.peak": 860701696,
        "inactive_split_bytes.small_pool.allocated": 2338553448960,
        "inactive_split_bytes.small_pool.current": 43627008,
        "inactive_split_bytes.small_pool.freed": 2338509821952,
        "inactive_split_bytes.small_pool.peak": 79716864,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 1464,
        "num_device_free": 1118,
        "num_ooms": 0,
        "num_sync_all_streams": 7,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 7963130743540,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 7962163696828,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 5688361933632,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 5687411920184,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 2274768809908,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 2274751776644,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 35894853632,
        "reserved_bytes.all.current": 8917090304,
        "reserved_bytes.all.freed": 26977763328,
        "reserved_bytes.all.peak": 8917090304,
        "reserved_bytes.large_pool.allocated": 35162947584,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 26390560768,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 731906048,
        "reserved_bytes.small_pool.current": 144703488,
        "reserved_bytes.small_pool.freed": 587202560,
        "reserved_bytes.small_pool.peak": 144703488,
        "segment.all.allocated": 1464,
        "segment.all.current": 346,
        "segment.all.freed": 1118,
        "segment.all.peak": 346,
        "segment.large_pool.allocated": 1115,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 838,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 349,
        "segment.small_pool.current": 69,
        "segment.small_pool.freed": 280,
        "segment.small_pool.peak": 69
    }
}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.5403785768297347}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 968467968,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8908701696,
    "max_memory_reserved": 8917090304,
    "memory_stats": {
        "active.all.allocated": 14527687,
        "active.all.current": 1234,
        "active.all.freed": 14526453,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 1804867,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 1804583,
        "active.large_pool.peak": 482,
        "active.small_pool.allocated": 12722820,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 12721870,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 10162861598720,
        "active_bytes.all.current": 968467968,
        "active_bytes.all.freed": 10161893130752,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 7318219412480,
        "active_bytes.large_pool.current": 951277568,
        "active_bytes.large_pool.freed": 7317268134912,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 2844642186240,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 2844624995840,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 10162861598720,
        "allocated_bytes.all.current": 968467968,
        "allocated_bytes.all.freed": 10161893130752,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 7318219412480,
        "allocated_bytes.large_pool.current": 951277568,
        "allocated_bytes.large_pool.freed": 7317268134912,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 2844642186240,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 2844624995840,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 14527687,
        "allocation.all.current": 1234,
        "allocation.all.freed": 14526453,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 1804867,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 1804583,
        "allocation.large_pool.peak": 482,
        "allocation.small_pool.allocated": 12722820,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 12721870,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 6924013,
        "inactive_split.all.current": 173,
        "inactive_split.all.freed": 6923840,
        "inactive_split.all.peak": 268,
        "inactive_split.large_pool.allocated": 834865,
        "inactive_split.large_pool.current": 45,
        "inactive_split.large_pool.freed": 834820,
        "inactive_split.large_pool.peak": 98,
        "inactive_split.small_pool.allocated": 6089148,
        "inactive_split.small_pool.current": 128,
        "inactive_split.small_pool.freed": 6089020,
        "inactive_split.small_pool.peak": 242,
        "inactive_split_bytes.all.allocated": 10110169738752,
        "inactive_split_bytes.all.current": 147216896,
        "inactive_split_bytes.all.freed": 10110022521856,
        "inactive_split_bytes.all.peak": 906550272,
        "inactive_split_bytes.large_pool.allocated": 7181281266688,
        "inactive_split_bytes.large_pool.current": 107784192,
        "inactive_split_bytes.large_pool.freed": 7181173482496,
        "inactive_split_bytes.large_pool.peak": 860701696,
        "inactive_split_bytes.small_pool.allocated": 2928888472064,
        "inactive_split_bytes.small_pool.current": 39432704,
        "inactive_split_bytes.small_pool.freed": 2928849039360,
        "inactive_split_bytes.small_pool.peak": 79716864,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 1806,
        "num_device_free": 1464,
        "num_ooms": 0,
        "num_sync_all_streams": 9,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 9953909177175,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 9952942130463,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 7110448157200,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 7109498143752,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 2843461019975,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 2843443986711,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 44409290752,
        "reserved_bytes.all.current": 8908701696,
        "reserved_bytes.all.freed": 35500589056,
        "reserved_bytes.all.peak": 8917090304,
        "reserved_bytes.large_pool.allocated": 43505418240,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 34733031424,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 903872512,
        "reserved_bytes.small_pool.current": 136314880,
        "reserved_bytes.small_pool.freed": 767557632,
        "reserved_bytes.small_pool.peak": 144703488,
        "segment.all.allocated": 1806,
        "segment.all.current": 342,
        "segment.all.freed": 1464,
        "segment.all.peak": 346,
        "segment.large_pool.allocated": 1375,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 1098,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 431,
        "segment.small_pool.current": 65,
        "segment.small_pool.freed": 366,
        "segment.small_pool.peak": 69
    }
}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.5347381322825221}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
    "memory_allocated": 969647616,
    "max_memory_allocated": 3324437504,
    "memory_reserved": 8912896000,
    "max_memory_reserved": 8917090304,
    "memory_stats": {
        "active.all.allocated": 17433374,
        "active.all.current": 1234,
        "active.all.freed": 17432140,
        "active.all.peak": 1487,
        "active.large_pool.allocated": 2165840,
        "active.large_pool.current": 284,
        "active.large_pool.freed": 2165556,
        "active.large_pool.peak": 482,
        "active.small_pool.allocated": 15267534,
        "active.small_pool.current": 950,
        "active.small_pool.freed": 15266584,
        "active.small_pool.peak": 1200,
        "active_bytes.all.allocated": 12195657911296,
        "active_bytes.all.current": 969647616,
        "active_bytes.all.freed": 12194688263680,
        "active_bytes.all.peak": 3324437504,
        "active_bytes.large_pool.allocated": 8782087207936,
        "active_bytes.large_pool.current": 952457216,
        "active_bytes.large_pool.freed": 8781134750720,
        "active_bytes.large_pool.peak": 3240482816,
        "active_bytes.small_pool.allocated": 3413570703360,
        "active_bytes.small_pool.current": 17190400,
        "active_bytes.small_pool.freed": 3413553512960,
        "active_bytes.small_pool.peak": 114983424,
        "allocated_bytes.all.allocated": 12195657911296,
        "allocated_bytes.all.current": 969647616,
        "allocated_bytes.all.freed": 12194688263680,
        "allocated_bytes.all.peak": 3324437504,
        "allocated_bytes.large_pool.allocated": 8782087207936,
        "allocated_bytes.large_pool.current": 952457216,
        "allocated_bytes.large_pool.freed": 8781134750720,
        "allocated_bytes.large_pool.peak": 3240482816,
        "allocated_bytes.small_pool.allocated": 3413570703360,
        "allocated_bytes.small_pool.current": 17190400,
        "allocated_bytes.small_pool.freed": 3413553512960,
        "allocated_bytes.small_pool.peak": 114983424,
        "allocation.all.allocated": 17433374,
        "allocation.all.current": 1234,
        "allocation.all.freed": 17432140,
        "allocation.all.peak": 1487,
        "allocation.large_pool.allocated": 2165840,
        "allocation.large_pool.current": 284,
        "allocation.large_pool.freed": 2165556,
        "allocation.large_pool.peak": 482,
        "allocation.small_pool.allocated": 15267534,
        "allocation.small_pool.current": 950,
        "allocation.small_pool.freed": 15266584,
        "allocation.small_pool.peak": 1200,
        "inactive_split.all.allocated": 8349332,
        "inactive_split.all.current": 167,
        "inactive_split.all.freed": 8349165,
        "inactive_split.all.peak": 268,
        "inactive_split.large_pool.allocated": 1002355,
        "inactive_split.large_pool.current": 43,
        "inactive_split.large_pool.freed": 1002312,
        "inactive_split.large_pool.peak": 98,
        "inactive_split.small_pool.allocated": 7346977,
        "inactive_split.small_pool.current": 124,
        "inactive_split.small_pool.freed": 7346853,
        "inactive_split.small_pool.peak": 242,
        "inactive_split_bytes.all.allocated": 12144424250880,
        "inactive_split_bytes.all.current": 141842944,
        "inactive_split_bytes.all.freed": 12144282407936,
        "inactive_split_bytes.all.peak": 906550272,
        "inactive_split_bytes.large_pool.allocated": 8631940797440,
        "inactive_split_bytes.large_pool.current": 106604544,
        "inactive_split_bytes.large_pool.freed": 8631834192896,
        "inactive_split_bytes.large_pool.peak": 860701696,
        "inactive_split_bytes.small_pool.allocated": 3512483453440,
        "inactive_split_bytes.small_pool.current": 35238400,
        "inactive_split_bytes.small_pool.freed": 3512448215040,
        "inactive_split_bytes.small_pool.peak": 79716864,
        "max_split_size": -1,
        "num_alloc_retries": 0,
        "num_device_alloc": 2177,
        "num_device_free": 1833,
        "num_ooms": 0,
        "num_sync_all_streams": 11,
        "oversize_allocations.allocated": 0,
        "oversize_allocations.current": 0,
        "oversize_allocations.freed": 0,
        "oversize_allocations.peak": 0,
        "oversize_segments.allocated": 0,
        "oversize_segments.current": 0,
        "oversize_segments.freed": 0,
        "oversize_segments.peak": 0,
        "requested_bytes.all.allocated": 11944687613846,
        "requested_bytes.all.current": 967046712,
        "requested_bytes.all.freed": 11943720567134,
        "requested_bytes.all.peak": 3263085376,
        "requested_bytes.large_pool.allocated": 8532534380768,
        "requested_bytes.large_pool.current": 950013448,
        "requested_bytes.large_pool.freed": 8531584367320,
        "requested_bytes.large_pool.peak": 3179241096,
        "requested_bytes.small_pool.allocated": 3412153233078,
        "requested_bytes.small_pool.current": 17033264,
        "requested_bytes.small_pool.freed": 3412136199814,
        "requested_bytes.small_pool.peak": 114852048,
        "reserved_bytes.all.allocated": 53510930432,
        "reserved_bytes.all.current": 8912896000,
        "reserved_bytes.all.freed": 44598034432,
        "reserved_bytes.all.peak": 8917090304,
        "reserved_bytes.large_pool.allocated": 52424605696,
        "reserved_bytes.large_pool.current": 8772386816,
        "reserved_bytes.large_pool.freed": 43652218880,
        "reserved_bytes.large_pool.peak": 8772386816,
        "reserved_bytes.small_pool.allocated": 1086324736,
        "reserved_bytes.small_pool.current": 140509184,
        "reserved_bytes.small_pool.freed": 945815552,
        "reserved_bytes.small_pool.peak": 144703488,
        "segment.all.allocated": 2177,
        "segment.all.current": 344,
        "segment.all.freed": 1833,
        "segment.all.peak": 346,
        "segment.large_pool.allocated": 1659,
        "segment.large_pool.current": 277,
        "segment.large_pool.freed": 1382,
        "segment.large_pool.peak": 277,
        "segment.small_pool.allocated": 518,
        "segment.small_pool.current": 67,
        "segment.small_pool.freed": 451,
        "segment.small_pool.peak": 69
    }
}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log
ADDED
@@ -0,0 +1,846 @@
+06/02/2024 07:57:08 - INFO - __main__ - Number of labels detected = 2
+06/02/2024 07:57:08 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/02/2024 07:57:09 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/adapter_config.json
+06/02/2024 07:57:09 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/02/2024 07:57:09 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_adapter.bin
+06/02/2024 07:57:09 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/head_config.json
+06/02/2024 07:57:09 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/02/2024 07:57:09 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_model_head.bin
+06/02/2024 07:57:09 - INFO - __main__ - Adapter Name = cola
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/02/2024 07:57:09 - INFO - __main__ - heads.cola.1.weight
+06/02/2024 07:57:09 - INFO - __main__ - heads.cola.1.bias
+06/02/2024 07:57:09 - INFO - __main__ - heads.cola.4.weight
+06/02/2024 07:57:09 - INFO - __main__ - heads.cola.4.bias
+06/02/2024 07:57:10 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/02/2024 07:57:10 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 07:57:10 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:01:59 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/02/2024 08:01:59 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/02/2024 08:01:59 - INFO - __main__ - tensor([[0.0461, 0.1350],
+        [0.0575, 0.1638],
+        [0.0505, 0.0935],
+        ...,
+        [0.1243, 0.0734],
+        [0.0638, 0.1309],
+        [0.1254, 0.0759]], device='cuda:0')
+06/02/2024 08:01:59 - INFO - __main__ - tensor([[[ 9.6094, 9.5192],
+         [ 9.5192, 9.6058]],
+
+        [[ 9.4777, 9.4281],
+         [ 9.4281, 9.4676]],
+
+        [[ 9.1818, 9.1107],
+         [ 9.1107, 9.1895]],
+
+        ...,
+
+        [[10.9495, 10.9250],
+         [10.9249, 10.9505]],
+
+        [[ 9.3165, 9.2374],
+         [ 9.2374, 9.3122]],
+
+        [[10.9862, 10.9572],
+         [10.9572, 10.9816]]], device='cuda:0')
+06/02/2024 08:01:59 - INFO - __main__ - ***** Completed training *****
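The f_mu (1043, 2) and f_var (1043, 2, 2) tensors logged above are the mean and the full 2x2 covariance of a Laplace-linearized predictive over the CoLA validation logits. A minimal sketch of how such moments are typically turned into class probabilities by correlated Monte Carlo sampling; the function name is illustrative, and the 1000-sample default is an assumption suggested by the "mc_corr_1000" tag in this commit's result files:

    import torch

    def mc_predictive(f_mu, f_var, n_samples=1000, jitter=1e-6):
        """Average the softmax over samples from N(f_mu, f_var).

        f_mu: (N, C) logit means; f_var: (N, C, C) logit covariances.
        A small diagonal jitter keeps the covariance positive definite.
        """
        eye = torch.eye(f_mu.shape[-1], device=f_mu.device)
        dist = torch.distributions.MultivariateNormal(
            f_mu, covariance_matrix=f_var + jitter * eye)
        logits = dist.sample((n_samples,))         # (S, N, C)
        return logits.softmax(dim=-1).mean(dim=0)  # (N, C)

Sampling with the full covariance (rather than only its diagonal) preserves the strong between-class correlations visible in the logged 2x2 blocks.
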
+06/02/2024 08:02:02 - INFO - __main__ - Number of labels detected = 2
+06/02/2024 08:02:03 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/02/2024 08:02:04 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/adapter_config.json
+06/02/2024 08:02:04 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/02/2024 08:02:04 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_adapter.bin
+06/02/2024 08:02:04 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/head_config.json
+06/02/2024 08:02:04 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/02/2024 08:02:04 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_model_head.bin
+06/02/2024 08:02:04 - INFO - __main__ - Adapter Name = cola
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/02/2024 08:02:04 - INFO - __main__ - heads.cola.1.weight
+06/02/2024 08:02:04 - INFO - __main__ - heads.cola.1.bias
+06/02/2024 08:02:04 - INFO - __main__ - heads.cola.4.weight
+06/02/2024 08:02:04 - INFO - __main__ - heads.cola.4.bias
+06/02/2024 08:02:05 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/02/2024 08:02:05 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:02:05 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:06:58 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/02/2024 08:06:58 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/02/2024 08:06:58 - INFO - __main__ - tensor([[-1.3785, 1.5697],
+        [-1.5844, 1.6368],
+        [-1.4067, 1.5115],
+        ...,
+        [-1.6957, 2.0050],
+        [ 0.1387, 0.0336],
+        [ 0.2648, -0.1128]], device='cuda:0')
+06/02/2024 08:06:58 - INFO - __main__ - tensor([[[ 4.2579, 1.3738],
+         [ 1.3738, 4.2773]],
+
+        [[ 2.6641, 1.8229],
+         [ 1.8229, 2.6801]],
+
+        [[ 4.7692, 1.4135],
+         [ 1.4135, 4.7653]],
+
+        ...,
+
+        [[ 4.3119, 3.3220],
+         [ 3.3220, 4.4150]],
+
+        [[ 3.6567, -0.2874],
+         [-0.2874, 3.7251]],
+
+        [[ 3.1754, 0.5567],
+         [ 0.5567, 3.1824]]], device='cuda:0')
+06/02/2024 08:06:58 - INFO - __main__ - ***** Completed training *****
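Each block in this log reloads one training checkpoint before evaluating it: the adapters.loading messages correspond to restoring the saved adapter weights and classification head from the checkpoint directory. A minimal sketch of that reload, assuming the Hugging Face adapters library that emits these messages:

    from adapters import AutoAdapterModel

    model = AutoAdapterModel.from_pretrained("roberta-base")
    # load_adapter reads adapter_config.json / pytorch_adapter.bin and, by
    # default, the prediction head (head_config.json / pytorch_model_head.bin)
    # from the same directory, producing the log lines above.
    ckpt = "./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999"
    model.load_adapter(ckpt, load_as="cola")
    model.set_active_adapters("cola")
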
+06/02/2024 08:07:24 - INFO - __main__ - Number of labels detected = 2
+06/02/2024 08:07:25 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/02/2024 08:07:25 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/adapter_config.json
+06/02/2024 08:07:25 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/02/2024 08:07:26 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_adapter.bin
+06/02/2024 08:07:26 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/head_config.json
+06/02/2024 08:07:26 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/02/2024 08:07:26 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_model_head.bin
+06/02/2024 08:07:26 - INFO - __main__ - Adapter Name = cola
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/02/2024 08:07:26 - INFO - __main__ - heads.cola.1.weight
+06/02/2024 08:07:26 - INFO - __main__ - heads.cola.1.bias
+06/02/2024 08:07:26 - INFO - __main__ - heads.cola.4.weight
+06/02/2024 08:07:26 - INFO - __main__ - heads.cola.4.bias
+06/02/2024 08:07:26 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/02/2024 08:07:26 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:07:26 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:12:24 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/02/2024 08:12:24 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/02/2024 08:12:24 - INFO - __main__ - tensor([[-1.8083, 1.9876],
+        [-2.1538, 2.1866],
+        [-1.8847, 1.9186],
+        ...,
+        [-2.4217, 2.7522],
+        [ 0.4212, -0.2226],
+        [ 0.7813, -0.5914]], device='cuda:0')
+06/02/2024 08:12:24 - INFO - __main__ - tensor([[[ 4.8062, 0.8630],
+         [ 0.8630, 4.7482]],
+
+        [[ 2.8347, 1.9434],
+         [ 1.9434, 2.8418]],
+
+        [[ 6.4753, -1.0532],
+         [-1.0532, 6.2592]],
+
+        ...,
+
+        [[ 4.3534, 3.6074],
+         [ 3.6074, 4.3897]],
+
+        [[ 4.9381, -1.9654],
+         [-1.9654, 4.7852]],
+
+        [[ 5.4259, -1.6392],
+         [-1.6392, 5.1288]]], device='cuda:0')
+06/02/2024 08:12:24 - INFO - __main__ - ***** Completed training *****
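The long runs of parameter names printed after "Adapter Name = cola" enumerate the trainable weights: eight adapter tensors (adapter_down/adapter_up, weight and bias) per encoder layer plus the four tensors of the 'cola' head. A minimal sketch of how such a listing is typically produced; the logger setup is shown only for self-containment, and model stands for the adapter model loaded above:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # With only the adapter and head active for training, the parameters that
    # still require gradients are exactly the names seen in this log.
    for name, param in model.named_parameters():
        if param.requires_grad:
            logger.info(name)
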
+06/02/2024 08:12:27 - INFO - __main__ - Number of labels detected = 2
+06/02/2024 08:12:28 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+06/02/2024 08:12:28 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/adapter_config.json
+06/02/2024 08:12:28 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
+06/02/2024 08:12:28 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_adapter.bin
+06/02/2024 08:12:28 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/head_config.json
+06/02/2024 08:12:28 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+06/02/2024 08:12:28 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_model_head.bin
+06/02/2024 08:12:28 - INFO - __main__ - Adapter Name = cola
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
+06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
+06/02/2024 08:12:29 - INFO - __main__ - heads.cola.1.weight
+06/02/2024 08:12:29 - INFO - __main__ - heads.cola.1.bias
+06/02/2024 08:12:29 - INFO - __main__ - heads.cola.4.weight
+06/02/2024 08:12:29 - INFO - __main__ - heads.cola.4.bias
+06/02/2024 08:12:30 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+06/02/2024 08:12:30 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:12:30 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+06/02/2024 08:17:27 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
+06/02/2024 08:17:27 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
+06/02/2024 08:17:27 - INFO - __main__ - tensor([[-2.4468, 2.6517],
+        [-2.4047, 2.4407],
+        [-2.1919, 2.2412],
+        ...,
+        [-2.7916, 3.1134],
+        [-0.2845, 0.4917],
+        [ 1.0054, -0.7745]], device='cuda:0')
+06/02/2024 08:17:27 - INFO - __main__ - tensor([[[ 5.8574, 1.0059],
+         [ 1.0059, 5.7621]],
+
+        [[ 3.2467, 1.8953],
+         [ 1.8953, 3.2247]],
+
+        [[ 8.0175, -2.0250],
+         [-2.0250, 7.6710]],
+
+        ...,
+
+        [[ 5.0837, 4.4143],
+         [ 4.4143, 5.0397]],
+
+        [[ 6.5210, -3.7889],
+         [-3.7889, 6.3052]],
+
+        [[ 6.4597, -1.8953],
+         [-1.8953, 5.9696]]], device='cuda:0')
+06/02/2024 08:17:27 - INFO - __main__ - ***** Completed training *****
|
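The f_mu / f_var pair logged above has the shape signature of a Laplace-approximation GLM predictive: a mean of shape [N, C] and a per-example C×C covariance over the 1043 CoLA validation sentences with C = 2. As a hedged illustration only (not the repository's confirmed code), the laplace-torch package produces exactly these quantities under the configuration the file suffix `la_kron_all_homo_mc_corr_1000` suggests (Kronecker-factored Hessian, all weights, 1000 Monte-Carlo samples):

```python
from laplace import Laplace

# Sketch under stated assumptions: `net` is assumed to be a wrapper that maps
# input tensors to raw logits (HF adapter models return dicts, so some
# wrapping is needed); `train_loader` / `x_val` are placeholders.
la = Laplace(net, likelihood="classification",
             subset_of_weights="all", hessian_structure="kron")
la.fit(train_loader)  # curvature around the fine-tuned adapter weights

# GLM predictive: internally this yields f_mu [N, 2] and f_var [N, 2, 2],
# matching the logged shapes, then pushes 1000 MC samples through softmax.
probs = la(x_val, pred_type="glm", link_approx="mc", n_samples=1000)
```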
06/02/2024 08:17:30 - INFO - __main__ - Number of labels detected = 2
06/02/2024 08:17:31 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 08:17:32 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/adapter_config.json
06/02/2024 08:17:32 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 08:17:32 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_adapter.bin
06/02/2024 08:17:32 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/head_config.json
06/02/2024 08:17:32 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 08:17:32 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_model_head.bin
06/02/2024 08:17:32 - INFO - __main__ - Adapter Name = cola
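The adapters.loading messages above correspond to reloading a saved checkpoint (adapter_config.json / pytorch_adapter.bin plus head_config.json / pytorch_model_head.bin). A minimal sketch of how such a checkpoint can be reloaded with the `adapters` library — an assumption about the script, with the path taken from the log:

```python
from adapters import AutoAdapterModel

model = AutoAdapterModel.from_pretrained("roberta-base")

# load_adapter() reads the adapter config/weights and, by default, the
# matching prediction head -- the four files named in the log messages above.
name = model.load_adapter(
    "./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999"
)
model.train_adapter(name)  # freeze the backbone; only adapter + head train

# The parameter listing that follows in the log can be reproduced like this:
for n, p in model.named_parameters():
    if p.requires_grad:
        print(n)  # e.g. roberta.encoder.layer.0.attention.output.adapters.cola...
```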
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 08:17:32 - INFO - __main__ - heads.cola.1.weight
06/02/2024 08:17:32 - INFO - __main__ - heads.cola.1.bias
06/02/2024 08:17:32 - INFO - __main__ - heads.cola.4.weight
06/02/2024 08:17:32 - INFO - __main__ - heads.cola.4.bias
06/02/2024 08:17:32 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 08:17:32 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 08:17:32 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 08:22:33 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 08:22:33 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 08:22:33 - INFO - __main__ - tensor([[-2.7738, 2.9883],
        [-2.7051, 2.7341],
        [-2.7796, 2.7984],
        ...,
        [-3.0705, 3.3764],
        [-0.9860, 1.1826],
        [-1.1101, 1.2928]], device='cuda:0')
06/02/2024 08:22:33 - INFO - __main__ - tensor([[[ 4.8272, 1.4823],
        [ 1.4823, 4.7181]],

        [[ 2.9251, 1.7708],
        [ 1.7708, 2.9030]],

        [[ 7.8872, -2.1296],
        [-2.1296, 7.5143]],

        ...,

        [[ 4.5578, 3.7887],
        [ 3.7887, 4.5065]],

        [[ 7.6898, -5.0990],
        [-5.0990, 7.4951]],

        [[11.8868, -8.6863],
        [-8.6863, 11.9500]]], device='cuda:0')
06/02/2024 08:22:33 - INFO - __main__ - ***** Completed training *****
06/02/2024 08:22:35 - INFO - __main__ - Number of labels detected = 2
06/02/2024 08:22:36 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
06/02/2024 08:22:37 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/adapter_config.json
06/02/2024 08:22:37 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
06/02/2024 08:22:37 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_adapter.bin
06/02/2024 08:22:37 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/head_config.json
06/02/2024 08:22:37 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
06/02/2024 08:22:37 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_model_head.bin
06/02/2024 08:22:37 - INFO - __main__ - Adapter Name = cola
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
06/02/2024 08:22:37 - INFO - __main__ - heads.cola.1.weight
06/02/2024 08:22:37 - INFO - __main__ - heads.cola.1.bias
06/02/2024 08:22:37 - INFO - __main__ - heads.cola.4.weight
06/02/2024 08:22:37 - INFO - __main__ - heads.cola.4.bias
06/02/2024 08:22:38 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
06/02/2024 08:22:38 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 08:22:38 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
06/02/2024 08:27:38 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
06/02/2024 08:27:38 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
06/02/2024 08:27:38 - INFO - __main__ - tensor([[-2.7968, 3.0246],
        [-2.7893, 2.8360],
        [-2.7577, 2.7798],
        ...,
        [-3.0549, 3.3754],
        [-1.0387, 1.2427],
        [-0.9734, 1.1604]], device='cuda:0')
06/02/2024 08:27:38 - INFO - __main__ - tensor([[[ 5.2688, 1.2654],
        [ 1.2654, 5.1606]],

        [[ 3.1404, 1.7835],
        [ 1.7835, 3.1196]],

        [[ 9.1570, -3.2938],
        [ -3.2938, 8.7105]],

        ...,

        [[ 4.6808, 3.8526],
        [ 3.8526, 4.6242]],

        [[ 9.2852, -6.5529],
        [ -6.5529, 9.0689]],

        [[ 13.4494, -10.2085],
        [-10.2085, 13.4283]]], device='cuda:0')
06/02/2024 08:27:38 - INFO - __main__ - ***** Completed training *****
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": -0.040852194988972475}

outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff
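For reference, a hypothetical snippet (placeholders `preds` / `refs`, not the repository's confirmed code) showing how an all_results file with an eval_matthews_correlation entry can be produced; CoLA's GLUE metric is Matthews correlation:

```python
import json
import evaluate

metric = evaluate.load("glue", "cola")  # Matthews correlation for CoLA
res = metric.compute(predictions=preds, references=refs)
with open("all_results_la_kron_all_homo_mc_corr_1000.json", "w") as f:
    json.dump({"eval_matthews_correlation": res["matthews_correlation"]}, f)
```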
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
  "memory_allocated": 1030541312,
  "max_memory_allocated": 3413011456,
  "memory_reserved": 10204741632,
  "max_memory_reserved": 10204741632,
  "memory_stats": {
    "active.all.allocated": 2915632,
    "active.all.current": 1233,
    "active.all.freed": 2914399,
    "active.all.peak": 1485,
    "active.large_pool.allocated": 361096,
    "active.large_pool.current": 284,
    "active.large_pool.freed": 360812,
    "active.large_pool.peak": 384,
    "active.small_pool.allocated": 2554536,
    "active.small_pool.current": 949,
    "active.small_pool.freed": 2553587,
    "active.small_pool.peak": 1199,
    "active_bytes.all.allocated": 2022530949632,
    "active_bytes.all.current": 1030541312,
    "active_bytes.all.freed": 2021500408320,
    "active_bytes.all.peak": 3413011456,
    "active_bytes.large_pool.allocated": 1458420835328,
    "active_bytes.large_pool.current": 1013274624,
    "active_bytes.large_pool.freed": 1457407560704,
    "active_bytes.large_pool.peak": 3327070720,
    "active_bytes.small_pool.allocated": 564110114304,
    "active_bytes.small_pool.current": 17266688,
    "active_bytes.small_pool.freed": 564092847616,
    "active_bytes.small_pool.peak": 118205440,
    "allocated_bytes.all.allocated": 2022530949632,
    "allocated_bytes.all.current": 1030541312,
    "allocated_bytes.all.freed": 2021500408320,
    "allocated_bytes.all.peak": 3413011456,
    "allocated_bytes.large_pool.allocated": 1458420835328,
    "allocated_bytes.large_pool.current": 1013274624,
    "allocated_bytes.large_pool.freed": 1457407560704,
    "allocated_bytes.large_pool.peak": 3327070720,
    "allocated_bytes.small_pool.allocated": 564110114304,
    "allocated_bytes.small_pool.current": 17266688,
    "allocated_bytes.small_pool.freed": 564092847616,
    "allocated_bytes.small_pool.peak": 118205440,
    "allocation.all.allocated": 2915632,
    "allocation.all.current": 1233,
    "allocation.all.freed": 2914399,
    "allocation.all.peak": 1485,
    "allocation.large_pool.allocated": 361096,
    "allocation.large_pool.current": 284,
    "allocation.large_pool.freed": 360812,
    "allocation.large_pool.peak": 384,
    "allocation.small_pool.allocated": 2554536,
    "allocation.small_pool.current": 949,
    "allocation.small_pool.freed": 2553587,
    "allocation.small_pool.peak": 1199,
    "inactive_split.all.allocated": 1425838,
    "inactive_split.all.current": 173,
    "inactive_split.all.freed": 1425665,
    "inactive_split.all.peak": 221,
    "inactive_split.large_pool.allocated": 166568,
    "inactive_split.large_pool.current": 43,
    "inactive_split.large_pool.freed": 166525,
    "inactive_split.large_pool.peak": 96,
    "inactive_split.small_pool.allocated": 1259270,
    "inactive_split.small_pool.current": 130,
    "inactive_split.small_pool.freed": 1259140,
    "inactive_split.small_pool.peak": 145,
    "inactive_split_bytes.all.allocated": 2015997287936,
    "inactive_split_bytes.all.current": 150155264,
    "inactive_split_bytes.all.freed": 2015847132672,
    "inactive_split_bytes.all.peak": 926979584,
    "inactive_split_bytes.large_pool.allocated": 1432195878400,
    "inactive_split_bytes.large_pool.current": 106604544,
    "inactive_split_bytes.large_pool.freed": 1432089273856,
    "inactive_split_bytes.large_pool.peak": 890500608,
    "inactive_split_bytes.small_pool.allocated": 583801409536,
    "inactive_split_bytes.small_pool.current": 43550720,
    "inactive_split_bytes.small_pool.freed": 583757858816,
    "inactive_split_bytes.small_pool.peak": 80564224,
    "max_split_size": -1,
    "num_alloc_retries": 0,
    "num_device_alloc": 426,
    "num_device_free": 52,
    "num_ooms": 0,
    "num_sync_all_streams": 1,
    "oversize_allocations.allocated": 0,
    "oversize_allocations.current": 0,
    "oversize_allocations.freed": 0,
    "oversize_allocations.peak": 0,
    "oversize_segments.allocated": 0,
    "oversize_segments.current": 0,
    "oversize_segments.freed": 0,
    "oversize_segments.peak": 0,
    "requested_bytes.all.allocated": 1978722581871,
    "requested_bytes.all.current": 1027778996,
    "requested_bytes.all.freed": 1977694802875,
    "requested_bytes.all.peak": 3382342108,
    "requested_bytes.large_pool.allocated": 1414850593488,
    "requested_bytes.large_pool.current": 1010670088,
    "requested_bytes.large_pool.freed": 1413839923400,
    "requested_bytes.large_pool.peak": 3296520840,
    "requested_bytes.small_pool.allocated": 563871988383,
    "requested_bytes.small_pool.current": 17108908,
    "requested_bytes.small_pool.freed": 563854879475,
    "requested_bytes.small_pool.peak": 118076684,
    "reserved_bytes.all.allocated": 10521411584,
    "reserved_bytes.all.current": 10204741632,
    "reserved_bytes.all.freed": 316669952,
    "reserved_bytes.all.peak": 10204741632,
    "reserved_bytes.large_pool.allocated": 10297016320,
    "reserved_bytes.large_pool.current": 10066329600,
    "reserved_bytes.large_pool.freed": 230686720,
    "reserved_bytes.large_pool.peak": 10066329600,
    "reserved_bytes.small_pool.allocated": 224395264,
    "reserved_bytes.small_pool.current": 138412032,
    "reserved_bytes.small_pool.freed": 85983232,
    "reserved_bytes.small_pool.peak": 138412032,
    "segment.all.allocated": 426,
    "segment.all.current": 374,
    "segment.all.freed": 52,
    "segment.all.peak": 374,
    "segment.large_pool.allocated": 319,
    "segment.large_pool.current": 308,
    "segment.large_pool.freed": 11,
    "segment.large_pool.peak": 308,
    "segment.small_pool.allocated": 107,
    "segment.small_pool.current": 66,
    "segment.small_pool.freed": 41,
    "segment.small_pool.peak": 66
  }
}
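The gpu_stats_la.json files mirror torch.cuda's allocator counters; one plausible way such a file could be produced (an assumption, not the repository's confirmed code):

```python
import json
import torch

stats = {
    "memory_allocated": torch.cuda.memory_allocated(),
    "max_memory_allocated": torch.cuda.max_memory_allocated(),
    "memory_reserved": torch.cuda.memory_reserved(),
    "max_memory_reserved": torch.cuda.max_memory_reserved(),
    # memory_stats() returns exactly the dotted keys seen above
    # ("active.all.allocated", "segment.small_pool.peak", ...).
    "memory_stats": dict(torch.cuda.memory_stats()),
}
with open("gpu_stats_la.json", "w") as f:
    json.dump(stats, f, indent=2)
```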
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.529144545456451}

outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
  "memory_allocated": 1030541312,
  "max_memory_allocated": 3413011456,
  "memory_reserved": 10206838784,
  "max_memory_reserved": 10206838784,
  "memory_stats": {
    "active.all.allocated": 5831312,
    "active.all.current": 1233,
    "active.all.freed": 5830079,
    "active.all.peak": 1485,
    "active.large_pool.allocated": 722190,
    "active.large_pool.current": 284,
    "active.large_pool.freed": 721906,
    "active.large_pool.peak": 482,
    "active.small_pool.allocated": 5109122,
    "active.small_pool.current": 949,
    "active.small_pool.freed": 5108173,
    "active.small_pool.peak": 1199,
    "active_bytes.all.allocated": 4044156840960,
    "active_bytes.all.current": 1030541312,
    "active_bytes.all.freed": 4043126299648,
    "active_bytes.all.peak": 3413011456,
    "active_bytes.large_pool.allocated": 2915936585728,
    "active_bytes.large_pool.current": 1013274624,
    "active_bytes.large_pool.freed": 2914923311104,
    "active_bytes.large_pool.peak": 3327070720,
    "active_bytes.small_pool.allocated": 1128220255232,
    "active_bytes.small_pool.current": 17266688,
    "active_bytes.small_pool.freed": 1128202988544,
    "active_bytes.small_pool.peak": 118205440,
    "allocated_bytes.all.allocated": 4044156840960,
    "allocated_bytes.all.current": 1030541312,
    "allocated_bytes.all.freed": 4043126299648,
    "allocated_bytes.all.peak": 3413011456,
    "allocated_bytes.large_pool.allocated": 2915936585728,
    "allocated_bytes.large_pool.current": 1013274624,
    "allocated_bytes.large_pool.freed": 2914923311104,
    "allocated_bytes.large_pool.peak": 3327070720,
    "allocated_bytes.small_pool.allocated": 1128220255232,
    "allocated_bytes.small_pool.current": 17266688,
    "allocated_bytes.small_pool.freed": 1128202988544,
    "allocated_bytes.small_pool.peak": 118205440,
    "allocation.all.allocated": 5831312,
    "allocation.all.current": 1233,
    "allocation.all.freed": 5830079,
    "allocation.all.peak": 1485,
    "allocation.large_pool.allocated": 722190,
    "allocation.large_pool.current": 284,
    "allocation.large_pool.freed": 721906,
    "allocation.large_pool.peak": 482,
    "allocation.small_pool.allocated": 5109122,
    "allocation.small_pool.current": 949,
    "allocation.small_pool.freed": 5108173,
    "allocation.small_pool.peak": 1199,
    "inactive_split.all.allocated": 2839308,
    "inactive_split.all.current": 174,
    "inactive_split.all.freed": 2839134,
    "inactive_split.all.peak": 229,
    "inactive_split.large_pool.allocated": 334357,
    "inactive_split.large_pool.current": 43,
    "inactive_split.large_pool.freed": 334314,
    "inactive_split.large_pool.peak": 96,
    "inactive_split.small_pool.allocated": 2504951,
    "inactive_split.small_pool.current": 131,
    "inactive_split.small_pool.freed": 2504820,
    "inactive_split.small_pool.peak": 183,
    "inactive_split_bytes.all.allocated": 3999387359232,
    "inactive_split_bytes.all.current": 150155264,
    "inactive_split_bytes.all.freed": 3999237203968,
    "inactive_split_bytes.all.peak": 931173888,
    "inactive_split_bytes.large_pool.allocated": 2836943942656,
    "inactive_split_bytes.large_pool.current": 106604544,
    "inactive_split_bytes.large_pool.freed": 2836837338112,
    "inactive_split_bytes.large_pool.peak": 890500608,
    "inactive_split_bytes.small_pool.allocated": 1162443416576,
    "inactive_split_bytes.small_pool.current": 43550720,
    "inactive_split_bytes.small_pool.freed": 1162399865856,
    "inactive_split_bytes.small_pool.peak": 80564224,
    "max_split_size": -1,
    "num_alloc_retries": 0,
    "num_device_alloc": 834,
    "num_device_free": 459,
    "num_ooms": 0,
    "num_sync_all_streams": 3,
    "oversize_allocations.allocated": 0,
    "oversize_allocations.current": 0,
    "oversize_allocations.freed": 0,
    "oversize_allocations.peak": 0,
    "oversize_segments.allocated": 0,
    "oversize_segments.current": 0,
    "oversize_segments.freed": 0,
    "oversize_segments.peak": 0,
    "requested_bytes.all.allocated": 3957428127418,
    "requested_bytes.all.current": 1027778996,
    "requested_bytes.all.freed": 3956400348422,
    "requested_bytes.all.peak": 3382342108,
    "requested_bytes.large_pool.allocated": 2829684147616,
    "requested_bytes.large_pool.current": 1010670088,
    "requested_bytes.large_pool.freed": 2828673477528,
    "requested_bytes.large_pool.peak": 3296520840,
    "requested_bytes.small_pool.allocated": 1127743979802,
    "requested_bytes.small_pool.current": 17108908,
    "requested_bytes.small_pool.freed": 1127726870894,
    "requested_bytes.small_pool.peak": 118076684,
    "reserved_bytes.all.allocated": 20929576960,
    "reserved_bytes.all.current": 10206838784,
    "reserved_bytes.all.freed": 10722738176,
    "reserved_bytes.all.peak": 10206838784,
    "reserved_bytes.large_pool.allocated": 20510146560,
    "reserved_bytes.large_pool.current": 10066329600,
    "reserved_bytes.large_pool.freed": 10443816960,
    "reserved_bytes.large_pool.peak": 10066329600,
    "reserved_bytes.small_pool.allocated": 419430400,
    "reserved_bytes.small_pool.current": 140509184,
    "reserved_bytes.small_pool.freed": 278921216,
    "reserved_bytes.small_pool.peak": 140509184,
    "segment.all.allocated": 834,
    "segment.all.current": 375,
    "segment.all.freed": 459,
    "segment.all.peak": 375,
    "segment.large_pool.allocated": 634,
    "segment.large_pool.current": 308,
    "segment.large_pool.freed": 326,
    "segment.large_pool.peak": 308,
    "segment.small_pool.allocated": 200,
    "segment.small_pool.current": 67,
    "segment.small_pool.freed": 133,
    "segment.small_pool.peak": 67
  }
}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.6015805476045657}

outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
{
  "memory_allocated": 1030541312,
  "max_memory_allocated": 3413011456,
  "memory_reserved": 10208935936,
  "max_memory_reserved": 10208935936,
  "memory_stats": {
    "active.all.allocated": 8747042,
    "active.all.current": 1233,
    "active.all.freed": 8745809,
    "active.all.peak": 1485,
    "active.large_pool.allocated": 1083284,
    "active.large_pool.current": 284,
    "active.large_pool.freed": 1083000,
    "active.large_pool.peak": 482,
    "active.small_pool.allocated": 7663758,
    "active.small_pool.current": 949,
    "active.small_pool.freed": 7662809,
    "active.small_pool.peak": 1199,
    "active_bytes.all.allocated": 6065810808320,
    "active_bytes.all.current": 1030541312,
    "active_bytes.all.freed": 6064780267008,
    "active_bytes.all.peak": 3413011456,
    "active_bytes.large_pool.allocated": 4373480385536,
    "active_bytes.large_pool.current": 1013274624,
    "active_bytes.large_pool.freed": 4372467110912,
    "active_bytes.large_pool.peak": 3327070720,
    "active_bytes.small_pool.allocated": 1692330422784,
    "active_bytes.small_pool.current": 17266688,
    "active_bytes.small_pool.freed": 1692313156096,
    "active_bytes.small_pool.peak": 118205440,
    "allocated_bytes.all.allocated": 6065810808320,
    "allocated_bytes.all.current": 1030541312,
    "allocated_bytes.all.freed": 6064780267008,
    "allocated_bytes.all.peak": 3413011456,
    "allocated_bytes.large_pool.allocated": 4373480385536,
    "allocated_bytes.large_pool.current": 1013274624,
    "allocated_bytes.large_pool.freed": 4372467110912,
    "allocated_bytes.large_pool.peak": 3327070720,
    "allocated_bytes.small_pool.allocated": 1692330422784,
    "allocated_bytes.small_pool.current": 17266688,
    "allocated_bytes.small_pool.freed": 1692313156096,
    "allocated_bytes.small_pool.peak": 118205440,
    "allocation.all.allocated": 8747042,
    "allocation.all.current": 1233,
    "allocation.all.freed": 8745809,
    "allocation.all.peak": 1485,
    "allocation.large_pool.allocated": 1083284,
    "allocation.large_pool.current": 284,
    "allocation.large_pool.freed": 1083000,
    "allocation.large_pool.peak": 482,
    "allocation.small_pool.allocated": 7663758,
    "allocation.small_pool.current": 949,
    "allocation.small_pool.freed": 7662809,
    "allocation.small_pool.peak": 1199,
    "inactive_split.all.allocated": 4301545,
    "inactive_split.all.current": 182,
    "inactive_split.all.freed": 4301363,
    "inactive_split.all.peak": 246,
    "inactive_split.large_pool.allocated": 499623,
    "inactive_split.large_pool.current": 43,
    "inactive_split.large_pool.freed": 499580,
    "inactive_split.large_pool.peak": 96,
    "inactive_split.small_pool.allocated": 3801922,
    "inactive_split.small_pool.current": 139,
    "inactive_split.small_pool.freed": 3801783,
    "inactive_split.small_pool.peak": 220,
    "inactive_split_bytes.all.allocated": 5982721435648,
    "inactive_split_bytes.all.current": 150155264,
    "inactive_split_bytes.all.freed": 5982571280384,
    "inactive_split_bytes.all.peak": 935368192,
    "inactive_split_bytes.large_pool.allocated": 4242779976192,
    "inactive_split_bytes.large_pool.current": 106604544,
    "inactive_split_bytes.large_pool.freed": 4242673371648,
    "inactive_split_bytes.large_pool.peak": 890500608,
    "inactive_split_bytes.small_pool.allocated": 1739941459456,
    "inactive_split_bytes.small_pool.current": 43550720,
    "inactive_split_bytes.small_pool.freed": 1739897908736,
    "inactive_split_bytes.small_pool.peak": 80564224,
    "max_split_size": -1,
    "num_alloc_retries": 0,
    "num_device_alloc": 1238,
    "num_device_free": 862,
    "num_ooms": 0,
    "num_sync_all_streams": 5,
    "oversize_allocations.allocated": 0,
    "oversize_allocations.current": 0,
    "oversize_allocations.freed": 0,
    "oversize_allocations.peak": 0,
    "oversize_segments.allocated": 0,
    "oversize_segments.current": 0,
    "oversize_segments.freed": 0,
    "oversize_segments.peak": 0,
    "requested_bytes.all.allocated": 5936133676001,
    "requested_bytes.all.current": 1027778996,
    "requested_bytes.all.freed": 5935105897005,
    "requested_bytes.all.peak": 3382342108,
    "requested_bytes.large_pool.allocated": 4244517701744,
    "requested_bytes.large_pool.current": 1010670088,
    "requested_bytes.large_pool.freed": 4243507031656,
    "requested_bytes.large_pool.peak": 3296520840,
    "requested_bytes.small_pool.allocated": 1691615974257,
    "requested_bytes.small_pool.current": 17108908,
    "requested_bytes.small_pool.freed": 1691598865349,
    "requested_bytes.small_pool.peak": 118076684,
    "reserved_bytes.all.allocated": 31348228096,
    "reserved_bytes.all.current": 10208935936,
    "reserved_bytes.all.freed": 21139292160,
    "reserved_bytes.all.peak": 10208935936,
    "reserved_bytes.large_pool.allocated": 30744248320,
    "reserved_bytes.large_pool.current": 10066329600,
    "reserved_bytes.large_pool.freed": 20677918720,
    "reserved_bytes.large_pool.peak": 10066329600,
    "reserved_bytes.small_pool.allocated": 603979776,
    "reserved_bytes.small_pool.current": 142606336,
    "reserved_bytes.small_pool.freed": 461373440,
    "reserved_bytes.small_pool.peak": 142606336,
    "segment.all.allocated": 1238,
    "segment.all.current": 376,
    "segment.all.freed": 862,
    "segment.all.peak": 376,
    "segment.large_pool.allocated": 950,
    "segment.large_pool.current": 308,
    "segment.large_pool.freed": 642,
    "segment.large_pool.peak": 308,
    "segment.small_pool.allocated": 288,
    "segment.small_pool.current": 68,
    "segment.small_pool.freed": 220,
    "segment.small_pool.peak": 68
  }
}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
{"eval_matthews_correlation": 0.5933072676560336}

outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render. See raw diff