jdorairaj commited on
Commit
3f490c1
·
1 Parent(s): 29d3117

almost completed la runs,sst2 remains

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log +846 -0
  2. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  3. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  4. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json +130 -0
  5. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  6. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  7. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json +130 -0
  8. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  9. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  10. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json +130 -0
  11. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  12. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  13. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json +130 -0
  14. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  15. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  16. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json +130 -0
  17. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  18. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  19. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/gpu_stats_la.json +130 -0
  20. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la.log +846 -0
  21. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  22. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  23. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json +130 -0
  24. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  25. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  26. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json +130 -0
  27. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  28. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  29. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json +130 -0
  30. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  31. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  32. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json +130 -0
  33. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  34. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  35. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json +130 -0
  36. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  37. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  38. outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json +130 -0
  39. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log +846 -0
  40. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  41. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  42. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json +130 -0
  43. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  44. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  45. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json +130 -0
  46. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  47. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  48. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json +130 -0
  49. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  50. outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log ADDED
@@ -0,0 +1,846 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 06/01/2024 23:31:05 - INFO - __main__ - Number of labels detected = 2
2
+ 06/01/2024 23:31:06 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
3
+ 06/01/2024 23:31:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/adapter_config.json
4
+ 06/01/2024 23:31:07 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
5
+ 06/01/2024 23:31:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_adapter.bin
6
+ 06/01/2024 23:31:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/head_config.json
7
+ 06/01/2024 23:31:07 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
8
+ 06/01/2024 23:31:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_model_head.bin
9
+ 06/01/2024 23:31:07 - INFO - __main__ - Adapter Name = cola
10
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
11
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
12
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
13
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
14
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
15
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
16
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
17
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
18
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
19
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
20
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
21
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
22
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
23
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
24
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
25
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
26
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
27
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
28
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
29
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
30
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
31
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
32
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
33
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
34
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
35
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
36
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
37
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
38
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
39
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
40
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
41
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
42
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
43
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
44
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
45
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
46
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
47
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
48
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
49
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
50
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
51
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
52
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
53
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
54
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
55
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
56
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
57
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
58
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
59
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
60
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
61
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
62
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
63
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
64
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
65
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
66
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
67
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
68
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
69
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
70
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
71
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
72
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
73
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
74
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
75
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
76
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
77
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
78
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
79
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
80
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
81
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
82
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
83
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
84
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
85
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
86
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
87
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
88
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
89
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
90
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
91
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
92
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
93
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
94
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
95
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
96
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
97
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
98
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
99
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
100
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
101
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
102
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
103
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
104
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
105
+ 06/01/2024 23:31:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
106
+ 06/01/2024 23:31:07 - INFO - __main__ - heads.cola.1.weight
107
+ 06/01/2024 23:31:07 - INFO - __main__ - heads.cola.1.bias
108
+ 06/01/2024 23:31:07 - INFO - __main__ - heads.cola.4.weight
109
+ 06/01/2024 23:31:07 - INFO - __main__ - heads.cola.4.bias
110
+ 06/01/2024 23:31:07 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
111
+ 06/01/2024 23:31:07 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
112
+ 06/01/2024 23:31:07 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
113
+ 06/01/2024 23:35:56 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
114
+ 06/01/2024 23:35:56 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
115
+ 06/01/2024 23:35:56 - INFO - __main__ - tensor([[-0.1079, 0.3158],
116
+ [-0.1277, 0.1944],
117
+ [-0.1159, 0.2506],
118
+ ...,
119
+ [-0.1310, 0.2133],
120
+ [-0.1701, 0.2358],
121
+ [-0.1486, 0.1628]], device='cuda:0')
122
+ 06/01/2024 23:35:56 - INFO - __main__ - tensor([[[12.4738, 12.2974],
123
+ [12.2974, 12.4902]],
124
+
125
+ [[11.5270, 11.1531],
126
+ [11.1531, 11.4932]],
127
+
128
+ [[11.6347, 11.4029],
129
+ [11.4029, 11.6472]],
130
+
131
+ ...,
132
+
133
+ [[13.0798, 12.7824],
134
+ [12.7824, 13.1277]],
135
+
136
+ [[11.2542, 11.0789],
137
+ [11.0788, 11.2619]],
138
+
139
+ [[11.7360, 11.4998],
140
+ [11.4998, 11.7346]]], device='cuda:0')
141
+ 06/01/2024 23:35:56 - INFO - __main__ - ***** Completed training *****
142
+ 06/01/2024 23:35:59 - INFO - __main__ - Number of labels detected = 2
143
+ 06/01/2024 23:36:00 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
144
+ 06/01/2024 23:36:00 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/adapter_config.json
145
+ 06/01/2024 23:36:00 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
146
+ 06/01/2024 23:36:01 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_adapter.bin
147
+ 06/01/2024 23:36:01 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/head_config.json
148
+ 06/01/2024 23:36:01 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
149
+ 06/01/2024 23:36:01 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_model_head.bin
150
+ 06/01/2024 23:36:01 - INFO - __main__ - Adapter Name = cola
151
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
152
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
153
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
154
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
155
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
156
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
157
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
158
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
159
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
160
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
161
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
162
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
163
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
164
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
165
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
166
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
167
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
168
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
169
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
170
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
171
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
172
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
173
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
174
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
175
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
176
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
177
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
178
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
179
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
180
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
181
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
182
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
183
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
184
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
185
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
186
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
187
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
188
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
189
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
190
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
191
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
192
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
193
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
194
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
195
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
196
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
197
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
198
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
199
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
200
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
201
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
202
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
203
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
204
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
205
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
206
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
207
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
208
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
209
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
210
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
211
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
212
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
213
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
214
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
215
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
216
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
217
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
218
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
219
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
220
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
221
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
222
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
223
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
224
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
225
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
226
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
227
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
228
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
229
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
230
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
231
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
232
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
233
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
234
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
235
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
236
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
237
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
238
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
239
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
240
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
241
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
242
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
243
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
244
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
245
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
246
+ 06/01/2024 23:36:01 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
247
+ 06/01/2024 23:36:01 - INFO - __main__ - heads.cola.1.weight
248
+ 06/01/2024 23:36:01 - INFO - __main__ - heads.cola.1.bias
249
+ 06/01/2024 23:36:01 - INFO - __main__ - heads.cola.4.weight
250
+ 06/01/2024 23:36:01 - INFO - __main__ - heads.cola.4.bias
251
+ 06/01/2024 23:36:02 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
252
+ 06/01/2024 23:36:02 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
253
+ 06/01/2024 23:36:02 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
254
+ 06/01/2024 23:40:56 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
255
+ 06/01/2024 23:40:56 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
256
+ 06/01/2024 23:40:56 - INFO - __main__ - tensor([[-2.1297, 2.2213],
257
+ [-1.8189, 1.7128],
258
+ [-1.5682, 1.5394],
259
+ ...,
260
+ [-2.5910, 2.4892],
261
+ [-0.3724, 0.3888],
262
+ [-0.4022, 0.3224]], device='cuda:0')
263
+ 06/01/2024 23:40:56 - INFO - __main__ - tensor([[[4.5618, 2.1410],
264
+ [2.1410, 4.4561]],
265
+
266
+ [[3.3290, 2.0678],
267
+ [2.0678, 3.3150]],
268
+
269
+ [[3.4761, 1.5668],
270
+ [1.5668, 3.3424]],
271
+
272
+ ...,
273
+
274
+ [[4.5626, 3.5440],
275
+ [3.5440, 4.6095]],
276
+
277
+ [[3.2903, 0.2714],
278
+ [0.2714, 3.0593]],
279
+
280
+ [[3.2947, 0.1413],
281
+ [0.1413, 2.9931]]], device='cuda:0')
282
+ 06/01/2024 23:40:56 - INFO - __main__ - ***** Completed training *****
283
+ 06/01/2024 23:40:58 - INFO - __main__ - Number of labels detected = 2
284
+ 06/01/2024 23:40:59 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
285
+ 06/01/2024 23:40:59 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/adapter_config.json
286
+ 06/01/2024 23:40:59 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
287
+ 06/01/2024 23:40:59 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_adapter.bin
288
+ 06/01/2024 23:40:59 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/head_config.json
289
+ 06/01/2024 23:40:59 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
290
+ 06/01/2024 23:40:59 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_model_head.bin
291
+ 06/01/2024 23:40:59 - INFO - __main__ - Adapter Name = cola
292
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
293
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
294
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
295
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
296
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
297
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
298
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
299
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
300
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
301
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
302
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
303
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
304
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
305
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
306
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
307
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
308
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
309
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
310
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
311
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
312
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
313
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
314
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
315
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
316
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
317
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
318
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
319
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
320
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
321
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
322
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
323
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
324
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
325
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
326
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
327
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
328
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
329
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
330
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
331
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
332
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
333
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
334
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
335
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
336
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
337
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
338
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
339
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
340
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
341
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
342
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
343
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
344
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
345
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
346
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
347
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
348
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
349
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
350
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
351
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
352
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
353
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
354
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
355
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
356
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
357
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
358
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
359
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
360
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
361
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
362
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
363
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
364
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
365
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
366
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
367
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
368
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
369
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
370
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
371
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
372
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
373
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
374
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
375
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
376
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
377
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
378
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
379
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
380
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
381
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
382
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
383
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
384
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
385
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
386
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
387
+ 06/01/2024 23:40:59 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
388
+ 06/01/2024 23:40:59 - INFO - __main__ - heads.cola.1.weight
389
+ 06/01/2024 23:40:59 - INFO - __main__ - heads.cola.1.bias
390
+ 06/01/2024 23:40:59 - INFO - __main__ - heads.cola.4.weight
391
+ 06/01/2024 23:40:59 - INFO - __main__ - heads.cola.4.bias
392
+ 06/01/2024 23:41:00 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
393
+ 06/01/2024 23:41:00 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
394
+ 06/01/2024 23:41:00 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
395
+ 06/01/2024 23:46:03 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
396
+ 06/01/2024 23:46:03 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
397
+ 06/01/2024 23:46:03 - INFO - __main__ - tensor([[-2.6237, 2.6967],
398
+ [-2.0123, 1.8970],
399
+ [-1.4105, 1.4059],
400
+ ...,
401
+ [-3.3720, 3.2471],
402
+ [-0.4277, 0.4479],
403
+ [-0.6115, 0.5214]], device='cuda:0')
404
+ 06/01/2024 23:46:03 - INFO - __main__ - tensor([[[ 5.0017, 1.7107],
405
+ [ 1.7107, 4.7907]],
406
+
407
+ [[ 3.6648, 1.0633],
408
+ [ 1.0633, 3.5681]],
409
+
410
+ [[ 3.6117, 0.3526],
411
+ [ 0.3526, 3.3652]],
412
+
413
+ ...,
414
+
415
+ [[ 4.8242, 3.7225],
416
+ [ 3.7225, 4.8405]],
417
+
418
+ [[ 4.8466, -1.7092],
419
+ [-1.7092, 4.2847]],
420
+
421
+ [[ 4.6424, -1.6387],
422
+ [-1.6387, 4.0598]]], device='cuda:0')
423
+ 06/01/2024 23:46:03 - INFO - __main__ - ***** Completed training *****
424
+ 06/01/2024 23:46:06 - INFO - __main__ - Number of labels detected = 2
425
+ 06/01/2024 23:46:06 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
426
+ 06/01/2024 23:46:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/adapter_config.json
427
+ 06/01/2024 23:46:07 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
428
+ 06/01/2024 23:46:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_adapter.bin
429
+ 06/01/2024 23:46:07 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/head_config.json
430
+ 06/01/2024 23:46:07 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
431
+ 06/01/2024 23:46:07 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_model_head.bin
432
+ 06/01/2024 23:46:07 - INFO - __main__ - Adapter Name = cola
433
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
434
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
435
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
436
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
437
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
438
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
439
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
440
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
441
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
442
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
443
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
444
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
445
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
446
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
447
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
448
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
449
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
450
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
451
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
452
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
453
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
454
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
455
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
456
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
457
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
458
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
459
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
460
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
461
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
462
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
463
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
464
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
465
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
466
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
467
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
468
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
469
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
470
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
471
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
472
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
473
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
474
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
475
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
476
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
477
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
478
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
479
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
480
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
481
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
482
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
483
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
484
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
485
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
486
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
487
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
488
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
489
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
490
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
491
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
492
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
493
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
494
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
495
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
496
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
497
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
498
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
499
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
500
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
501
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
502
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
503
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
504
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
505
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
506
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
507
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
508
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
509
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
510
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
511
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
512
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
513
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
514
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
515
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
516
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
517
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
518
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
519
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
520
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
521
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
522
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
523
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
524
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
525
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
526
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
527
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
528
+ 06/01/2024 23:46:07 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
529
+ 06/01/2024 23:46:07 - INFO - __main__ - heads.cola.1.weight
530
+ 06/01/2024 23:46:07 - INFO - __main__ - heads.cola.1.bias
531
+ 06/01/2024 23:46:07 - INFO - __main__ - heads.cola.4.weight
532
+ 06/01/2024 23:46:07 - INFO - __main__ - heads.cola.4.bias
533
+ 06/01/2024 23:46:08 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
534
+ 06/01/2024 23:46:08 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
535
+ 06/01/2024 23:46:08 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
536
+ 06/01/2024 23:51:11 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
537
+ 06/01/2024 23:51:11 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
538
+ 06/01/2024 23:51:11 - INFO - __main__ - tensor([[-2.9662, 2.9773],
539
+ [-2.7168, 2.5565],
540
+ [-1.7817, 1.7464],
541
+ ...,
542
+ [-3.3487, 3.2080],
543
+ [-1.8054, 1.7300],
544
+ [-1.0835, 0.9648]], device='cuda:0')
545
+ 06/01/2024 23:51:11 - INFO - __main__ - tensor([[[ 4.7834, 2.8649],
546
+ [ 2.8649, 4.6725]],
547
+
548
+ [[ 3.9393, 2.2446],
549
+ [ 2.2446, 3.9447]],
550
+
551
+ [[ 3.9373, 0.4859],
552
+ [ 0.4859, 3.6285]],
553
+
554
+ ...,
555
+
556
+ [[ 5.0232, 3.7086],
557
+ [ 3.7086, 5.0201]],
558
+
559
+ [[ 8.5577, -3.8952],
560
+ [-3.8952, 7.9227]],
561
+
562
+ [[ 5.8607, -2.4746],
563
+ [-2.4746, 5.3347]]], device='cuda:0')
564
+ 06/01/2024 23:51:11 - INFO - __main__ - ***** Completed training *****
565
+ 06/01/2024 23:51:14 - INFO - __main__ - Number of labels detected = 2
566
+ 06/01/2024 23:51:15 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
567
+ 06/01/2024 23:51:15 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/adapter_config.json
568
+ 06/01/2024 23:51:15 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
569
+ 06/01/2024 23:51:15 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_adapter.bin
570
+ 06/01/2024 23:51:15 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/head_config.json
571
+ 06/01/2024 23:51:15 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
572
+ 06/01/2024 23:51:15 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_model_head.bin
573
+ 06/01/2024 23:51:15 - INFO - __main__ - Adapter Name = cola
574
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
575
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
576
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
577
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
578
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
579
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
580
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
581
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
582
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
583
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
584
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
585
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
586
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
587
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
588
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
589
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
590
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
591
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
592
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
593
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
594
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
595
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
596
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
597
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
598
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
599
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
600
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
601
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
602
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
603
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
604
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
605
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
606
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
607
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
608
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
609
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
610
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
611
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
612
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
613
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
614
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
615
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
616
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
617
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
618
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
619
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
620
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
621
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
622
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
623
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
624
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
625
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
626
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
627
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
628
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
629
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
630
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
631
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
632
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
633
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
634
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
635
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
636
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
637
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
638
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
639
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
640
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
641
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
642
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
643
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
644
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
645
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
646
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
647
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
648
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
649
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
650
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
651
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
652
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
653
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
654
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
655
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
656
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
657
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
658
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
659
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
660
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
661
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
662
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
663
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
664
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
665
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
666
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
667
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
668
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
669
+ 06/01/2024 23:51:15 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
670
+ 06/01/2024 23:51:15 - INFO - __main__ - heads.cola.1.weight
671
+ 06/01/2024 23:51:15 - INFO - __main__ - heads.cola.1.bias
672
+ 06/01/2024 23:51:15 - INFO - __main__ - heads.cola.4.weight
673
+ 06/01/2024 23:51:15 - INFO - __main__ - heads.cola.4.bias
674
+ 06/01/2024 23:51:16 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
675
+ 06/01/2024 23:51:16 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
676
+ 06/01/2024 23:51:16 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
677
+ 06/01/2024 23:56:23 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
678
+ 06/01/2024 23:56:23 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
679
+ 06/01/2024 23:56:23 - INFO - __main__ - tensor([[-3.3699, 3.3674],
680
+ [-3.1780, 3.0025],
681
+ [-2.2889, 2.2263],
682
+ ...,
683
+ [-4.2645, 4.0783],
684
+ [-2.1805, 2.0611],
685
+ [-1.5461, 1.3975]], device='cuda:0')
686
+ 06/01/2024 23:56:23 - INFO - __main__ - tensor([[[ 5.1090, 2.1602],
687
+ [ 2.1602, 4.9487]],
688
+
689
+ [[ 4.3270, 1.8304],
690
+ [ 1.8304, 4.3619]],
691
+
692
+ [[ 5.2377, -0.5261],
693
+ [ -0.5261, 4.6952]],
694
+
695
+ ...,
696
+
697
+ [[ 5.2573, 4.1918],
698
+ [ 4.1918, 5.2629]],
699
+
700
+ [[ 17.0288, -12.2766],
701
+ [-12.2766, 16.0319]],
702
+
703
+ [[ 14.0171, -10.2656],
704
+ [-10.2656, 12.9278]]], device='cuda:0')
705
+ 06/01/2024 23:56:23 - INFO - __main__ - ***** Completed training *****
706
+ 06/01/2024 23:56:26 - INFO - __main__ - Number of labels detected = 2
707
+ 06/01/2024 23:56:27 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
708
+ 06/01/2024 23:56:27 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/adapter_config.json
709
+ 06/01/2024 23:56:27 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
710
+ 06/01/2024 23:56:27 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_adapter.bin
711
+ 06/01/2024 23:56:27 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/head_config.json
712
+ 06/01/2024 23:56:27 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
713
+ 06/01/2024 23:56:27 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_model_head.bin
714
+ 06/01/2024 23:56:27 - INFO - __main__ - Adapter Name = cola
715
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
716
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
717
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
718
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
719
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
720
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
721
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
722
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
723
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
724
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
725
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
726
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
727
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
728
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
729
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
730
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
731
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
732
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
733
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
734
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
735
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
736
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
737
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
738
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
739
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
740
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
741
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
742
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
743
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
744
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
745
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
746
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
747
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
748
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
749
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
750
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
751
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
752
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
753
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
754
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
755
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
756
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
757
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
758
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
759
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
760
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
761
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
762
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
763
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
764
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
765
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
766
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
767
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
768
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
769
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
770
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
771
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
772
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
773
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
774
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
775
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
776
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
777
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
778
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
779
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
780
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
781
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
782
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
783
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
784
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
785
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
786
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
787
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
788
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
789
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
790
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
791
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
792
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
793
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
794
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
795
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
796
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
797
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
798
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
799
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
800
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
801
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
802
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
803
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
804
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
805
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
806
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
807
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
808
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
809
+ 06/01/2024 23:56:27 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
810
+ 06/01/2024 23:56:28 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
811
+ 06/01/2024 23:56:28 - INFO - __main__ - heads.cola.1.weight
812
+ 06/01/2024 23:56:28 - INFO - __main__ - heads.cola.1.bias
813
+ 06/01/2024 23:56:28 - INFO - __main__ - heads.cola.4.weight
814
+ 06/01/2024 23:56:28 - INFO - __main__ - heads.cola.4.bias
815
+ 06/01/2024 23:56:28 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [101, 1045, 12781, 1996, 7427, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
816
+ 06/01/2024 23:56:28 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [101, 1045, 2442, 2064, 4521, 22088, 2015, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
817
+ 06/01/2024 23:56:28 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [101, 1996, 7764, 22257, 2993, 2000, 1996, 2598, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
818
+ 06/02/2024 00:01:37 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
819
+ 06/02/2024 00:01:37 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
820
+ 06/02/2024 00:01:37 - INFO - __main__ - tensor([[-3.2560, 3.2816],
821
+ [-3.1414, 2.9828],
822
+ [-2.0300, 2.0107],
823
+ ...,
824
+ [-4.2195, 4.0517],
825
+ [-2.9542, 2.8213],
826
+ [-1.6776, 1.5290]], device='cuda:0')
827
+ 06/02/2024 00:01:37 - INFO - __main__ - tensor([[[ 4.9482, 1.9264],
828
+ [ 1.9264, 4.7548]],
829
+
830
+ [[ 4.2033, 1.7314],
831
+ [ 1.7314, 4.2234]],
832
+
833
+ [[ 5.0032, -0.7315],
834
+ [ -0.7315, 4.4748]],
835
+
836
+ ...,
837
+
838
+ [[ 5.1036, 3.9530],
839
+ [ 3.9530, 5.1016]],
840
+
841
+ [[ 11.9591, -6.0827],
842
+ [ -6.0827, 11.4617]],
843
+
844
+ [[ 14.0977, -10.2441],
845
+ [-10.2441, 12.9932]]], device='cuda:0')
846
+ 06/02/2024 00:01:37 - INFO - __main__ - ***** Completed training *****
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": -0.02929206145132745}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8908701696,
5
+ "max_memory_reserved": 8908701696,
6
+ "memory_stats": {
7
+ "active.all.allocated": 2905439,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 2904205,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 360903,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 360619,
14
+ "active.large_pool.peak": 384,
15
+ "active.small_pool.allocated": 2544536,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 2543586,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 2036389039616,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 2035419392000,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 1464350304256,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 1463397847040,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 572038735360,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 572021544960,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 2036389039616,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 2035419392000,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 1464350304256,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 1463397847040,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 572038735360,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 572021544960,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 2905439,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 2904205,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 360903,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 360619,
50
+ "allocation.large_pool.peak": 384,
51
+ "allocation.small_pool.allocated": 2544536,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 2543586,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 1420248,
56
+ "inactive_split.all.current": 170,
57
+ "inactive_split.all.freed": 1420078,
58
+ "inactive_split.all.peak": 220,
59
+ "inactive_split.large_pool.allocated": 166957,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 166914,
62
+ "inactive_split.large_pool.peak": 96,
63
+ "inactive_split.small_pool.allocated": 1253291,
64
+ "inactive_split.small_pool.current": 127,
65
+ "inactive_split.small_pool.freed": 1253164,
66
+ "inactive_split.small_pool.peak": 144,
67
+ "inactive_split_bytes.all.allocated": 2042948382208,
68
+ "inactive_split_bytes.all.current": 148134400,
69
+ "inactive_split_bytes.all.freed": 2042800247808,
70
+ "inactive_split_bytes.all.peak": 896064512,
71
+ "inactive_split_bytes.large_pool.allocated": 1450380810240,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 1450274205696,
74
+ "inactive_split_bytes.large_pool.peak": 859522048,
75
+ "inactive_split_bytes.small_pool.allocated": 592567571968,
76
+ "inactive_split_bytes.small_pool.current": 41529856,
77
+ "inactive_split_bytes.small_pool.freed": 592526042112,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 389,
82
+ "num_device_free": 47,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 1,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 1994872523059,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 1993905476347,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 1423070574288,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 1422120560840,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 571801948771,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 571784915507,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 9214885888,
106
+ "reserved_bytes.all.current": 8908701696,
107
+ "reserved_bytes.all.freed": 306184192,
108
+ "reserved_bytes.all.peak": 8908701696,
109
+ "reserved_bytes.large_pool.allocated": 9003073536,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 230686720,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 211812352,
114
+ "reserved_bytes.small_pool.current": 136314880,
115
+ "reserved_bytes.small_pool.freed": 75497472,
116
+ "reserved_bytes.small_pool.peak": 136314880,
117
+ "segment.all.allocated": 389,
118
+ "segment.all.current": 342,
119
+ "segment.all.freed": 47,
120
+ "segment.all.peak": 342,
121
+ "segment.large_pool.allocated": 288,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 11,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 101,
126
+ "segment.small_pool.current": 65,
127
+ "segment.small_pool.freed": 36,
128
+ "segment.small_pool.peak": 65
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.47194522204020767}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 968467968,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8910798848,
5
+ "max_memory_reserved": 8910798848,
6
+ "memory_stats": {
7
+ "active.all.allocated": 5810926,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 5809692,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 721804,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 721520,
14
+ "active.large_pool.peak": 384,
15
+ "active.small_pool.allocated": 5089122,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 5088172,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 4071282991616,
20
+ "active_bytes.all.current": 968467968,
21
+ "active_bytes.all.freed": 4070314523648,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 2927205494272,
24
+ "active_bytes.large_pool.current": 951277568,
25
+ "active_bytes.large_pool.freed": 2926254216704,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 1144077497344,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 1144060306944,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 4071282991616,
32
+ "allocated_bytes.all.current": 968467968,
33
+ "allocated_bytes.all.freed": 4070314523648,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 2927205494272,
36
+ "allocated_bytes.large_pool.current": 951277568,
37
+ "allocated_bytes.large_pool.freed": 2926254216704,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 1144077497344,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 1144060306944,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 5810926,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 5809692,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 721804,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 721520,
50
+ "allocation.large_pool.peak": 384,
51
+ "allocation.small_pool.allocated": 5089122,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 5088172,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 2786921,
56
+ "inactive_split.all.current": 161,
57
+ "inactive_split.all.freed": 2786760,
58
+ "inactive_split.all.peak": 228,
59
+ "inactive_split.large_pool.allocated": 334121,
60
+ "inactive_split.large_pool.current": 45,
61
+ "inactive_split.large_pool.freed": 334076,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 2452800,
64
+ "inactive_split.small_pool.current": 116,
65
+ "inactive_split.small_pool.freed": 2452684,
66
+ "inactive_split.small_pool.peak": 193,
67
+ "inactive_split_bytes.all.allocated": 4080610900992,
68
+ "inactive_split_bytes.all.current": 147216896,
69
+ "inactive_split_bytes.all.freed": 4080463684096,
70
+ "inactive_split_bytes.all.peak": 897244160,
71
+ "inactive_split_bytes.large_pool.allocated": 2895166959616,
72
+ "inactive_split_bytes.large_pool.current": 107784192,
73
+ "inactive_split_bytes.large_pool.freed": 2895059175424,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 1185443941376,
76
+ "inactive_split_bytes.small_pool.current": 39432704,
77
+ "inactive_split_bytes.small_pool.freed": 1185404508672,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 725,
82
+ "num_device_free": 382,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 3,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 3989728009794,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 3988760963082,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 2846124109216,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 2845174095768,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 1143603900578,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 1143586867314,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 17678991360,
106
+ "reserved_bytes.all.current": 8910798848,
107
+ "reserved_bytes.all.freed": 8768192512,
108
+ "reserved_bytes.all.peak": 8910798848,
109
+ "reserved_bytes.large_pool.allocated": 17303601152,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 8531214336,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 375390208,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 236978176,
116
+ "reserved_bytes.small_pool.peak": 138412032,
117
+ "segment.all.allocated": 725,
118
+ "segment.all.current": 343,
119
+ "segment.all.freed": 382,
120
+ "segment.all.peak": 343,
121
+ "segment.large_pool.allocated": 546,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 269,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 179,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 113,
128
+ "segment.small_pool.peak": 66
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5399503104637741}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8910798848,
5
+ "max_memory_reserved": 8910798848,
6
+ "memory_stats": {
7
+ "active.all.allocated": 8716463,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 8715229,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1082705,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1082421,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 7633758,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 7632808,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 6107360731136,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 6106391083520,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 4391244445184,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 4390291987968,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 1716116285952,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 1716099095552,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 6107360731136,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 6106391083520,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 4391244445184,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 4390291987968,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 1716116285952,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 1716099095552,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 8716463,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 8715229,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1082705,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1082421,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 7633758,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 7632808,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 4205221,
56
+ "inactive_split.all.current": 173,
57
+ "inactive_split.all.freed": 4205048,
58
+ "inactive_split.all.peak": 228,
59
+ "inactive_split.large_pool.allocated": 505158,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 505115,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 3700063,
64
+ "inactive_split.small_pool.current": 130,
65
+ "inactive_split.small_pool.freed": 3699933,
66
+ "inactive_split.small_pool.peak": 193,
67
+ "inactive_split_bytes.all.allocated": 6091702189568,
68
+ "inactive_split_bytes.all.current": 150231552,
69
+ "inactive_split_bytes.all.freed": 6091551958016,
70
+ "inactive_split_bytes.all.peak": 898161664,
71
+ "inactive_split_bytes.large_pool.allocated": 4323643853824,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 4323537249280,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 1768058335744,
76
+ "inactive_split_bytes.small_pool.current": 43627008,
77
+ "inactive_split_bytes.small_pool.freed": 1768014708736,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1087,
82
+ "num_device_free": 744,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 5,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 5984583499565,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 5983616452853,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 4269177644144,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 4268227630696,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 1715405855421,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 1715388822157,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 26742882304,
106
+ "reserved_bytes.all.current": 8910798848,
107
+ "reserved_bytes.all.freed": 17832083456,
108
+ "reserved_bytes.all.peak": 8910798848,
109
+ "reserved_bytes.large_pool.allocated": 26201817088,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 17429430272,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 541065216,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 402653184,
116
+ "reserved_bytes.small_pool.peak": 138412032,
117
+ "segment.all.allocated": 1087,
118
+ "segment.all.current": 343,
119
+ "segment.all.freed": 744,
120
+ "segment.all.peak": 343,
121
+ "segment.large_pool.allocated": 829,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 552,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 258,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 192,
128
+ "segment.small_pool.peak": 66
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5127103010689016}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969735680,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8914993152,
5
+ "max_memory_reserved": 8914993152,
6
+ "memory_stats": {
7
+ "active.all.allocated": 11622050,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 11620816,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1443606,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1443322,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 10178444,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 10177494,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 8142390843904,
20
+ "active_bytes.all.current": 969735680,
21
+ "active_bytes.all.freed": 8141421108224,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 5854235742720,
24
+ "active_bytes.large_pool.current": 952545280,
25
+ "active_bytes.large_pool.freed": 5853283197440,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 2288155101184,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 2288137910784,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 8142390843904,
32
+ "allocated_bytes.all.current": 969735680,
33
+ "allocated_bytes.all.freed": 8141421108224,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 5854235742720,
36
+ "allocated_bytes.large_pool.current": 952545280,
37
+ "allocated_bytes.large_pool.freed": 5853283197440,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 2288155101184,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 2288137910784,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 11622050,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 11620816,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1443606,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1443322,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 10178444,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 10177494,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 5657934,
56
+ "inactive_split.all.current": 179,
57
+ "inactive_split.all.freed": 5657755,
58
+ "inactive_split.all.peak": 260,
59
+ "inactive_split.large_pool.allocated": 678637,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 678594,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 4979297,
64
+ "inactive_split.small_pool.current": 136,
65
+ "inactive_split.small_pool.freed": 4979161,
66
+ "inactive_split.small_pool.peak": 223,
67
+ "inactive_split_bytes.all.allocated": 8082423126528,
68
+ "inactive_split_bytes.all.current": 148046336,
69
+ "inactive_split_bytes.all.freed": 8082275080192,
70
+ "inactive_split_bytes.all.peak": 900170752,
71
+ "inactive_split_bytes.large_pool.allocated": 5729011654656,
72
+ "inactive_split_bytes.large_pool.current": 106516480,
73
+ "inactive_split_bytes.large_pool.freed": 5728905138176,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 2353411471872,
76
+ "inactive_split_bytes.small_pool.current": 41529856,
77
+ "inactive_split_bytes.small_pool.freed": 2353369942016,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1459,
82
+ "num_device_free": 1114,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 7,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 7979438992372,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 7978471945660,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 5692231179072,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 5691281165624,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 2287207813300,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 2287190780036,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 35846619136,
106
+ "reserved_bytes.all.current": 8914993152,
107
+ "reserved_bytes.all.freed": 26931625984,
108
+ "reserved_bytes.all.peak": 8914993152,
109
+ "reserved_bytes.large_pool.allocated": 35121004544,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 26348617728,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 725614592,
114
+ "reserved_bytes.small_pool.current": 142606336,
115
+ "reserved_bytes.small_pool.freed": 583008256,
116
+ "reserved_bytes.small_pool.peak": 142606336,
117
+ "segment.all.allocated": 1459,
118
+ "segment.all.current": 345,
119
+ "segment.all.freed": 1114,
120
+ "segment.all.peak": 345,
121
+ "segment.large_pool.allocated": 1113,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 836,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 346,
126
+ "segment.small_pool.current": 68,
127
+ "segment.small_pool.freed": 278,
128
+ "segment.small_pool.peak": 68
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5327637463001902}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 968467968,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8910798848,
5
+ "max_memory_reserved": 8914993152,
6
+ "memory_stats": {
7
+ "active.all.allocated": 14527687,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 14526453,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1804507,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1804223,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 12723180,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 12722230,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 10177284875776,
20
+ "active_bytes.all.current": 968467968,
21
+ "active_bytes.all.freed": 10176316407808,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 7317090932736,
24
+ "active_bytes.large_pool.current": 951277568,
25
+ "active_bytes.large_pool.freed": 7316139655168,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 2860193943040,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 2860176752640,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 10177284875776,
32
+ "allocated_bytes.all.current": 968467968,
33
+ "allocated_bytes.all.freed": 10176316407808,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 7317090932736,
36
+ "allocated_bytes.large_pool.current": 951277568,
37
+ "allocated_bytes.large_pool.freed": 7316139655168,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 2860193943040,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 2860176752640,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 14527687,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 14526453,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1804507,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1804223,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 12723180,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 12722230,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 7000582,
56
+ "inactive_split.all.current": 163,
57
+ "inactive_split.all.freed": 7000419,
58
+ "inactive_split.all.peak": 286,
59
+ "inactive_split.large_pool.allocated": 845816,
60
+ "inactive_split.large_pool.current": 45,
61
+ "inactive_split.large_pool.freed": 845771,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 6154766,
64
+ "inactive_split.small_pool.current": 118,
65
+ "inactive_split.small_pool.freed": 6154648,
66
+ "inactive_split.small_pool.peak": 247,
67
+ "inactive_split_bytes.all.allocated": 10120438529536,
68
+ "inactive_split_bytes.all.current": 147216896,
69
+ "inactive_split_bytes.all.freed": 10120291312640,
70
+ "inactive_split_bytes.all.peak": 900170752,
71
+ "inactive_split_bytes.large_pool.allocated": 7173797017600,
72
+ "inactive_split_bytes.large_pool.current": 107784192,
73
+ "inactive_split_bytes.large_pool.freed": 7173689233408,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 2946641511936,
76
+ "inactive_split_bytes.small_pool.current": 39432704,
77
+ "inactive_split_bytes.small_pool.freed": 2946602079232,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1798,
82
+ "num_device_free": 1455,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 9,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 9974294488215,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 9973327441503,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 7115284714000,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 7114334700552,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 2859009774215,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 2858992740951,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 44317016064,
106
+ "reserved_bytes.all.current": 8910798848,
107
+ "reserved_bytes.all.freed": 35406217216,
108
+ "reserved_bytes.all.peak": 8914993152,
109
+ "reserved_bytes.large_pool.allocated": 43421532160,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 34649145344,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 895483904,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 757071872,
116
+ "reserved_bytes.small_pool.peak": 142606336,
117
+ "segment.all.allocated": 1798,
118
+ "segment.all.current": 343,
119
+ "segment.all.freed": 1455,
120
+ "segment.all.peak": 345,
121
+ "segment.large_pool.allocated": 1371,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 1094,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 427,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 361,
128
+ "segment.small_pool.peak": 68
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5226700639354173}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_12345_8_10000/step_9999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8914993152,
5
+ "max_memory_reserved": 8914993152,
6
+ "memory_stats": {
7
+ "active.all.allocated": 17433374,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 17432140,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 2165408,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 2165124,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 15267966,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 15267016,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 12213042263040,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 12212072615424,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 8780809451520,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 8779856994304,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 3432232811520,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 3432215621120,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 12213042263040,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 12212072615424,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 8780809451520,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 8779856994304,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 3432232811520,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 3432215621120,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 17433374,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 17432140,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 2165408,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 2165124,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 15267966,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 15267016,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 8443535,
56
+ "inactive_split.all.current": 173,
57
+ "inactive_split.all.freed": 8443362,
58
+ "inactive_split.all.peak": 286,
59
+ "inactive_split.large_pool.allocated": 1015117,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 1015074,
62
+ "inactive_split.large_pool.peak": 102,
63
+ "inactive_split.small_pool.allocated": 7428418,
64
+ "inactive_split.small_pool.current": 130,
65
+ "inactive_split.small_pool.freed": 7428288,
66
+ "inactive_split.small_pool.peak": 247,
67
+ "inactive_split_bytes.all.allocated": 12134537098240,
68
+ "inactive_split_bytes.all.current": 152328704,
69
+ "inactive_split_bytes.all.freed": 12134384769536,
70
+ "inactive_split_bytes.all.peak": 900170752,
71
+ "inactive_split_bytes.large_pool.allocated": 8605136866304,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 8605030261760,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 3529400231936,
76
+ "inactive_split_bytes.small_pool.current": 45724160,
77
+ "inactive_split_bytes.small_pool.freed": 3529354507776,
78
+ "inactive_split_bytes.small_pool.peak": 77619712,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 2166,
82
+ "num_device_free": 1821,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 11,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 11969149987094,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 11968182940382,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 8538338248928,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 8537388235480,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 3430811738166,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 3430794704902,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 53393489920,
106
+ "reserved_bytes.all.current": 8914993152,
107
+ "reserved_bytes.all.freed": 44478496768,
108
+ "reserved_bytes.all.peak": 8914993152,
109
+ "reserved_bytes.large_pool.allocated": 52319748096,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 43547361280,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 1073741824,
114
+ "reserved_bytes.small_pool.current": 142606336,
115
+ "reserved_bytes.small_pool.freed": 931135488,
116
+ "reserved_bytes.small_pool.peak": 142606336,
117
+ "segment.all.allocated": 2166,
118
+ "segment.all.current": 345,
119
+ "segment.all.freed": 1821,
120
+ "segment.all.peak": 345,
121
+ "segment.large_pool.allocated": 1654,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 1377,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 512,
126
+ "segment.small_pool.current": 68,
127
+ "segment.small_pool.freed": 444,
128
+ "segment.small_pool.peak": 68
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/logfile_la.log ADDED
@@ -0,0 +1,846 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 06/02/2024 00:01:55 - INFO - __main__ - Number of labels detected = 2
2
+ 06/02/2024 00:01:56 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
3
+ 06/02/2024 00:01:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/adapter_config.json
4
+ 06/02/2024 00:01:57 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
5
+ 06/02/2024 00:01:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_adapter.bin
6
+ 06/02/2024 00:01:57 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/head_config.json
7
+ 06/02/2024 00:01:57 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
8
+ 06/02/2024 00:01:57 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/pytorch_model_head.bin
9
+ 06/02/2024 00:01:57 - INFO - __main__ - Adapter Name = cola
10
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
11
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
12
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
13
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
14
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
15
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
16
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
17
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
18
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
19
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
20
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
21
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
22
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
23
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
24
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
25
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
26
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
27
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
28
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
29
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
30
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
31
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
32
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
33
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
34
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
35
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
36
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
37
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
38
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
39
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
40
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
41
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
42
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
43
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
44
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
45
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
46
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
47
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
48
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
49
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
50
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
51
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
52
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
53
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
54
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
55
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
56
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
57
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
58
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
59
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
60
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
61
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
62
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
63
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
64
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
65
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
66
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
67
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
68
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
69
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
70
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
71
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
72
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
73
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
74
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
75
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
76
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
77
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
78
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
79
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
80
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
81
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
82
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
83
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
84
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
85
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
86
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
87
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
88
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
89
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
90
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
91
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
92
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
93
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
94
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
95
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
96
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
97
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
98
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
99
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
100
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
101
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
102
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
103
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
104
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
105
+ 06/02/2024 00:01:57 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
106
+ 06/02/2024 00:01:57 - INFO - __main__ - heads.cola.1.weight
107
+ 06/02/2024 00:01:57 - INFO - __main__ - heads.cola.1.bias
108
+ 06/02/2024 00:01:57 - INFO - __main__ - heads.cola.4.weight
109
+ 06/02/2024 00:01:57 - INFO - __main__ - heads.cola.4.bias
110
+ 06/02/2024 00:01:58 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
111
+ 06/02/2024 00:01:58 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
112
+ 06/02/2024 00:01:58 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
113
+ 06/02/2024 00:07:01 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
114
+ 06/02/2024 00:07:01 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
115
+ 06/02/2024 00:07:01 - INFO - __main__ - tensor([[ 0.0687, 0.1107],
116
+ [ 0.1065, 0.1546],
117
+ [ 0.1252, 0.1252],
118
+ ...,
119
+ [ 0.0203, 0.1087],
120
+ [ 0.0962, 0.1235],
121
+ [ 0.0511, -0.0174]], device='cuda:0')
122
+ 06/02/2024 00:07:01 - INFO - __main__ - tensor([[[12.5866, 12.3973],
123
+ [12.3973, 12.5533]],
124
+
125
+ [[11.7669, 11.4376],
126
+ [11.4376, 11.7918]],
127
+
128
+ [[11.8492, 11.6148],
129
+ [11.6148, 11.8600]],
130
+
131
+ ...,
132
+
133
+ [[13.4014, 13.0880],
134
+ [13.0880, 13.4134]],
135
+
136
+ [[11.4929, 11.3232],
137
+ [11.3232, 11.5001]],
138
+
139
+ [[11.7616, 11.5308],
140
+ [11.5308, 11.7491]]], device='cuda:0')
141
+ 06/02/2024 00:07:01 - INFO - __main__ - ***** Completed training *****
142
+ 06/02/2024 00:07:05 - INFO - __main__ - Number of labels detected = 2
143
+ 06/02/2024 00:07:05 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
144
+ 06/02/2024 00:07:06 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/adapter_config.json
145
+ 06/02/2024 00:07:06 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
146
+ 06/02/2024 00:07:06 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_adapter.bin
147
+ 06/02/2024 00:07:06 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/head_config.json
148
+ 06/02/2024 00:07:06 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
149
+ 06/02/2024 00:07:06 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/pytorch_model_head.bin
150
+ 06/02/2024 00:07:06 - INFO - __main__ - Adapter Name = cola
151
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
152
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
153
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
154
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
155
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
156
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
157
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
158
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
159
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
160
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
161
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
162
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
163
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
164
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
165
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
166
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
167
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
168
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
169
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
170
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
171
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
172
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
173
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
174
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
175
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
176
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
177
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
178
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
179
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
180
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
181
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
182
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
183
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
184
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
185
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
186
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
187
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
188
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
189
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
190
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
191
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
192
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
193
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
194
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
195
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
196
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
197
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
198
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
199
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
200
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
201
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
202
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
203
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
204
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
205
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
206
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
207
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
208
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
209
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
210
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
211
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
212
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
213
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
214
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
215
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
216
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
217
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
218
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
219
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
220
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
221
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
222
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
223
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
224
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
225
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
226
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
227
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
228
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
229
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
230
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
231
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
232
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
233
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
234
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
235
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
236
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
237
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
238
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
239
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
240
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
241
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
242
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
243
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
244
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
245
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
246
+ 06/02/2024 00:07:06 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
247
+ 06/02/2024 00:07:06 - INFO - __main__ - heads.cola.1.weight
248
+ 06/02/2024 00:07:06 - INFO - __main__ - heads.cola.1.bias
249
+ 06/02/2024 00:07:06 - INFO - __main__ - heads.cola.4.weight
250
+ 06/02/2024 00:07:06 - INFO - __main__ - heads.cola.4.bias
251
+ 06/02/2024 00:07:07 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
252
+ 06/02/2024 00:07:07 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
253
+ 06/02/2024 00:07:07 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
254
+ 06/02/2024 00:12:15 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
255
+ 06/02/2024 00:12:15 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
256
+ 06/02/2024 00:12:15 - INFO - __main__ - tensor([[-1.6485, 2.0073],
257
+ [-1.4895, 1.9543],
258
+ [-0.8011, 1.1930],
259
+ ...,
260
+ [-2.4070, 2.6930],
261
+ [ 0.0670, 0.1763],
262
+ [-0.8631, 1.0701]], device='cuda:0')
263
+ 06/02/2024 00:12:15 - INFO - __main__ - tensor([[[ 4.6435, 0.9781],
264
+ [ 0.9781, 4.6696]],
265
+
266
+ [[ 3.1061, 1.7478],
267
+ [ 1.7478, 3.1230]],
268
+
269
+ [[ 2.7134, 1.0829],
270
+ [ 1.0829, 2.8030]],
271
+
272
+ ...,
273
+
274
+ [[ 4.3186, 3.3896],
275
+ [ 3.3896, 4.2989]],
276
+
277
+ [[ 2.5481, 0.3680],
278
+ [ 0.3680, 2.8497]],
279
+
280
+ [[ 3.6499, -0.1064],
281
+ [-0.1064, 3.7341]]], device='cuda:0')
282
+ 06/02/2024 00:12:15 - INFO - __main__ - ***** Completed training *****
283
+ 06/02/2024 00:12:17 - INFO - __main__ - Number of labels detected = 2
284
+ 06/02/2024 00:12:18 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
285
+ 06/02/2024 00:12:18 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/adapter_config.json
286
+ 06/02/2024 00:12:18 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
287
+ 06/02/2024 00:12:18 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_adapter.bin
288
+ 06/02/2024 00:12:18 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/head_config.json
289
+ 06/02/2024 00:12:18 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
290
+ 06/02/2024 00:12:18 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/pytorch_model_head.bin
291
+ 06/02/2024 00:12:18 - INFO - __main__ - Adapter Name = cola
292
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
293
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
294
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
295
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
296
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
297
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
298
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
299
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
300
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
301
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
302
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
303
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
304
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
305
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
306
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
307
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
308
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
309
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
310
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
311
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
312
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
313
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
314
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
315
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
316
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
317
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
318
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
319
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
320
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
321
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
322
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
323
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
324
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
325
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
326
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
327
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
328
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
329
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
330
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
331
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
332
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
333
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
334
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
335
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
336
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
337
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
338
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
339
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
340
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
341
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
342
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
343
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
344
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
345
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
346
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
347
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
348
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
349
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
350
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
351
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
352
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
353
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
354
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
355
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
356
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
357
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
358
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
359
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
360
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
361
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
362
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
363
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
364
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
365
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
366
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
367
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
368
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
369
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
370
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
371
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
372
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
373
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
374
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
375
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
376
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
377
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
378
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
379
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
380
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
381
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
382
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
383
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
384
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
385
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
386
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
387
+ 06/02/2024 00:12:18 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
388
+ 06/02/2024 00:12:18 - INFO - __main__ - heads.cola.1.weight
389
+ 06/02/2024 00:12:18 - INFO - __main__ - heads.cola.1.bias
390
+ 06/02/2024 00:12:18 - INFO - __main__ - heads.cola.4.weight
391
+ 06/02/2024 00:12:18 - INFO - __main__ - heads.cola.4.bias
392
+ 06/02/2024 00:12:19 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
393
+ 06/02/2024 00:12:19 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
394
+ 06/02/2024 00:12:19 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
395
+ 06/02/2024 00:17:29 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
396
+ 06/02/2024 00:17:29 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
397
+ 06/02/2024 00:17:29 - INFO - __main__ - tensor([[-2.6441, 3.0048],
398
+ [-2.1385, 2.6015],
399
+ [-0.8923, 1.2743],
400
+ ...,
401
+ [-3.1286, 3.4216],
402
+ [-1.2865, 1.6362],
403
+ [-2.3139, 2.5438]], device='cuda:0')
404
+ 06/02/2024 00:17:29 - INFO - __main__ - tensor([[[ 4.6644, 2.6618],
405
+ [ 2.6618, 4.6231]],
406
+
407
+ [[ 3.5755, 1.9887],
408
+ [ 1.9887, 3.5351]],
409
+
410
+ [[ 2.7346, 0.4514],
411
+ [ 0.4514, 2.9041]],
412
+
413
+ ...,
414
+
415
+ [[ 4.7327, 3.9928],
416
+ [ 3.9928, 4.7332]],
417
+
418
+ [[ 5.0674, -1.8297],
419
+ [-1.8297, 5.7372]],
420
+
421
+ [[ 5.9379, -0.2734],
422
+ [-0.2734, 5.8899]]], device='cuda:0')
423
+ 06/02/2024 00:17:29 - INFO - __main__ - ***** Completed training *****
424
+ 06/02/2024 00:17:33 - INFO - __main__ - Number of labels detected = 2
425
+ 06/02/2024 00:17:33 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
426
+ 06/02/2024 00:17:34 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/adapter_config.json
427
+ 06/02/2024 00:17:34 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
428
+ 06/02/2024 00:17:34 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_adapter.bin
429
+ 06/02/2024 00:17:34 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/head_config.json
430
+ 06/02/2024 00:17:34 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
431
+ 06/02/2024 00:17:34 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/pytorch_model_head.bin
432
+ 06/02/2024 00:17:34 - INFO - __main__ - Adapter Name = cola
433
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
434
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
435
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
436
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
437
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
438
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
439
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
440
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
441
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
442
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
443
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
444
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
445
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
446
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
447
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
448
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
449
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
450
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
451
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
452
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
453
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
454
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
455
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
456
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
457
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
458
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
459
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
460
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
461
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
462
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
463
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
464
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
465
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
466
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
467
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
468
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
469
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
470
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
471
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
472
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
473
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
474
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
475
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
476
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
477
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
478
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
479
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
480
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
481
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
482
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
483
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
484
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
485
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
486
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
487
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
488
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
489
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
490
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
491
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
492
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
493
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
494
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
495
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
496
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
497
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
498
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
499
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
500
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
501
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
502
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
503
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
504
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
505
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
506
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
507
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
508
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
509
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
510
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
511
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
512
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
513
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
514
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
515
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
516
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
517
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
518
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
519
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
520
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
521
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
522
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
523
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
524
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
525
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
526
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
527
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
528
+ 06/02/2024 00:17:34 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
529
+ 06/02/2024 00:17:34 - INFO - __main__ - heads.cola.1.weight
530
+ 06/02/2024 00:17:34 - INFO - __main__ - heads.cola.1.bias
531
+ 06/02/2024 00:17:34 - INFO - __main__ - heads.cola.4.weight
532
+ 06/02/2024 00:17:34 - INFO - __main__ - heads.cola.4.bias
533
+ 06/02/2024 00:17:35 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
534
+ 06/02/2024 00:17:35 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
535
+ 06/02/2024 00:17:35 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
536
+ 06/02/2024 00:22:46 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
537
+ 06/02/2024 00:22:46 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
538
+ 06/02/2024 00:22:46 - INFO - __main__ - tensor([[-2.6790, 3.0428],
539
+ [-1.9493, 2.4252],
540
+ [-0.6514, 1.0621],
541
+ ...,
542
+ [-3.0951, 3.3807],
543
+ [-1.4203, 1.8088],
544
+ [-1.5409, 1.7612]], device='cuda:0')
545
+ 06/02/2024 00:22:46 - INFO - __main__ - tensor([[[ 4.6626, 2.1988],
546
+ [ 2.1988, 4.6641]],
547
+
548
+ [[ 3.3556, 1.3597],
549
+ [ 1.3597, 3.3220]],
550
+
551
+ [[ 2.4772, 0.2199],
552
+ [ 0.2199, 2.6351]],
553
+
554
+ ...,
555
+
556
+ [[ 4.4411, 3.4198],
557
+ [ 3.4198, 4.4294]],
558
+
559
+ [[ 5.6441, -2.3624],
560
+ [-2.3624, 6.0773]],
561
+
562
+ [[ 6.1997, -2.6992],
563
+ [-2.6992, 6.2934]]], device='cuda:0')
564
+ 06/02/2024 00:22:46 - INFO - __main__ - ***** Completed training *****
565
+ 06/02/2024 00:22:48 - INFO - __main__ - Number of labels detected = 2
566
+ 06/02/2024 00:22:49 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
567
+ 06/02/2024 00:22:49 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/adapter_config.json
568
+ 06/02/2024 00:22:49 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
569
+ 06/02/2024 00:22:49 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_adapter.bin
570
+ 06/02/2024 00:22:49 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/head_config.json
571
+ 06/02/2024 00:22:49 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
572
+ 06/02/2024 00:22:49 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/pytorch_model_head.bin
573
+ 06/02/2024 00:22:49 - INFO - __main__ - Adapter Name = cola
574
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
575
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
576
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
577
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
578
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
579
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
580
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
581
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
582
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
583
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
584
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
585
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
586
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
587
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
588
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
589
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
590
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
591
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
592
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
593
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
594
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
595
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
596
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
597
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
598
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
599
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
600
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
601
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
602
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
603
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
604
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
605
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
606
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
607
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
608
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
609
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
610
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
611
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
612
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
613
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
614
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
615
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
616
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
617
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
618
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
619
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
620
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
621
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
622
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
623
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
624
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
625
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
626
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
627
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
628
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
629
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
630
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
631
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
632
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
633
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
634
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
635
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
636
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
637
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
638
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
639
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
640
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
641
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
642
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
643
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
644
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
645
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
646
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
647
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
648
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
649
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
650
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
651
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
652
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
653
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
654
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
655
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
656
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
657
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
658
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
659
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
660
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
661
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
662
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
663
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
664
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
665
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
666
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
667
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
668
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
669
+ 06/02/2024 00:22:49 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
670
+ 06/02/2024 00:22:49 - INFO - __main__ - heads.cola.1.weight
671
+ 06/02/2024 00:22:49 - INFO - __main__ - heads.cola.1.bias
672
+ 06/02/2024 00:22:49 - INFO - __main__ - heads.cola.4.weight
673
+ 06/02/2024 00:22:49 - INFO - __main__ - heads.cola.4.bias
674
+ 06/02/2024 00:22:51 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
675
+ 06/02/2024 00:22:51 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
676
+ 06/02/2024 00:22:51 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
677
+ 06/02/2024 00:27:58 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
678
+ 06/02/2024 00:27:58 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
679
+ 06/02/2024 00:27:58 - INFO - __main__ - tensor([[-3.3962, 3.7076],
680
+ [-2.5677, 2.9764],
681
+ [-0.9388, 1.3018],
682
+ ...,
683
+ [-3.7986, 4.0254],
684
+ [-2.4808, 2.8059],
685
+ [-2.0593, 2.2397]], device='cuda:0')
686
+ 06/02/2024 00:27:58 - INFO - __main__ - tensor([[[ 5.1139, 2.8399],
687
+ [ 2.8399, 5.1270]],
688
+
689
+ [[ 4.0497, 1.4560],
690
+ [ 1.4560, 3.9944]],
691
+
692
+ [[ 3.5599, -0.8490],
693
+ [-0.8490, 3.7326]],
694
+
695
+ ...,
696
+
697
+ [[ 4.9693, 3.9614],
698
+ [ 3.9614, 4.9527]],
699
+
700
+ [[ 7.3436, -2.3866],
701
+ [-2.3866, 7.4845]],
702
+
703
+ [[11.3763, -7.5061],
704
+ [-7.5062, 11.6565]]], device='cuda:0')
705
+ 06/02/2024 00:27:58 - INFO - __main__ - ***** Completed training *****
706
+ 06/02/2024 00:28:00 - INFO - __main__ - Number of labels detected = 2
707
+ 06/02/2024 00:28:01 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 30522, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
708
+ 06/02/2024 00:28:02 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/adapter_config.json
709
+ 06/02/2024 00:28:02 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
710
+ 06/02/2024 00:28:02 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_adapter.bin
711
+ 06/02/2024 00:28:02 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/head_config.json
712
+ 06/02/2024 00:28:02 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
713
+ 06/02/2024 00:28:02 - INFO - adapters.loading - Loading module weights from ./outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/pytorch_model_head.bin
714
+ 06/02/2024 00:28:02 - INFO - __main__ - Adapter Name = cola
715
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
716
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
717
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
718
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
719
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
720
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
721
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.weight
722
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.0.output.adapters.cola.adapter_up.bias
723
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
724
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
725
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
726
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
727
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
728
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
729
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.weight
730
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.1.output.adapters.cola.adapter_up.bias
731
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
732
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
733
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
734
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
735
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
736
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
737
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.weight
738
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.2.output.adapters.cola.adapter_up.bias
739
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
740
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
741
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
742
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
743
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
744
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
745
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.weight
746
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.3.output.adapters.cola.adapter_up.bias
747
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
748
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
749
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
750
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
751
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
752
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
753
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.weight
754
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.4.output.adapters.cola.adapter_up.bias
755
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
756
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
757
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
758
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
759
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
760
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
761
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.weight
762
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.5.output.adapters.cola.adapter_up.bias
763
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
764
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
765
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
766
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
767
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
768
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
769
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.weight
770
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.6.output.adapters.cola.adapter_up.bias
771
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
772
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
773
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
774
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
775
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
776
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
777
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.weight
778
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.7.output.adapters.cola.adapter_up.bias
779
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
780
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
781
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
782
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
783
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
784
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
785
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.weight
786
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.8.output.adapters.cola.adapter_up.bias
787
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
788
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
789
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
790
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
791
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
792
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
793
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.weight
794
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.9.output.adapters.cola.adapter_up.bias
795
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
796
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
797
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
798
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
799
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
800
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
801
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.weight
802
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.10.output.adapters.cola.adapter_up.bias
803
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
804
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
805
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
806
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
807
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
808
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
809
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.weight
810
+ 06/02/2024 00:28:02 - INFO - __main__ - bert.encoder.layer.11.output.adapters.cola.adapter_up.bias
811
+ 06/02/2024 00:28:02 - INFO - __main__ - heads.cola.1.weight
812
+ 06/02/2024 00:28:02 - INFO - __main__ - heads.cola.1.bias
813
+ 06/02/2024 00:28:02 - INFO - __main__ - heads.cola.4.weight
814
+ 06/02/2024 00:28:02 - INFO - __main__ - heads.cola.4.bias
815
+ 06/02/2024 00:28:03 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
816
+ 06/02/2024 00:28:03 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
817
+ 06/02/2024 00:28:03 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
818
+ 06/02/2024 00:33:06 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
819
+ 06/02/2024 00:33:06 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
820
+ 06/02/2024 00:33:06 - INFO - __main__ - tensor([[-3.6432, 3.9375],
821
+ [-2.6919, 3.0975],
822
+ [-1.0813, 1.4512],
823
+ ...,
824
+ [-3.9697, 4.1863],
825
+ [-2.6218, 2.9444],
826
+ [-2.3552, 2.5516]], device='cuda:0')
827
+ 06/02/2024 00:33:06 - INFO - __main__ - tensor([[[ 5.2339, 3.0798],
828
+ [ 3.0798, 5.2397]],
829
+
830
+ [[ 4.3019, 1.2904],
831
+ [ 1.2904, 4.2205]],
832
+
833
+ [[ 4.0779, -1.2783],
834
+ [-1.2783, 4.2286]],
835
+
836
+ ...,
837
+
838
+ [[ 5.0681, 3.9858],
839
+ [ 3.9858, 5.0388]],
840
+
841
+ [[ 7.9208, -2.8210],
842
+ [-2.8210, 8.0162]],
843
+
844
+ [[12.2069, -7.8169],
845
+ [-7.8169, 12.2902]]], device='cuda:0')
846
+ 06/02/2024 00:33:06 - INFO - __main__ - ***** Completed training *****
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.013232794083812355}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_0/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8908701696,
5
+ "max_memory_reserved": 8908701696,
6
+ "memory_stats": {
7
+ "active.all.allocated": 2905439,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 2904205,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 360975,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 360691,
14
+ "active.large_pool.peak": 384,
15
+ "active.small_pool.allocated": 2544464,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 2543514,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 2033417876480,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 2032448228864,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 1464489492480,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 1463537035264,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 568928384000,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 568911193600,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 2033417876480,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 2032448228864,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 1464489492480,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 1463537035264,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 568928384000,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 568911193600,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 2905439,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 2904205,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 360975,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 360691,
50
+ "allocation.large_pool.peak": 384,
51
+ "allocation.small_pool.allocated": 2544464,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 2543514,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 1330181,
56
+ "inactive_split.all.current": 168,
57
+ "inactive_split.all.freed": 1330013,
58
+ "inactive_split.all.peak": 224,
59
+ "inactive_split.large_pool.allocated": 167153,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 167110,
62
+ "inactive_split.large_pool.peak": 96,
63
+ "inactive_split.small_pool.allocated": 1163028,
64
+ "inactive_split.small_pool.current": 125,
65
+ "inactive_split.small_pool.freed": 1162903,
66
+ "inactive_split.small_pool.peak": 149,
67
+ "inactive_split_bytes.all.allocated": 2039758622720,
68
+ "inactive_split_bytes.all.current": 146037248,
69
+ "inactive_split_bytes.all.freed": 2039612585472,
70
+ "inactive_split_bytes.all.peak": 893967360,
71
+ "inactive_split_bytes.large_pool.allocated": 1450436033536,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 1450329428992,
74
+ "inactive_split_bytes.large_pool.peak": 859522048,
75
+ "inactive_split_bytes.small_pool.allocated": 589322589184,
76
+ "inactive_split_bytes.small_pool.current": 39432704,
77
+ "inactive_split_bytes.small_pool.freed": 589283156480,
78
+ "inactive_split_bytes.small_pool.peak": 78419456,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 394,
82
+ "num_device_free": 52,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 1,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 1990795460851,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 1989828414139,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 1422103262928,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 1421153249480,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 568692197923,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 568675164659,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 9225371648,
106
+ "reserved_bytes.all.current": 8908701696,
107
+ "reserved_bytes.all.freed": 316669952,
108
+ "reserved_bytes.all.peak": 8908701696,
109
+ "reserved_bytes.large_pool.allocated": 9003073536,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 230686720,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 222298112,
114
+ "reserved_bytes.small_pool.current": 136314880,
115
+ "reserved_bytes.small_pool.freed": 85983232,
116
+ "reserved_bytes.small_pool.peak": 136314880,
117
+ "segment.all.allocated": 394,
118
+ "segment.all.current": 342,
119
+ "segment.all.freed": 52,
120
+ "segment.all.peak": 342,
121
+ "segment.large_pool.allocated": 288,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 11,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 106,
126
+ "segment.small_pool.current": 65,
127
+ "segment.small_pool.freed": 41,
128
+ "segment.small_pool.peak": 65
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.500854588319398}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_1999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 968467968,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8908701696,
5
+ "max_memory_reserved": 8908701696,
6
+ "memory_stats": {
7
+ "active.all.allocated": 5810926,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 5809692,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 721948,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 721664,
14
+ "active.large_pool.peak": 384,
15
+ "active.small_pool.allocated": 5088978,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 5088028,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 4065322237440,
20
+ "active_bytes.all.current": 968467968,
21
+ "active_bytes.all.freed": 4064353769472,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 2927465442816,
24
+ "active_bytes.large_pool.current": 951277568,
25
+ "active_bytes.large_pool.freed": 2926514165248,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 1137856794624,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 1137839604224,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 4065322237440,
32
+ "allocated_bytes.all.current": 968467968,
33
+ "allocated_bytes.all.freed": 4064353769472,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 2927465442816,
36
+ "allocated_bytes.large_pool.current": 951277568,
37
+ "allocated_bytes.large_pool.freed": 2926514165248,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 1137856794624,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 1137839604224,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 5810926,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 5809692,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 721948,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 721664,
50
+ "allocation.large_pool.peak": 384,
51
+ "allocation.small_pool.allocated": 5088978,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 5088028,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 2713249,
56
+ "inactive_split.all.current": 170,
57
+ "inactive_split.all.freed": 2713079,
58
+ "inactive_split.all.peak": 227,
59
+ "inactive_split.large_pool.allocated": 334447,
60
+ "inactive_split.large_pool.current": 45,
61
+ "inactive_split.large_pool.freed": 334402,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 2378802,
64
+ "inactive_split.small_pool.current": 125,
65
+ "inactive_split.small_pool.freed": 2378677,
66
+ "inactive_split.small_pool.peak": 186,
67
+ "inactive_split_bytes.all.allocated": 4074440010240,
68
+ "inactive_split_bytes.all.current": 149314048,
69
+ "inactive_split_bytes.all.freed": 4074290696192,
70
+ "inactive_split_bytes.all.peak": 899341312,
71
+ "inactive_split_bytes.large_pool.allocated": 2895554385920,
72
+ "inactive_split_bytes.large_pool.current": 107784192,
73
+ "inactive_split_bytes.large_pool.freed": 2895446601728,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 1178885624320,
76
+ "inactive_split_bytes.small_pool.current": 41529856,
77
+ "inactive_split_bytes.small_pool.freed": 1178844094464,
78
+ "inactive_split_bytes.small_pool.peak": 79716864,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 735,
82
+ "num_device_free": 393,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 3,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 3981573885378,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 3980606838666,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 2844189486496,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 2843239473048,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 1137384398882,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 1137367365618,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 17718837248,
106
+ "reserved_bytes.all.current": 8908701696,
107
+ "reserved_bytes.all.freed": 8810135552,
108
+ "reserved_bytes.all.peak": 8908701696,
109
+ "reserved_bytes.large_pool.allocated": 17324572672,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 8552185856,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 394264576,
114
+ "reserved_bytes.small_pool.current": 136314880,
115
+ "reserved_bytes.small_pool.freed": 257949696,
116
+ "reserved_bytes.small_pool.peak": 136314880,
117
+ "segment.all.allocated": 735,
118
+ "segment.all.current": 342,
119
+ "segment.all.freed": 393,
120
+ "segment.all.peak": 342,
121
+ "segment.large_pool.allocated": 547,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 270,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 188,
126
+ "segment.small_pool.current": 65,
127
+ "segment.small_pool.freed": 123,
128
+ "segment.small_pool.peak": 65
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5186267566332291}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_3999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8910798848,
5
+ "max_memory_reserved": 8910798848,
6
+ "memory_stats": {
7
+ "active.all.allocated": 8716463,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 8715229,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1082921,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1082637,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 7633542,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 7632592,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 6098303822336,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 6097334174720,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 4391518590464,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 4390566133248,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 1706785231872,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 1706768041472,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 6098303822336,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 6097334174720,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 4391518590464,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 4390566133248,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 1706785231872,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 1706768041472,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 8716463,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 8715229,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1082921,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1082637,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 7633542,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 7632592,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 4132126,
56
+ "inactive_split.all.current": 181,
57
+ "inactive_split.all.freed": 4131945,
58
+ "inactive_split.all.peak": 227,
59
+ "inactive_split.large_pool.allocated": 500912,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 500869,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 3631214,
64
+ "inactive_split.small_pool.current": 138,
65
+ "inactive_split.small_pool.freed": 3631076,
66
+ "inactive_split.small_pool.peak": 194,
67
+ "inactive_split_bytes.all.allocated": 6074756495872,
68
+ "inactive_split_bytes.all.current": 152328704,
69
+ "inactive_split_bytes.all.freed": 6074604167168,
70
+ "inactive_split_bytes.all.peak": 904453120,
71
+ "inactive_split_bytes.large_pool.allocated": 4316633251840,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 4316526647296,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 1758123244032,
76
+ "inactive_split_bytes.small_pool.current": 45724160,
77
+ "inactive_split_bytes.small_pool.freed": 1758077519872,
78
+ "inactive_split_bytes.small_pool.peak": 79716864,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1097,
82
+ "num_device_free": 754,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 5,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 5972352312941,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 5971385266229,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 4266275710064,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 4265325696616,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 1706076602877,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 1706059569613,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 26782728192,
106
+ "reserved_bytes.all.current": 8910798848,
107
+ "reserved_bytes.all.freed": 17871929344,
108
+ "reserved_bytes.all.peak": 8910798848,
109
+ "reserved_bytes.large_pool.allocated": 26222788608,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 17450401792,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 559939584,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 421527552,
116
+ "reserved_bytes.small_pool.peak": 138412032,
117
+ "segment.all.allocated": 1097,
118
+ "segment.all.current": 343,
119
+ "segment.all.freed": 754,
120
+ "segment.all.peak": 343,
121
+ "segment.large_pool.allocated": 830,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 553,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 267,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 201,
128
+ "segment.small_pool.peak": 66
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5611975320184954}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_5999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8917090304,
5
+ "max_memory_reserved": 8917090304,
6
+ "memory_stats": {
7
+ "active.all.allocated": 11622050,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 11620816,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1443894,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1443610,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 10178156,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 10177206,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 8130957157888,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 8129987510272,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 5855243462144,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 5854291004928,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 2275713695744,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 2275696505344,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 8130957157888,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 8129987510272,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 5855243462144,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 5854291004928,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 2275713695744,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 2275696505344,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 11622050,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 11620816,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1443894,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1443610,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 10178156,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 10177206,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 5588000,
56
+ "inactive_split.all.current": 178,
57
+ "inactive_split.all.freed": 5587822,
58
+ "inactive_split.all.peak": 247,
59
+ "inactive_split.large_pool.allocated": 667566,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 667523,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 4920434,
64
+ "inactive_split.small_pool.current": 135,
65
+ "inactive_split.small_pool.freed": 4920299,
66
+ "inactive_split.small_pool.peak": 221,
67
+ "inactive_split_bytes.all.allocated": 8074710071808,
68
+ "inactive_split_bytes.all.current": 150231552,
69
+ "inactive_split_bytes.all.freed": 8074559840256,
70
+ "inactive_split_bytes.all.peak": 906550272,
71
+ "inactive_split_bytes.large_pool.allocated": 5736156622848,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 5736050018304,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 2338553448960,
76
+ "inactive_split_bytes.small_pool.current": 43627008,
77
+ "inactive_split_bytes.small_pool.freed": 2338509821952,
78
+ "inactive_split_bytes.small_pool.peak": 79716864,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1464,
82
+ "num_device_free": 1118,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 7,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 7963130743540,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 7962163696828,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 5688361933632,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 5687411920184,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 2274768809908,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 2274751776644,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 35894853632,
106
+ "reserved_bytes.all.current": 8917090304,
107
+ "reserved_bytes.all.freed": 26977763328,
108
+ "reserved_bytes.all.peak": 8917090304,
109
+ "reserved_bytes.large_pool.allocated": 35162947584,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 26390560768,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 731906048,
114
+ "reserved_bytes.small_pool.current": 144703488,
115
+ "reserved_bytes.small_pool.freed": 587202560,
116
+ "reserved_bytes.small_pool.peak": 144703488,
117
+ "segment.all.allocated": 1464,
118
+ "segment.all.current": 346,
119
+ "segment.all.freed": 1118,
120
+ "segment.all.peak": 346,
121
+ "segment.large_pool.allocated": 1115,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 838,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 349,
126
+ "segment.small_pool.current": 69,
127
+ "segment.small_pool.freed": 280,
128
+ "segment.small_pool.peak": 69
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5403785768297347}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_7999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 968467968,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8908701696,
5
+ "max_memory_reserved": 8917090304,
6
+ "memory_stats": {
7
+ "active.all.allocated": 14527687,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 14526453,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 1804867,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1804583,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 12722820,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 12721870,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 10162861598720,
20
+ "active_bytes.all.current": 968467968,
21
+ "active_bytes.all.freed": 10161893130752,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 7318219412480,
24
+ "active_bytes.large_pool.current": 951277568,
25
+ "active_bytes.large_pool.freed": 7317268134912,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 2844642186240,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 2844624995840,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 10162861598720,
32
+ "allocated_bytes.all.current": 968467968,
33
+ "allocated_bytes.all.freed": 10161893130752,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 7318219412480,
36
+ "allocated_bytes.large_pool.current": 951277568,
37
+ "allocated_bytes.large_pool.freed": 7317268134912,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 2844642186240,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 2844624995840,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 14527687,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 14526453,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 1804867,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1804583,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 12722820,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 12721870,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 6924013,
56
+ "inactive_split.all.current": 173,
57
+ "inactive_split.all.freed": 6923840,
58
+ "inactive_split.all.peak": 268,
59
+ "inactive_split.large_pool.allocated": 834865,
60
+ "inactive_split.large_pool.current": 45,
61
+ "inactive_split.large_pool.freed": 834820,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 6089148,
64
+ "inactive_split.small_pool.current": 128,
65
+ "inactive_split.small_pool.freed": 6089020,
66
+ "inactive_split.small_pool.peak": 242,
67
+ "inactive_split_bytes.all.allocated": 10110169738752,
68
+ "inactive_split_bytes.all.current": 147216896,
69
+ "inactive_split_bytes.all.freed": 10110022521856,
70
+ "inactive_split_bytes.all.peak": 906550272,
71
+ "inactive_split_bytes.large_pool.allocated": 7181281266688,
72
+ "inactive_split_bytes.large_pool.current": 107784192,
73
+ "inactive_split_bytes.large_pool.freed": 7181173482496,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 2928888472064,
76
+ "inactive_split_bytes.small_pool.current": 39432704,
77
+ "inactive_split_bytes.small_pool.freed": 2928849039360,
78
+ "inactive_split_bytes.small_pool.peak": 79716864,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1806,
82
+ "num_device_free": 1464,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 9,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 9953909177175,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 9952942130463,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 7110448157200,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 7109498143752,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 2843461019975,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 2843443986711,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 44409290752,
106
+ "reserved_bytes.all.current": 8908701696,
107
+ "reserved_bytes.all.freed": 35500589056,
108
+ "reserved_bytes.all.peak": 8917090304,
109
+ "reserved_bytes.large_pool.allocated": 43505418240,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 34733031424,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 903872512,
114
+ "reserved_bytes.small_pool.current": 136314880,
115
+ "reserved_bytes.small_pool.freed": 767557632,
116
+ "reserved_bytes.small_pool.peak": 144703488,
117
+ "segment.all.allocated": 1806,
118
+ "segment.all.current": 342,
119
+ "segment.all.freed": 1464,
120
+ "segment.all.peak": 346,
121
+ "segment.large_pool.allocated": 1375,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 1098,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 431,
126
+ "segment.small_pool.current": 65,
127
+ "segment.small_pool.freed": 366,
128
+ "segment.small_pool.peak": 69
129
+ }
130
+ }
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5347381322825221}
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/bert-base-uncased_adapterstrain_val_0.0001_42_8_10000/step_9999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 969647616,
3
+ "max_memory_allocated": 3324437504,
4
+ "memory_reserved": 8912896000,
5
+ "max_memory_reserved": 8917090304,
6
+ "memory_stats": {
7
+ "active.all.allocated": 17433374,
8
+ "active.all.current": 1234,
9
+ "active.all.freed": 17432140,
10
+ "active.all.peak": 1487,
11
+ "active.large_pool.allocated": 2165840,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 2165556,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 15267534,
16
+ "active.small_pool.current": 950,
17
+ "active.small_pool.freed": 15266584,
18
+ "active.small_pool.peak": 1200,
19
+ "active_bytes.all.allocated": 12195657911296,
20
+ "active_bytes.all.current": 969647616,
21
+ "active_bytes.all.freed": 12194688263680,
22
+ "active_bytes.all.peak": 3324437504,
23
+ "active_bytes.large_pool.allocated": 8782087207936,
24
+ "active_bytes.large_pool.current": 952457216,
25
+ "active_bytes.large_pool.freed": 8781134750720,
26
+ "active_bytes.large_pool.peak": 3240482816,
27
+ "active_bytes.small_pool.allocated": 3413570703360,
28
+ "active_bytes.small_pool.current": 17190400,
29
+ "active_bytes.small_pool.freed": 3413553512960,
30
+ "active_bytes.small_pool.peak": 114983424,
31
+ "allocated_bytes.all.allocated": 12195657911296,
32
+ "allocated_bytes.all.current": 969647616,
33
+ "allocated_bytes.all.freed": 12194688263680,
34
+ "allocated_bytes.all.peak": 3324437504,
35
+ "allocated_bytes.large_pool.allocated": 8782087207936,
36
+ "allocated_bytes.large_pool.current": 952457216,
37
+ "allocated_bytes.large_pool.freed": 8781134750720,
38
+ "allocated_bytes.large_pool.peak": 3240482816,
39
+ "allocated_bytes.small_pool.allocated": 3413570703360,
40
+ "allocated_bytes.small_pool.current": 17190400,
41
+ "allocated_bytes.small_pool.freed": 3413553512960,
42
+ "allocated_bytes.small_pool.peak": 114983424,
43
+ "allocation.all.allocated": 17433374,
44
+ "allocation.all.current": 1234,
45
+ "allocation.all.freed": 17432140,
46
+ "allocation.all.peak": 1487,
47
+ "allocation.large_pool.allocated": 2165840,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 2165556,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 15267534,
52
+ "allocation.small_pool.current": 950,
53
+ "allocation.small_pool.freed": 15266584,
54
+ "allocation.small_pool.peak": 1200,
55
+ "inactive_split.all.allocated": 8349332,
56
+ "inactive_split.all.current": 167,
57
+ "inactive_split.all.freed": 8349165,
58
+ "inactive_split.all.peak": 268,
59
+ "inactive_split.large_pool.allocated": 1002355,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 1002312,
62
+ "inactive_split.large_pool.peak": 98,
63
+ "inactive_split.small_pool.allocated": 7346977,
64
+ "inactive_split.small_pool.current": 124,
65
+ "inactive_split.small_pool.freed": 7346853,
66
+ "inactive_split.small_pool.peak": 242,
67
+ "inactive_split_bytes.all.allocated": 12144424250880,
68
+ "inactive_split_bytes.all.current": 141842944,
69
+ "inactive_split_bytes.all.freed": 12144282407936,
70
+ "inactive_split_bytes.all.peak": 906550272,
71
+ "inactive_split_bytes.large_pool.allocated": 8631940797440,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 8631834192896,
74
+ "inactive_split_bytes.large_pool.peak": 860701696,
75
+ "inactive_split_bytes.small_pool.allocated": 3512483453440,
76
+ "inactive_split_bytes.small_pool.current": 35238400,
77
+ "inactive_split_bytes.small_pool.freed": 3512448215040,
78
+ "inactive_split_bytes.small_pool.peak": 79716864,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 2177,
82
+ "num_device_free": 1833,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 11,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 11944687613846,
94
+ "requested_bytes.all.current": 967046712,
95
+ "requested_bytes.all.freed": 11943720567134,
96
+ "requested_bytes.all.peak": 3263085376,
97
+ "requested_bytes.large_pool.allocated": 8532534380768,
98
+ "requested_bytes.large_pool.current": 950013448,
99
+ "requested_bytes.large_pool.freed": 8531584367320,
100
+ "requested_bytes.large_pool.peak": 3179241096,
101
+ "requested_bytes.small_pool.allocated": 3412153233078,
102
+ "requested_bytes.small_pool.current": 17033264,
103
+ "requested_bytes.small_pool.freed": 3412136199814,
104
+ "requested_bytes.small_pool.peak": 114852048,
105
+ "reserved_bytes.all.allocated": 53510930432,
106
+ "reserved_bytes.all.current": 8912896000,
107
+ "reserved_bytes.all.freed": 44598034432,
108
+ "reserved_bytes.all.peak": 8917090304,
109
+ "reserved_bytes.large_pool.allocated": 52424605696,
110
+ "reserved_bytes.large_pool.current": 8772386816,
111
+ "reserved_bytes.large_pool.freed": 43652218880,
112
+ "reserved_bytes.large_pool.peak": 8772386816,
113
+ "reserved_bytes.small_pool.allocated": 1086324736,
114
+ "reserved_bytes.small_pool.current": 140509184,
115
+ "reserved_bytes.small_pool.freed": 945815552,
116
+ "reserved_bytes.small_pool.peak": 144703488,
117
+ "segment.all.allocated": 2177,
118
+ "segment.all.current": 344,
119
+ "segment.all.freed": 1833,
120
+ "segment.all.peak": 346,
121
+ "segment.large_pool.allocated": 1659,
122
+ "segment.large_pool.current": 277,
123
+ "segment.large_pool.freed": 1382,
124
+ "segment.large_pool.peak": 277,
125
+ "segment.small_pool.allocated": 518,
126
+ "segment.small_pool.current": 67,
127
+ "segment.small_pool.freed": 451,
128
+ "segment.small_pool.peak": 69
129
+ }
130
+ }
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log ADDED
@@ -0,0 +1,846 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 06/02/2024 07:57:08 - INFO - __main__ - Number of labels detected = 2
2
+ 06/02/2024 07:57:08 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
3
+ 06/02/2024 07:57:09 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/adapter_config.json
4
+ 06/02/2024 07:57:09 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
5
+ 06/02/2024 07:57:09 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_adapter.bin
6
+ 06/02/2024 07:57:09 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/head_config.json
7
+ 06/02/2024 07:57:09 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
8
+ 06/02/2024 07:57:09 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/pytorch_model_head.bin
9
+ 06/02/2024 07:57:09 - INFO - __main__ - Adapter Name = cola
10
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
11
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
12
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
13
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
14
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
15
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
16
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
17
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
18
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
19
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
20
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
21
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
22
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
23
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
24
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
25
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
26
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
27
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
28
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
29
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
30
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
31
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
32
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
33
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
34
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
35
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
36
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
37
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
38
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
39
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
40
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
41
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
42
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
43
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
44
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
45
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
46
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
47
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
48
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
49
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
50
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
51
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
52
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
53
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
54
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
55
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
56
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
57
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
58
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
59
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
60
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
61
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
62
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
63
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
64
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
65
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
66
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
67
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
68
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
69
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
70
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
71
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
72
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
73
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
74
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
75
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
76
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
77
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
78
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
79
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
80
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
81
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
82
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
83
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
84
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
85
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
86
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
87
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
88
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
89
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
90
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
91
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
92
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
93
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
94
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
95
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
96
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
97
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
98
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
99
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
100
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
101
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
102
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
103
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
104
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
105
+ 06/02/2024 07:57:09 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
106
+ 06/02/2024 07:57:09 - INFO - __main__ - heads.cola.1.weight
107
+ 06/02/2024 07:57:09 - INFO - __main__ - heads.cola.1.bias
108
+ 06/02/2024 07:57:09 - INFO - __main__ - heads.cola.4.weight
109
+ 06/02/2024 07:57:09 - INFO - __main__ - heads.cola.4.bias
110
+ 06/02/2024 07:57:10 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
111
+ 06/02/2024 07:57:10 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
112
+ 06/02/2024 07:57:10 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
113
+ 06/02/2024 08:01:59 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
114
+ 06/02/2024 08:01:59 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
115
+ 06/02/2024 08:01:59 - INFO - __main__ - tensor([[0.0461, 0.1350],
116
+ [0.0575, 0.1638],
117
+ [0.0505, 0.0935],
118
+ ...,
119
+ [0.1243, 0.0734],
120
+ [0.0638, 0.1309],
121
+ [0.1254, 0.0759]], device='cuda:0')
122
+ 06/02/2024 08:01:59 - INFO - __main__ - tensor([[[ 9.6094, 9.5192],
123
+ [ 9.5192, 9.6058]],
124
+
125
+ [[ 9.4777, 9.4281],
126
+ [ 9.4281, 9.4676]],
127
+
128
+ [[ 9.1818, 9.1107],
129
+ [ 9.1107, 9.1895]],
130
+
131
+ ...,
132
+
133
+ [[10.9495, 10.9250],
134
+ [10.9249, 10.9505]],
135
+
136
+ [[ 9.3165, 9.2374],
137
+ [ 9.2374, 9.3122]],
138
+
139
+ [[10.9862, 10.9572],
140
+ [10.9572, 10.9816]]], device='cuda:0')
141
+ 06/02/2024 08:01:59 - INFO - __main__ - ***** Completed training *****
142
+ 06/02/2024 08:02:02 - INFO - __main__ - Number of labels detected = 2
143
+ 06/02/2024 08:02:03 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
144
+ 06/02/2024 08:02:04 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/adapter_config.json
145
+ 06/02/2024 08:02:04 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
146
+ 06/02/2024 08:02:04 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_adapter.bin
147
+ 06/02/2024 08:02:04 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/head_config.json
148
+ 06/02/2024 08:02:04 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
149
+ 06/02/2024 08:02:04 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_model_head.bin
150
+ 06/02/2024 08:02:04 - INFO - __main__ - Adapter Name = cola
151
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
152
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
153
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
154
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
155
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
156
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
157
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
158
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
159
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
160
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
161
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
162
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
163
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
164
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
165
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
166
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
167
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
168
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
169
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
170
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
171
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
172
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
173
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
174
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
175
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
176
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
177
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
178
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
179
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
180
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
181
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
182
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
183
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
184
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
185
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
186
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
187
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
188
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
189
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
190
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
191
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
192
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
193
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
194
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
195
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
196
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
197
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
198
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
199
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
200
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
201
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
202
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
203
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
204
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
205
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
206
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
207
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
208
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
209
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
210
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
211
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
212
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
213
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
214
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
215
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
216
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
217
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
218
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
219
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
220
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
221
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
222
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
223
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
224
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
225
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
226
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
227
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
228
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
229
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
230
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
231
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
232
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
233
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
234
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
235
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
236
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
237
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
238
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
239
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
240
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
241
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
242
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
243
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
244
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
245
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
246
+ 06/02/2024 08:02:04 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
247
+ 06/02/2024 08:02:04 - INFO - __main__ - heads.cola.1.weight
248
+ 06/02/2024 08:02:04 - INFO - __main__ - heads.cola.1.bias
249
+ 06/02/2024 08:02:04 - INFO - __main__ - heads.cola.4.weight
250
+ 06/02/2024 08:02:04 - INFO - __main__ - heads.cola.4.bias
251
+ 06/02/2024 08:02:05 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
252
+ 06/02/2024 08:02:05 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
253
+ 06/02/2024 08:02:05 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
254
+ 06/02/2024 08:06:58 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
255
+ 06/02/2024 08:06:58 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
256
+ 06/02/2024 08:06:58 - INFO - __main__ - tensor([[-1.3785, 1.5697],
257
+ [-1.5844, 1.6368],
258
+ [-1.4067, 1.5115],
259
+ ...,
260
+ [-1.6957, 2.0050],
261
+ [ 0.1387, 0.0336],
262
+ [ 0.2648, -0.1128]], device='cuda:0')
263
+ 06/02/2024 08:06:58 - INFO - __main__ - tensor([[[ 4.2579, 1.3738],
264
+ [ 1.3738, 4.2773]],
265
+
266
+ [[ 2.6641, 1.8229],
267
+ [ 1.8229, 2.6801]],
268
+
269
+ [[ 4.7692, 1.4135],
270
+ [ 1.4135, 4.7653]],
271
+
272
+ ...,
273
+
274
+ [[ 4.3119, 3.3220],
275
+ [ 3.3220, 4.4150]],
276
+
277
+ [[ 3.6567, -0.2874],
278
+ [-0.2874, 3.7251]],
279
+
280
+ [[ 3.1754, 0.5567],
281
+ [ 0.5567, 3.1824]]], device='cuda:0')
282
+ 06/02/2024 08:06:58 - INFO - __main__ - ***** Completed training *****
283
+ 06/02/2024 08:07:24 - INFO - __main__ - Number of labels detected = 2
284
+ 06/02/2024 08:07:25 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
285
+ 06/02/2024 08:07:25 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/adapter_config.json
286
+ 06/02/2024 08:07:25 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
287
+ 06/02/2024 08:07:26 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_adapter.bin
288
+ 06/02/2024 08:07:26 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/head_config.json
289
+ 06/02/2024 08:07:26 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
290
+ 06/02/2024 08:07:26 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_model_head.bin
291
+ 06/02/2024 08:07:26 - INFO - __main__ - Adapter Name = cola
292
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
293
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
294
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
295
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
296
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
297
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
298
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
299
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
300
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
301
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
302
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
303
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
304
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
305
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
306
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
307
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
308
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
309
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
310
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
311
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
312
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
313
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
314
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
315
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
316
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
317
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
318
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
319
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
320
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
321
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
322
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
323
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
324
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
325
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
326
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
327
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
328
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
329
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
330
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
331
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
332
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
333
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
334
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
335
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
336
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
337
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
338
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
339
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
340
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
341
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
342
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
343
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
344
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
345
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
346
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
347
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
348
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
349
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
350
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
351
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
352
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
353
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
354
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
355
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
356
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
357
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
358
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
359
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
360
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
361
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
362
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
363
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
364
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
365
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
366
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
367
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
368
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
369
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
370
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
371
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
372
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
373
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
374
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
375
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
376
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
377
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
378
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
379
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
380
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
381
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
382
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
383
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
384
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
385
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
386
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
387
+ 06/02/2024 08:07:26 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
388
+ 06/02/2024 08:07:26 - INFO - __main__ - heads.cola.1.weight
389
+ 06/02/2024 08:07:26 - INFO - __main__ - heads.cola.1.bias
390
+ 06/02/2024 08:07:26 - INFO - __main__ - heads.cola.4.weight
391
+ 06/02/2024 08:07:26 - INFO - __main__ - heads.cola.4.bias
392
+ 06/02/2024 08:07:26 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
393
+ 06/02/2024 08:07:26 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
394
+ 06/02/2024 08:07:26 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
395
+ 06/02/2024 08:12:24 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
396
+ 06/02/2024 08:12:24 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
397
+ 06/02/2024 08:12:24 - INFO - __main__ - tensor([[-1.8083, 1.9876],
398
+ [-2.1538, 2.1866],
399
+ [-1.8847, 1.9186],
400
+ ...,
401
+ [-2.4217, 2.7522],
402
+ [ 0.4212, -0.2226],
403
+ [ 0.7813, -0.5914]], device='cuda:0')
404
+ 06/02/2024 08:12:24 - INFO - __main__ - tensor([[[ 4.8062, 0.8630],
405
+ [ 0.8630, 4.7482]],
406
+
407
+ [[ 2.8347, 1.9434],
408
+ [ 1.9434, 2.8418]],
409
+
410
+ [[ 6.4753, -1.0532],
411
+ [-1.0532, 6.2592]],
412
+
413
+ ...,
414
+
415
+ [[ 4.3534, 3.6074],
416
+ [ 3.6074, 4.3897]],
417
+
418
+ [[ 4.9381, -1.9654],
419
+ [-1.9654, 4.7852]],
420
+
421
+ [[ 5.4259, -1.6392],
422
+ [-1.6392, 5.1288]]], device='cuda:0')
423
+ 06/02/2024 08:12:24 - INFO - __main__ - ***** Completed training *****
424
+ 06/02/2024 08:12:27 - INFO - __main__ - Number of labels detected = 2
425
+ 06/02/2024 08:12:28 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
426
+ 06/02/2024 08:12:28 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/adapter_config.json
427
+ 06/02/2024 08:12:28 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
428
+ 06/02/2024 08:12:28 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_adapter.bin
429
+ 06/02/2024 08:12:28 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/head_config.json
430
+ 06/02/2024 08:12:28 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
431
+ 06/02/2024 08:12:28 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_model_head.bin
432
+ 06/02/2024 08:12:28 - INFO - __main__ - Adapter Name = cola
433
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
434
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
435
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
436
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
437
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
438
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
439
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
440
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
441
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
442
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
443
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
444
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
445
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
446
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
447
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
448
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
449
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
450
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
451
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
452
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
453
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
454
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
455
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
456
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
457
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
458
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
459
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
460
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
461
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
462
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
463
+ 06/02/2024 08:12:28 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
464
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
465
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
466
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
467
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
468
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
469
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
470
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
471
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
472
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
473
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
474
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
475
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
476
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
477
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
478
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
479
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
480
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
481
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
482
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
483
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
484
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
485
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
486
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
487
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
488
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
489
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
490
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
491
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
492
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
493
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
494
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
495
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
496
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
497
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
498
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
499
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
500
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
501
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
502
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
503
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
504
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
505
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
506
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
507
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
508
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
509
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
510
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
511
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
512
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
513
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
514
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
515
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
516
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
517
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
518
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
519
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
520
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
521
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
522
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
523
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
524
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
525
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
526
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
527
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
528
+ 06/02/2024 08:12:29 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
529
+ 06/02/2024 08:12:29 - INFO - __main__ - heads.cola.1.weight
530
+ 06/02/2024 08:12:29 - INFO - __main__ - heads.cola.1.bias
531
+ 06/02/2024 08:12:29 - INFO - __main__ - heads.cola.4.weight
532
+ 06/02/2024 08:12:29 - INFO - __main__ - heads.cola.4.bias
533
+ 06/02/2024 08:12:30 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
534
+ 06/02/2024 08:12:30 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
535
+ 06/02/2024 08:12:30 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
536
+ 06/02/2024 08:17:27 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
537
+ 06/02/2024 08:17:27 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
538
+ 06/02/2024 08:17:27 - INFO - __main__ - tensor([[-2.4468, 2.6517],
539
+ [-2.4047, 2.4407],
540
+ [-2.1919, 2.2412],
541
+ ...,
542
+ [-2.7916, 3.1134],
543
+ [-0.2845, 0.4917],
544
+ [ 1.0054, -0.7745]], device='cuda:0')
545
+ 06/02/2024 08:17:27 - INFO - __main__ - tensor([[[ 5.8574, 1.0059],
546
+ [ 1.0059, 5.7621]],
547
+
548
+ [[ 3.2467, 1.8953],
549
+ [ 1.8953, 3.2247]],
550
+
551
+ [[ 8.0175, -2.0250],
552
+ [-2.0250, 7.6710]],
553
+
554
+ ...,
555
+
556
+ [[ 5.0837, 4.4143],
557
+ [ 4.4143, 5.0397]],
558
+
559
+ [[ 6.5210, -3.7889],
560
+ [-3.7889, 6.3052]],
561
+
562
+ [[ 6.4597, -1.8953],
563
+ [-1.8953, 5.9696]]], device='cuda:0')
564
+ 06/02/2024 08:17:27 - INFO - __main__ - ***** Completed training *****
565
+ 06/02/2024 08:17:30 - INFO - __main__ - Number of labels detected = 2
566
+ 06/02/2024 08:17:31 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
567
+ 06/02/2024 08:17:32 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/adapter_config.json
568
+ 06/02/2024 08:17:32 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
569
+ 06/02/2024 08:17:32 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_adapter.bin
570
+ 06/02/2024 08:17:32 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/head_config.json
571
+ 06/02/2024 08:17:32 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
572
+ 06/02/2024 08:17:32 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_model_head.bin
573
+ 06/02/2024 08:17:32 - INFO - __main__ - Adapter Name = cola
574
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
575
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
576
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
577
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
578
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
579
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
580
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
581
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
582
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
583
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
584
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
585
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
586
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
587
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
588
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
589
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
590
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
591
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
592
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
593
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
594
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
595
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
596
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
597
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
598
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
599
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
600
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
601
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
602
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
603
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
604
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
605
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
606
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
607
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
608
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
609
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
610
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
611
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
612
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
613
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
614
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
615
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
616
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
617
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
618
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
619
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
620
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
621
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
622
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
623
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
624
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
625
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
626
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
627
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
628
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
629
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
630
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
631
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
632
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
633
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
634
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
635
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
636
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
637
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
638
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
639
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
640
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
641
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
642
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
643
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
644
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
645
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
646
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
647
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
648
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
649
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
650
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
651
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
652
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
653
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
654
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
655
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
656
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
657
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
658
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
659
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
660
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
661
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
662
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
663
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
664
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
665
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
666
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
667
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
668
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
669
+ 06/02/2024 08:17:32 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
670
+ 06/02/2024 08:17:32 - INFO - __main__ - heads.cola.1.weight
671
+ 06/02/2024 08:17:32 - INFO - __main__ - heads.cola.1.bias
672
+ 06/02/2024 08:17:32 - INFO - __main__ - heads.cola.4.weight
673
+ 06/02/2024 08:17:32 - INFO - __main__ - heads.cola.4.bias
674
+ 06/02/2024 08:17:32 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
675
+ 06/02/2024 08:17:32 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
676
+ 06/02/2024 08:17:32 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
677
+ 06/02/2024 08:22:33 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
678
+ 06/02/2024 08:22:33 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
679
+ 06/02/2024 08:22:33 - INFO - __main__ - tensor([[-2.7738, 2.9883],
680
+ [-2.7051, 2.7341],
681
+ [-2.7796, 2.7984],
682
+ ...,
683
+ [-3.0705, 3.3764],
684
+ [-0.9860, 1.1826],
685
+ [-1.1101, 1.2928]], device='cuda:0')
686
+ 06/02/2024 08:22:33 - INFO - __main__ - tensor([[[ 4.8272, 1.4823],
687
+ [ 1.4823, 4.7181]],
688
+
689
+ [[ 2.9251, 1.7708],
690
+ [ 1.7708, 2.9030]],
691
+
692
+ [[ 7.8872, -2.1296],
693
+ [-2.1296, 7.5143]],
694
+
695
+ ...,
696
+
697
+ [[ 4.5578, 3.7887],
698
+ [ 3.7887, 4.5065]],
699
+
700
+ [[ 7.6898, -5.0990],
701
+ [-5.0990, 7.4951]],
702
+
703
+ [[11.8868, -8.6863],
704
+ [-8.6863, 11.9500]]], device='cuda:0')
705
+ 06/02/2024 08:22:33 - INFO - __main__ - ***** Completed training *****
706
+ 06/02/2024 08:22:35 - INFO - __main__ - Number of labels detected = 2
707
+ 06/02/2024 08:22:36 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
708
+ 06/02/2024 08:22:37 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/adapter_config.json
709
+ 06/02/2024 08:22:37 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'cola'.
710
+ 06/02/2024 08:22:37 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_adapter.bin
711
+ 06/02/2024 08:22:37 - INFO - adapters.loading - Loading module configuration from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/head_config.json
712
+ 06/02/2024 08:22:37 - INFO - adapters.heads.model_mixin - Adding head 'cola' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
713
+ 06/02/2024 08:22:37 - INFO - adapters.loading - Loading module weights from ./outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_model_head.bin
714
+ 06/02/2024 08:22:37 - INFO - __main__ - Adapter Name = cola
715
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.weight
716
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_down.0.bias
717
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.weight
718
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.cola.adapter_up.bias
719
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.weight
720
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_down.0.bias
721
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.weight
722
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.cola.adapter_up.bias
723
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.weight
724
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_down.0.bias
725
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.weight
726
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.cola.adapter_up.bias
727
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.weight
728
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_down.0.bias
729
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.weight
730
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.cola.adapter_up.bias
731
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.weight
732
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_down.0.bias
733
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.weight
734
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.cola.adapter_up.bias
735
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.weight
736
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_down.0.bias
737
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.weight
738
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.cola.adapter_up.bias
739
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.weight
740
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_down.0.bias
741
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.weight
742
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.cola.adapter_up.bias
743
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.weight
744
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_down.0.bias
745
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.weight
746
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.cola.adapter_up.bias
747
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.weight
748
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_down.0.bias
749
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.weight
750
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.cola.adapter_up.bias
751
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.weight
752
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_down.0.bias
753
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.weight
754
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.cola.adapter_up.bias
755
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.weight
756
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_down.0.bias
757
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.weight
758
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.cola.adapter_up.bias
759
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.weight
760
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_down.0.bias
761
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.weight
762
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.cola.adapter_up.bias
763
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.weight
764
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_down.0.bias
765
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.weight
766
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.cola.adapter_up.bias
767
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.weight
768
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_down.0.bias
769
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.weight
770
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.cola.adapter_up.bias
771
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.weight
772
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_down.0.bias
773
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.weight
774
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.cola.adapter_up.bias
775
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.weight
776
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_down.0.bias
777
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.weight
778
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.cola.adapter_up.bias
779
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.weight
780
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_down.0.bias
781
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.weight
782
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.cola.adapter_up.bias
783
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.weight
784
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_down.0.bias
785
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.weight
786
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.cola.adapter_up.bias
787
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.weight
788
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_down.0.bias
789
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.weight
790
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.cola.adapter_up.bias
791
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.weight
792
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_down.0.bias
793
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.weight
794
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.cola.adapter_up.bias
795
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.weight
796
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_down.0.bias
797
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.weight
798
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.cola.adapter_up.bias
799
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.weight
800
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_down.0.bias
801
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.weight
802
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.cola.adapter_up.bias
803
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.weight
804
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_down.0.bias
805
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.weight
806
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.cola.adapter_up.bias
807
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.weight
808
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_down.0.bias
809
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.weight
810
+ 06/02/2024 08:22:37 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.cola.adapter_up.bias
811
+ 06/02/2024 08:22:37 - INFO - __main__ - heads.cola.1.weight
812
+ 06/02/2024 08:22:37 - INFO - __main__ - heads.cola.1.bias
813
+ 06/02/2024 08:22:37 - INFO - __main__ - heads.cola.4.weight
814
+ 06/02/2024 08:22:37 - INFO - __main__ - heads.cola.4.bias
815
+ 06/02/2024 08:22:38 - INFO - __main__ - Sample 3412 of the training set: {'input_ids': [0, 100, 7546, 5, 3737, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
816
+ 06/02/2024 08:22:38 - INFO - __main__ - Sample 6002 of the training set: {'input_ids': [0, 100, 531, 64, 3529, 2968, 8014, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
817
+ 06/02/2024 08:22:38 - INFO - __main__ - Sample 83 of the training set: {'input_ids': [0, 133, 20276, 910, 10434, 1495, 7, 5, 1255, 4, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
818
+ 06/02/2024 08:27:38 - INFO - __main__ - f_mu shape : torch.Size([1043, 2])
819
+ 06/02/2024 08:27:38 - INFO - __main__ - f_var shape : torch.Size([1043, 2, 2])
820
+ 06/02/2024 08:27:38 - INFO - __main__ - tensor([[-2.7968, 3.0246],
821
+ [-2.7893, 2.8360],
822
+ [-2.7577, 2.7798],
823
+ ...,
824
+ [-3.0549, 3.3754],
825
+ [-1.0387, 1.2427],
826
+ [-0.9734, 1.1604]], device='cuda:0')
827
+ 06/02/2024 08:27:38 - INFO - __main__ - tensor([[[ 5.2688, 1.2654],
828
+ [ 1.2654, 5.1606]],
829
+
830
+ [[ 3.1404, 1.7835],
831
+ [ 1.7835, 3.1196]],
832
+
833
+ [[ 9.1570, -3.2938],
834
+ [ -3.2938, 8.7105]],
835
+
836
+ ...,
837
+
838
+ [[ 4.6808, 3.8526],
839
+ [ 3.8526, 4.6242]],
840
+
841
+ [[ 9.2852, -6.5529],
842
+ [ -6.5529, 9.0689]],
843
+
844
+ [[ 13.4494, -10.2085],
845
+ [-10.2085, 13.4283]]], device='cuda:0')
846
+ 06/02/2024 08:27:38 - INFO - __main__ - ***** Completed training *****
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": -0.040852194988972475}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_0/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 1030541312,
3
+ "max_memory_allocated": 3413011456,
4
+ "memory_reserved": 10204741632,
5
+ "max_memory_reserved": 10204741632,
6
+ "memory_stats": {
7
+ "active.all.allocated": 2915632,
8
+ "active.all.current": 1233,
9
+ "active.all.freed": 2914399,
10
+ "active.all.peak": 1485,
11
+ "active.large_pool.allocated": 361096,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 360812,
14
+ "active.large_pool.peak": 384,
15
+ "active.small_pool.allocated": 2554536,
16
+ "active.small_pool.current": 949,
17
+ "active.small_pool.freed": 2553587,
18
+ "active.small_pool.peak": 1199,
19
+ "active_bytes.all.allocated": 2022530949632,
20
+ "active_bytes.all.current": 1030541312,
21
+ "active_bytes.all.freed": 2021500408320,
22
+ "active_bytes.all.peak": 3413011456,
23
+ "active_bytes.large_pool.allocated": 1458420835328,
24
+ "active_bytes.large_pool.current": 1013274624,
25
+ "active_bytes.large_pool.freed": 1457407560704,
26
+ "active_bytes.large_pool.peak": 3327070720,
27
+ "active_bytes.small_pool.allocated": 564110114304,
28
+ "active_bytes.small_pool.current": 17266688,
29
+ "active_bytes.small_pool.freed": 564092847616,
30
+ "active_bytes.small_pool.peak": 118205440,
31
+ "allocated_bytes.all.allocated": 2022530949632,
32
+ "allocated_bytes.all.current": 1030541312,
33
+ "allocated_bytes.all.freed": 2021500408320,
34
+ "allocated_bytes.all.peak": 3413011456,
35
+ "allocated_bytes.large_pool.allocated": 1458420835328,
36
+ "allocated_bytes.large_pool.current": 1013274624,
37
+ "allocated_bytes.large_pool.freed": 1457407560704,
38
+ "allocated_bytes.large_pool.peak": 3327070720,
39
+ "allocated_bytes.small_pool.allocated": 564110114304,
40
+ "allocated_bytes.small_pool.current": 17266688,
41
+ "allocated_bytes.small_pool.freed": 564092847616,
42
+ "allocated_bytes.small_pool.peak": 118205440,
43
+ "allocation.all.allocated": 2915632,
44
+ "allocation.all.current": 1233,
45
+ "allocation.all.freed": 2914399,
46
+ "allocation.all.peak": 1485,
47
+ "allocation.large_pool.allocated": 361096,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 360812,
50
+ "allocation.large_pool.peak": 384,
51
+ "allocation.small_pool.allocated": 2554536,
52
+ "allocation.small_pool.current": 949,
53
+ "allocation.small_pool.freed": 2553587,
54
+ "allocation.small_pool.peak": 1199,
55
+ "inactive_split.all.allocated": 1425838,
56
+ "inactive_split.all.current": 173,
57
+ "inactive_split.all.freed": 1425665,
58
+ "inactive_split.all.peak": 221,
59
+ "inactive_split.large_pool.allocated": 166568,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 166525,
62
+ "inactive_split.large_pool.peak": 96,
63
+ "inactive_split.small_pool.allocated": 1259270,
64
+ "inactive_split.small_pool.current": 130,
65
+ "inactive_split.small_pool.freed": 1259140,
66
+ "inactive_split.small_pool.peak": 145,
67
+ "inactive_split_bytes.all.allocated": 2015997287936,
68
+ "inactive_split_bytes.all.current": 150155264,
69
+ "inactive_split_bytes.all.freed": 2015847132672,
70
+ "inactive_split_bytes.all.peak": 926979584,
71
+ "inactive_split_bytes.large_pool.allocated": 1432195878400,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 1432089273856,
74
+ "inactive_split_bytes.large_pool.peak": 890500608,
75
+ "inactive_split_bytes.small_pool.allocated": 583801409536,
76
+ "inactive_split_bytes.small_pool.current": 43550720,
77
+ "inactive_split_bytes.small_pool.freed": 583757858816,
78
+ "inactive_split_bytes.small_pool.peak": 80564224,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 426,
82
+ "num_device_free": 52,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 1,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 1978722581871,
94
+ "requested_bytes.all.current": 1027778996,
95
+ "requested_bytes.all.freed": 1977694802875,
96
+ "requested_bytes.all.peak": 3382342108,
97
+ "requested_bytes.large_pool.allocated": 1414850593488,
98
+ "requested_bytes.large_pool.current": 1010670088,
99
+ "requested_bytes.large_pool.freed": 1413839923400,
100
+ "requested_bytes.large_pool.peak": 3296520840,
101
+ "requested_bytes.small_pool.allocated": 563871988383,
102
+ "requested_bytes.small_pool.current": 17108908,
103
+ "requested_bytes.small_pool.freed": 563854879475,
104
+ "requested_bytes.small_pool.peak": 118076684,
105
+ "reserved_bytes.all.allocated": 10521411584,
106
+ "reserved_bytes.all.current": 10204741632,
107
+ "reserved_bytes.all.freed": 316669952,
108
+ "reserved_bytes.all.peak": 10204741632,
109
+ "reserved_bytes.large_pool.allocated": 10297016320,
110
+ "reserved_bytes.large_pool.current": 10066329600,
111
+ "reserved_bytes.large_pool.freed": 230686720,
112
+ "reserved_bytes.large_pool.peak": 10066329600,
113
+ "reserved_bytes.small_pool.allocated": 224395264,
114
+ "reserved_bytes.small_pool.current": 138412032,
115
+ "reserved_bytes.small_pool.freed": 85983232,
116
+ "reserved_bytes.small_pool.peak": 138412032,
117
+ "segment.all.allocated": 426,
118
+ "segment.all.current": 374,
119
+ "segment.all.freed": 52,
120
+ "segment.all.peak": 374,
121
+ "segment.large_pool.allocated": 319,
122
+ "segment.large_pool.current": 308,
123
+ "segment.large_pool.freed": 11,
124
+ "segment.large_pool.peak": 308,
125
+ "segment.small_pool.allocated": 107,
126
+ "segment.small_pool.current": 66,
127
+ "segment.small_pool.freed": 41,
128
+ "segment.small_pool.peak": 66
129
+ }
130
+ }
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.529144545456451}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 1030541312,
3
+ "max_memory_allocated": 3413011456,
4
+ "memory_reserved": 10206838784,
5
+ "max_memory_reserved": 10206838784,
6
+ "memory_stats": {
7
+ "active.all.allocated": 5831312,
8
+ "active.all.current": 1233,
9
+ "active.all.freed": 5830079,
10
+ "active.all.peak": 1485,
11
+ "active.large_pool.allocated": 722190,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 721906,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 5109122,
16
+ "active.small_pool.current": 949,
17
+ "active.small_pool.freed": 5108173,
18
+ "active.small_pool.peak": 1199,
19
+ "active_bytes.all.allocated": 4044156840960,
20
+ "active_bytes.all.current": 1030541312,
21
+ "active_bytes.all.freed": 4043126299648,
22
+ "active_bytes.all.peak": 3413011456,
23
+ "active_bytes.large_pool.allocated": 2915936585728,
24
+ "active_bytes.large_pool.current": 1013274624,
25
+ "active_bytes.large_pool.freed": 2914923311104,
26
+ "active_bytes.large_pool.peak": 3327070720,
27
+ "active_bytes.small_pool.allocated": 1128220255232,
28
+ "active_bytes.small_pool.current": 17266688,
29
+ "active_bytes.small_pool.freed": 1128202988544,
30
+ "active_bytes.small_pool.peak": 118205440,
31
+ "allocated_bytes.all.allocated": 4044156840960,
32
+ "allocated_bytes.all.current": 1030541312,
33
+ "allocated_bytes.all.freed": 4043126299648,
34
+ "allocated_bytes.all.peak": 3413011456,
35
+ "allocated_bytes.large_pool.allocated": 2915936585728,
36
+ "allocated_bytes.large_pool.current": 1013274624,
37
+ "allocated_bytes.large_pool.freed": 2914923311104,
38
+ "allocated_bytes.large_pool.peak": 3327070720,
39
+ "allocated_bytes.small_pool.allocated": 1128220255232,
40
+ "allocated_bytes.small_pool.current": 17266688,
41
+ "allocated_bytes.small_pool.freed": 1128202988544,
42
+ "allocated_bytes.small_pool.peak": 118205440,
43
+ "allocation.all.allocated": 5831312,
44
+ "allocation.all.current": 1233,
45
+ "allocation.all.freed": 5830079,
46
+ "allocation.all.peak": 1485,
47
+ "allocation.large_pool.allocated": 722190,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 721906,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 5109122,
52
+ "allocation.small_pool.current": 949,
53
+ "allocation.small_pool.freed": 5108173,
54
+ "allocation.small_pool.peak": 1199,
55
+ "inactive_split.all.allocated": 2839308,
56
+ "inactive_split.all.current": 174,
57
+ "inactive_split.all.freed": 2839134,
58
+ "inactive_split.all.peak": 229,
59
+ "inactive_split.large_pool.allocated": 334357,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 334314,
62
+ "inactive_split.large_pool.peak": 96,
63
+ "inactive_split.small_pool.allocated": 2504951,
64
+ "inactive_split.small_pool.current": 131,
65
+ "inactive_split.small_pool.freed": 2504820,
66
+ "inactive_split.small_pool.peak": 183,
67
+ "inactive_split_bytes.all.allocated": 3999387359232,
68
+ "inactive_split_bytes.all.current": 150155264,
69
+ "inactive_split_bytes.all.freed": 3999237203968,
70
+ "inactive_split_bytes.all.peak": 931173888,
71
+ "inactive_split_bytes.large_pool.allocated": 2836943942656,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 2836837338112,
74
+ "inactive_split_bytes.large_pool.peak": 890500608,
75
+ "inactive_split_bytes.small_pool.allocated": 1162443416576,
76
+ "inactive_split_bytes.small_pool.current": 43550720,
77
+ "inactive_split_bytes.small_pool.freed": 1162399865856,
78
+ "inactive_split_bytes.small_pool.peak": 80564224,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 834,
82
+ "num_device_free": 459,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 3,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 3957428127418,
94
+ "requested_bytes.all.current": 1027778996,
95
+ "requested_bytes.all.freed": 3956400348422,
96
+ "requested_bytes.all.peak": 3382342108,
97
+ "requested_bytes.large_pool.allocated": 2829684147616,
98
+ "requested_bytes.large_pool.current": 1010670088,
99
+ "requested_bytes.large_pool.freed": 2828673477528,
100
+ "requested_bytes.large_pool.peak": 3296520840,
101
+ "requested_bytes.small_pool.allocated": 1127743979802,
102
+ "requested_bytes.small_pool.current": 17108908,
103
+ "requested_bytes.small_pool.freed": 1127726870894,
104
+ "requested_bytes.small_pool.peak": 118076684,
105
+ "reserved_bytes.all.allocated": 20929576960,
106
+ "reserved_bytes.all.current": 10206838784,
107
+ "reserved_bytes.all.freed": 10722738176,
108
+ "reserved_bytes.all.peak": 10206838784,
109
+ "reserved_bytes.large_pool.allocated": 20510146560,
110
+ "reserved_bytes.large_pool.current": 10066329600,
111
+ "reserved_bytes.large_pool.freed": 10443816960,
112
+ "reserved_bytes.large_pool.peak": 10066329600,
113
+ "reserved_bytes.small_pool.allocated": 419430400,
114
+ "reserved_bytes.small_pool.current": 140509184,
115
+ "reserved_bytes.small_pool.freed": 278921216,
116
+ "reserved_bytes.small_pool.peak": 140509184,
117
+ "segment.all.allocated": 834,
118
+ "segment.all.current": 375,
119
+ "segment.all.freed": 459,
120
+ "segment.all.peak": 375,
121
+ "segment.large_pool.allocated": 634,
122
+ "segment.large_pool.current": 308,
123
+ "segment.large_pool.freed": 326,
124
+ "segment.large_pool.peak": 308,
125
+ "segment.small_pool.allocated": 200,
126
+ "segment.small_pool.current": 67,
127
+ "segment.small_pool.freed": 133,
128
+ "segment.small_pool.peak": 67
129
+ }
130
+ }
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.6015805476045657}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "memory_allocated": 1030541312,
3
+ "max_memory_allocated": 3413011456,
4
+ "memory_reserved": 10208935936,
5
+ "max_memory_reserved": 10208935936,
6
+ "memory_stats": {
7
+ "active.all.allocated": 8747042,
8
+ "active.all.current": 1233,
9
+ "active.all.freed": 8745809,
10
+ "active.all.peak": 1485,
11
+ "active.large_pool.allocated": 1083284,
12
+ "active.large_pool.current": 284,
13
+ "active.large_pool.freed": 1083000,
14
+ "active.large_pool.peak": 482,
15
+ "active.small_pool.allocated": 7663758,
16
+ "active.small_pool.current": 949,
17
+ "active.small_pool.freed": 7662809,
18
+ "active.small_pool.peak": 1199,
19
+ "active_bytes.all.allocated": 6065810808320,
20
+ "active_bytes.all.current": 1030541312,
21
+ "active_bytes.all.freed": 6064780267008,
22
+ "active_bytes.all.peak": 3413011456,
23
+ "active_bytes.large_pool.allocated": 4373480385536,
24
+ "active_bytes.large_pool.current": 1013274624,
25
+ "active_bytes.large_pool.freed": 4372467110912,
26
+ "active_bytes.large_pool.peak": 3327070720,
27
+ "active_bytes.small_pool.allocated": 1692330422784,
28
+ "active_bytes.small_pool.current": 17266688,
29
+ "active_bytes.small_pool.freed": 1692313156096,
30
+ "active_bytes.small_pool.peak": 118205440,
31
+ "allocated_bytes.all.allocated": 6065810808320,
32
+ "allocated_bytes.all.current": 1030541312,
33
+ "allocated_bytes.all.freed": 6064780267008,
34
+ "allocated_bytes.all.peak": 3413011456,
35
+ "allocated_bytes.large_pool.allocated": 4373480385536,
36
+ "allocated_bytes.large_pool.current": 1013274624,
37
+ "allocated_bytes.large_pool.freed": 4372467110912,
38
+ "allocated_bytes.large_pool.peak": 3327070720,
39
+ "allocated_bytes.small_pool.allocated": 1692330422784,
40
+ "allocated_bytes.small_pool.current": 17266688,
41
+ "allocated_bytes.small_pool.freed": 1692313156096,
42
+ "allocated_bytes.small_pool.peak": 118205440,
43
+ "allocation.all.allocated": 8747042,
44
+ "allocation.all.current": 1233,
45
+ "allocation.all.freed": 8745809,
46
+ "allocation.all.peak": 1485,
47
+ "allocation.large_pool.allocated": 1083284,
48
+ "allocation.large_pool.current": 284,
49
+ "allocation.large_pool.freed": 1083000,
50
+ "allocation.large_pool.peak": 482,
51
+ "allocation.small_pool.allocated": 7663758,
52
+ "allocation.small_pool.current": 949,
53
+ "allocation.small_pool.freed": 7662809,
54
+ "allocation.small_pool.peak": 1199,
55
+ "inactive_split.all.allocated": 4301545,
56
+ "inactive_split.all.current": 182,
57
+ "inactive_split.all.freed": 4301363,
58
+ "inactive_split.all.peak": 246,
59
+ "inactive_split.large_pool.allocated": 499623,
60
+ "inactive_split.large_pool.current": 43,
61
+ "inactive_split.large_pool.freed": 499580,
62
+ "inactive_split.large_pool.peak": 96,
63
+ "inactive_split.small_pool.allocated": 3801922,
64
+ "inactive_split.small_pool.current": 139,
65
+ "inactive_split.small_pool.freed": 3801783,
66
+ "inactive_split.small_pool.peak": 220,
67
+ "inactive_split_bytes.all.allocated": 5982721435648,
68
+ "inactive_split_bytes.all.current": 150155264,
69
+ "inactive_split_bytes.all.freed": 5982571280384,
70
+ "inactive_split_bytes.all.peak": 935368192,
71
+ "inactive_split_bytes.large_pool.allocated": 4242779976192,
72
+ "inactive_split_bytes.large_pool.current": 106604544,
73
+ "inactive_split_bytes.large_pool.freed": 4242673371648,
74
+ "inactive_split_bytes.large_pool.peak": 890500608,
75
+ "inactive_split_bytes.small_pool.allocated": 1739941459456,
76
+ "inactive_split_bytes.small_pool.current": 43550720,
77
+ "inactive_split_bytes.small_pool.freed": 1739897908736,
78
+ "inactive_split_bytes.small_pool.peak": 80564224,
79
+ "max_split_size": -1,
80
+ "num_alloc_retries": 0,
81
+ "num_device_alloc": 1238,
82
+ "num_device_free": 862,
83
+ "num_ooms": 0,
84
+ "num_sync_all_streams": 5,
85
+ "oversize_allocations.allocated": 0,
86
+ "oversize_allocations.current": 0,
87
+ "oversize_allocations.freed": 0,
88
+ "oversize_allocations.peak": 0,
89
+ "oversize_segments.allocated": 0,
90
+ "oversize_segments.current": 0,
91
+ "oversize_segments.freed": 0,
92
+ "oversize_segments.peak": 0,
93
+ "requested_bytes.all.allocated": 5936133676001,
94
+ "requested_bytes.all.current": 1027778996,
95
+ "requested_bytes.all.freed": 5935105897005,
96
+ "requested_bytes.all.peak": 3382342108,
97
+ "requested_bytes.large_pool.allocated": 4244517701744,
98
+ "requested_bytes.large_pool.current": 1010670088,
99
+ "requested_bytes.large_pool.freed": 4243507031656,
100
+ "requested_bytes.large_pool.peak": 3296520840,
101
+ "requested_bytes.small_pool.allocated": 1691615974257,
102
+ "requested_bytes.small_pool.current": 17108908,
103
+ "requested_bytes.small_pool.freed": 1691598865349,
104
+ "requested_bytes.small_pool.peak": 118076684,
105
+ "reserved_bytes.all.allocated": 31348228096,
106
+ "reserved_bytes.all.current": 10208935936,
107
+ "reserved_bytes.all.freed": 21139292160,
108
+ "reserved_bytes.all.peak": 10208935936,
109
+ "reserved_bytes.large_pool.allocated": 30744248320,
110
+ "reserved_bytes.large_pool.current": 10066329600,
111
+ "reserved_bytes.large_pool.freed": 20677918720,
112
+ "reserved_bytes.large_pool.peak": 10066329600,
113
+ "reserved_bytes.small_pool.allocated": 603979776,
114
+ "reserved_bytes.small_pool.current": 142606336,
115
+ "reserved_bytes.small_pool.freed": 461373440,
116
+ "reserved_bytes.small_pool.peak": 142606336,
117
+ "segment.all.allocated": 1238,
118
+ "segment.all.current": 376,
119
+ "segment.all.freed": 862,
120
+ "segment.all.peak": 376,
121
+ "segment.large_pool.allocated": 950,
122
+ "segment.large_pool.current": 308,
123
+ "segment.large_pool.freed": 642,
124
+ "segment.large_pool.peak": 308,
125
+ "segment.small_pool.allocated": 288,
126
+ "segment.small_pool.current": 68,
127
+ "segment.small_pool.freed": 220,
128
+ "segment.small_pool.peak": 68
129
+ }
130
+ }
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_matthews_correlation": 0.5933072676560336}
outputs/cola/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff