interim results
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- outputs/cola/args.json +41 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/args.json +32 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/logfile.log +222 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/README.md +202 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/adapter_config.json +32 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/adapter_model.safetensors +3 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_la_kron_last_layer_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_val.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_la_kron_last_layer_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_val.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/gpu_stats.json +130 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/special_tokens_map.json +7 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/tokenizer.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/tokenizer_config.json +55 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/val_res.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/vocab.txt +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/README.md +202 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/adapter_config.json +32 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/adapter_model.safetensors +3 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results_val.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res_val.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/gpu_stats.json +130 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/special_tokens_map.json +7 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/tokenizer.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/tokenizer_config.json +56 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/vocab.txt +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/README.md +202 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/adapter_config.json +32 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/adapter_model.safetensors +3 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_la_kron_last_layer_homo_mc_corr_1000.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_val.json +1 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res_la_kron_last_layer_homo_mc_corr_1000.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/gpu_stats.json +130 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/gpu_stats_la.json +130 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/special_tokens_map.json +7 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/tokenizer.json +0 -0
- outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/tokenizer_config.json +55 -0
outputs/cola/args.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"task_name": "cola",
|
3 |
+
"train_file": null,
|
4 |
+
"validation_file": null,
|
5 |
+
"max_length": 256,
|
6 |
+
"pad_to_max_length": false,
|
7 |
+
"model_name_or_path": "bert-base-uncased",
|
8 |
+
"use_slow_tokenizer": false,
|
9 |
+
"per_device_train_batch_size": 32,
|
10 |
+
"per_device_eval_batch_size": 1,
|
11 |
+
"learning_rate": 5e-05,
|
12 |
+
"weight_decay": 0.0,
|
13 |
+
"num_train_epochs": 5,
|
14 |
+
"max_train_steps": null,
|
15 |
+
"peft_method": null,
|
16 |
+
"gradient_accumulation_steps": 1,
|
17 |
+
"lr_scheduler_type": "linear",
|
18 |
+
"num_warmup_steps": 0,
|
19 |
+
"output_dir": "./outputs",
|
20 |
+
"seed": 42,
|
21 |
+
"push_to_hub": false,
|
22 |
+
"hub_model_id": null,
|
23 |
+
"hub_token": null,
|
24 |
+
"checkpointing_steps": "1000",
|
25 |
+
"resume_from_checkpoint": null,
|
26 |
+
"with_tracking": false,
|
27 |
+
"report_to": "all",
|
28 |
+
"ignore_mismatched_sizes": true,
|
29 |
+
"save": false,
|
30 |
+
"load_step": 999,
|
31 |
+
"lora_r": 8,
|
32 |
+
"lora_alpha": 16,
|
33 |
+
"lora_dropout": 0.1,
|
34 |
+
"laplace_hessian": "kron",
|
35 |
+
"laplace_sub": "last_layer",
|
36 |
+
"laplace_prior": "homo",
|
37 |
+
"laplace_optim_step": 1000,
|
38 |
+
"testing_set": "train_val",
|
39 |
+
"cache_dir": "/content/cache/huggingface/metrics/",
|
40 |
+
"laplace_predict": "mc_corr"
|
41 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/args.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"task_name": "cola",
|
3 |
+
"train_file": null,
|
4 |
+
"validation_file": null,
|
5 |
+
"max_length": 256,
|
6 |
+
"pad_to_max_length": false,
|
7 |
+
"model_name_or_path": "bert-base-uncased",
|
8 |
+
"use_slow_tokenizer": false,
|
9 |
+
"per_device_train_batch_size": 32,
|
10 |
+
"per_device_eval_batch_size": 1,
|
11 |
+
"learning_rate": 5e-05,
|
12 |
+
"weight_decay": 0.0,
|
13 |
+
"num_train_epochs": 5,
|
14 |
+
"max_train_steps": 10000,
|
15 |
+
"gradient_accumulation_steps": 1,
|
16 |
+
"lr_scheduler_type": "linear",
|
17 |
+
"num_warmup_steps": 0,
|
18 |
+
"output_dir": "./outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42",
|
19 |
+
"seed": 42,
|
20 |
+
"push_to_hub": false,
|
21 |
+
"hub_model_id": null,
|
22 |
+
"hub_token": null,
|
23 |
+
"checkpointing_steps": null,
|
24 |
+
"resume_from_checkpoint": null,
|
25 |
+
"with_tracking": false,
|
26 |
+
"report_to": "all",
|
27 |
+
"ignore_mismatched_sizes": true,
|
28 |
+
"lora_r": 8,
|
29 |
+
"lora_alpha": 16,
|
30 |
+
"lora_dropout": 0.1,
|
31 |
+
"testing_set": "train_val"
|
32 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/logfile.log
ADDED
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
05/23/2024 14:23:35 - INFO - __main__ - Number of labels detected = 2
|
2 |
+
05/23/2024 14:23:36 - INFO - __main__ - None
|
3 |
+
05/23/2024 14:23:36 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
4 |
+
05/23/2024 14:23:36 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
5 |
+
05/23/2024 14:23:36 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
6 |
+
05/23/2024 14:23:36 - INFO - __main__ - Max training steps before recalculation = None
|
7 |
+
05/23/2024 14:23:36 - INFO - __main__ - num_update_steps_per_epoch initial = 214
|
8 |
+
05/23/2024 14:23:36 - INFO - __main__ - num training epochs initial = 5
|
9 |
+
05/23/2024 14:23:36 - INFO - __main__ - Adjusted num_train_epochs based on max_train_steps: 5
|
10 |
+
05/23/2024 14:23:36 - INFO - __main__ - PeftModelForSequenceClassification(
|
11 |
+
(base_model): LoraModel(
|
12 |
+
(model): BertForSequenceClassification(
|
13 |
+
(bert): BertModel(
|
14 |
+
(embeddings): BertEmbeddings(
|
15 |
+
(word_embeddings): Embedding(30522, 768, padding_idx=0)
|
16 |
+
(position_embeddings): Embedding(512, 768)
|
17 |
+
(token_type_embeddings): Embedding(2, 768)
|
18 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
19 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
20 |
+
)
|
21 |
+
(encoder): BertEncoder(
|
22 |
+
(layer): ModuleList(
|
23 |
+
(0-11): 12 x BertLayer(
|
24 |
+
(attention): BertAttention(
|
25 |
+
(self): BertSdpaSelfAttention(
|
26 |
+
(query): lora.Linear(
|
27 |
+
(base_layer): Linear(in_features=768, out_features=768, bias=True)
|
28 |
+
(lora_dropout): ModuleDict(
|
29 |
+
(default): Dropout(p=0.1, inplace=False)
|
30 |
+
)
|
31 |
+
(lora_A): ModuleDict(
|
32 |
+
(default): Linear(in_features=768, out_features=8, bias=False)
|
33 |
+
)
|
34 |
+
(lora_B): ModuleDict(
|
35 |
+
(default): Linear(in_features=8, out_features=768, bias=False)
|
36 |
+
)
|
37 |
+
(lora_embedding_A): ParameterDict()
|
38 |
+
(lora_embedding_B): ParameterDict()
|
39 |
+
)
|
40 |
+
(key): Linear(in_features=768, out_features=768, bias=True)
|
41 |
+
(value): lora.Linear(
|
42 |
+
(base_layer): Linear(in_features=768, out_features=768, bias=True)
|
43 |
+
(lora_dropout): ModuleDict(
|
44 |
+
(default): Dropout(p=0.1, inplace=False)
|
45 |
+
)
|
46 |
+
(lora_A): ModuleDict(
|
47 |
+
(default): Linear(in_features=768, out_features=8, bias=False)
|
48 |
+
)
|
49 |
+
(lora_B): ModuleDict(
|
50 |
+
(default): Linear(in_features=8, out_features=768, bias=False)
|
51 |
+
)
|
52 |
+
(lora_embedding_A): ParameterDict()
|
53 |
+
(lora_embedding_B): ParameterDict()
|
54 |
+
)
|
55 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
56 |
+
)
|
57 |
+
(output): BertSelfOutput(
|
58 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
59 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
60 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
61 |
+
)
|
62 |
+
)
|
63 |
+
(intermediate): BertIntermediate(
|
64 |
+
(dense): Linear(in_features=768, out_features=3072, bias=True)
|
65 |
+
(intermediate_act_fn): GELUActivation()
|
66 |
+
)
|
67 |
+
(output): BertOutput(
|
68 |
+
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
69 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
70 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
71 |
+
)
|
72 |
+
)
|
73 |
+
)
|
74 |
+
)
|
75 |
+
(pooler): BertPooler(
|
76 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
77 |
+
(activation): Tanh()
|
78 |
+
)
|
79 |
+
)
|
80 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
81 |
+
(classifier): ModulesToSaveWrapper(
|
82 |
+
(original_module): Linear(in_features=768, out_features=2, bias=True)
|
83 |
+
(modules_to_save): ModuleDict(
|
84 |
+
(default): Linear(in_features=768, out_features=2, bias=True)
|
85 |
+
)
|
86 |
+
)
|
87 |
+
)
|
88 |
+
)
|
89 |
+
)
|
90 |
+
05/23/2024 14:23:37 - INFO - __main__ - num_update_steps_per_epoch before recalculation = 214
|
91 |
+
05/23/2024 14:23:37 - INFO - __main__ - num_update_steps_per_epoch after recalculation = 214
|
92 |
+
05/23/2024 14:23:37 - INFO - __main__ - num training epochs before recalculation = 5
|
93 |
+
05/23/2024 14:23:37 - INFO - __main__ - ***** Running training *****
|
94 |
+
05/23/2024 14:23:37 - INFO - __main__ - Num examples = 6840
|
95 |
+
05/23/2024 14:23:37 - INFO - __main__ - Num Epochs = 5
|
96 |
+
05/23/2024 14:23:37 - INFO - __main__ - Instantaneous batch size per device = 32
|
97 |
+
05/23/2024 14:23:37 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32
|
98 |
+
05/23/2024 14:23:37 - INFO - __main__ - Gradient Accumulation steps = 1
|
99 |
+
05/23/2024 14:23:37 - INFO - __main__ - Total optimization steps = 1070
|
100 |
+
05/23/2024 14:23:53 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.10404532946217532}
|
101 |
+
05/23/2024 14:24:18 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.08209862418796109}
|
102 |
+
05/23/2024 14:24:51 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.0}
|
103 |
+
05/23/2024 14:25:17 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.0}
|
104 |
+
05/23/2024 14:25:52 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.11716571434177496}
|
105 |
+
05/23/2024 14:26:18 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.17856585985971396}
|
106 |
+
05/23/2024 14:26:54 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.3214366409689651}
|
107 |
+
05/23/2024 14:27:22 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.36034145570197795}
|
108 |
+
05/23/2024 14:27:58 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.3396154119450817}
|
109 |
+
05/23/2024 14:28:24 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.3875723081509275}
|
110 |
+
05/23/2024 14:29:01 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.3581225660855234}
|
111 |
+
05/23/2024 14:29:27 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.40757323337120416}
|
112 |
+
05/23/2024 16:35:23 - INFO - __main__ - Number of labels detected = 2
|
113 |
+
05/23/2024 16:35:24 - INFO - __main__ - None
|
114 |
+
05/23/2024 16:35:24 - INFO - __main__ - Sample 5238 of the training set: {'input_ids': [101, 2009, 1005, 1055, 2986, 2008, 2002, 3825, 1998, 17806, 1010, 2021, 1045, 2123, 1005, 1056, 2428, 2729, 2055, 2010, 15531, 1010, 2030, 1996, 2769, 1010, 2030, 2505, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
|
115 |
+
05/23/2024 16:35:24 - INFO - __main__ - Sample 912 of the training set: {'input_ids': [101, 1045, 2113, 2029, 2338, 23848, 3191, 1010, 1998, 2029, 2338, 3960, 2356, 2339, 2017, 2910, 1005, 1056, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
116 |
+
05/23/2024 16:35:24 - INFO - __main__ - Sample 204 of the training set: {'input_ids': [101, 1996, 26108, 2002, 4152, 1010, 1996, 2062, 2198, 6010, 11067, 2229, 1012, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
|
117 |
+
05/23/2024 16:35:24 - INFO - __main__ - Max training steps before recalculation = None
|
118 |
+
05/23/2024 16:35:24 - INFO - __main__ - num_update_steps_per_epoch initial = 214
|
119 |
+
05/23/2024 16:35:24 - INFO - __main__ - num training epochs initial = 5
|
120 |
+
05/23/2024 16:35:24 - INFO - __main__ - Adjusted num_train_epochs based on max_train_steps: 5
|
121 |
+
05/23/2024 16:35:24 - INFO - __main__ - PeftModelForSequenceClassification(
|
122 |
+
(base_model): LoraModel(
|
123 |
+
(model): BertForSequenceClassification(
|
124 |
+
(bert): BertModel(
|
125 |
+
(embeddings): BertEmbeddings(
|
126 |
+
(word_embeddings): Embedding(30522, 768, padding_idx=0)
|
127 |
+
(position_embeddings): Embedding(512, 768)
|
128 |
+
(token_type_embeddings): Embedding(2, 768)
|
129 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
130 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
131 |
+
)
|
132 |
+
(encoder): BertEncoder(
|
133 |
+
(layer): ModuleList(
|
134 |
+
(0-11): 12 x BertLayer(
|
135 |
+
(attention): BertAttention(
|
136 |
+
(self): BertSdpaSelfAttention(
|
137 |
+
(query): lora.Linear(
|
138 |
+
(base_layer): Linear(in_features=768, out_features=768, bias=True)
|
139 |
+
(lora_dropout): ModuleDict(
|
140 |
+
(default): Dropout(p=0.1, inplace=False)
|
141 |
+
)
|
142 |
+
(lora_A): ModuleDict(
|
143 |
+
(default): Linear(in_features=768, out_features=8, bias=False)
|
144 |
+
)
|
145 |
+
(lora_B): ModuleDict(
|
146 |
+
(default): Linear(in_features=8, out_features=768, bias=False)
|
147 |
+
)
|
148 |
+
(lora_embedding_A): ParameterDict()
|
149 |
+
(lora_embedding_B): ParameterDict()
|
150 |
+
)
|
151 |
+
(key): Linear(in_features=768, out_features=768, bias=True)
|
152 |
+
(value): lora.Linear(
|
153 |
+
(base_layer): Linear(in_features=768, out_features=768, bias=True)
|
154 |
+
(lora_dropout): ModuleDict(
|
155 |
+
(default): Dropout(p=0.1, inplace=False)
|
156 |
+
)
|
157 |
+
(lora_A): ModuleDict(
|
158 |
+
(default): Linear(in_features=768, out_features=8, bias=False)
|
159 |
+
)
|
160 |
+
(lora_B): ModuleDict(
|
161 |
+
(default): Linear(in_features=8, out_features=768, bias=False)
|
162 |
+
)
|
163 |
+
(lora_embedding_A): ParameterDict()
|
164 |
+
(lora_embedding_B): ParameterDict()
|
165 |
+
)
|
166 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
167 |
+
)
|
168 |
+
(output): BertSelfOutput(
|
169 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
170 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
171 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
172 |
+
)
|
173 |
+
)
|
174 |
+
(intermediate): BertIntermediate(
|
175 |
+
(dense): Linear(in_features=768, out_features=3072, bias=True)
|
176 |
+
(intermediate_act_fn): GELUActivation()
|
177 |
+
)
|
178 |
+
(output): BertOutput(
|
179 |
+
(dense): Linear(in_features=3072, out_features=768, bias=True)
|
180 |
+
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
|
181 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
182 |
+
)
|
183 |
+
)
|
184 |
+
)
|
185 |
+
)
|
186 |
+
(pooler): BertPooler(
|
187 |
+
(dense): Linear(in_features=768, out_features=768, bias=True)
|
188 |
+
(activation): Tanh()
|
189 |
+
)
|
190 |
+
)
|
191 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
192 |
+
(classifier): ModulesToSaveWrapper(
|
193 |
+
(original_module): Linear(in_features=768, out_features=2, bias=True)
|
194 |
+
(modules_to_save): ModuleDict(
|
195 |
+
(default): Linear(in_features=768, out_features=2, bias=True)
|
196 |
+
)
|
197 |
+
)
|
198 |
+
)
|
199 |
+
)
|
200 |
+
)
|
201 |
+
05/23/2024 16:35:24 - INFO - __main__ - num_update_steps_per_epoch before recalculation = 214
|
202 |
+
05/23/2024 16:35:24 - INFO - __main__ - num_update_steps_per_epoch after recalculation = 214
|
203 |
+
05/23/2024 16:35:24 - INFO - __main__ - num training epochs before recalculation = 5
|
204 |
+
05/23/2024 16:35:25 - INFO - __main__ - ***** Running training *****
|
205 |
+
05/23/2024 16:35:25 - INFO - __main__ - Num examples = 6840
|
206 |
+
05/23/2024 16:35:25 - INFO - __main__ - Num Epochs = 5
|
207 |
+
05/23/2024 16:35:25 - INFO - __main__ - Instantaneous batch size per device = 32
|
208 |
+
05/23/2024 16:35:25 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32
|
209 |
+
05/23/2024 16:35:25 - INFO - __main__ - Gradient Accumulation steps = 1
|
210 |
+
05/23/2024 16:35:25 - INFO - __main__ - Total optimization steps = 1070
|
211 |
+
05/23/2024 16:35:41 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.10404532946217532}
|
212 |
+
05/23/2024 16:36:06 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.08209862418796109}
|
213 |
+
05/23/2024 16:36:40 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.018148342420931135}
|
214 |
+
05/23/2024 16:37:06 - INFO - __main__ - epoch 0: {'matthews_correlation': 0.0}
|
215 |
+
05/23/2024 16:37:40 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.3184881845473565}
|
216 |
+
05/23/2024 16:38:06 - INFO - __main__ - epoch 1: {'matthews_correlation': 0.37525192119086626}
|
217 |
+
05/23/2024 16:38:41 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.3641002176705668}
|
218 |
+
05/23/2024 16:39:07 - INFO - __main__ - epoch 2: {'matthews_correlation': 0.386668454336151}
|
219 |
+
05/23/2024 16:39:43 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.3860262930745603}
|
220 |
+
05/23/2024 16:40:09 - INFO - __main__ - epoch 3: {'matthews_correlation': 0.386060392272705}
|
221 |
+
05/23/2024 16:40:44 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.39481313720106403}
|
222 |
+
05/23/2024 16:41:10 - INFO - __main__ - epoch 4: {'matthews_correlation': 0.38770266622658694}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: bert-base-uncased
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.11.1
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/adapter_config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "bert-base-uncased",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": [
|
18 |
+
"classifier",
|
19 |
+
"score"
|
20 |
+
],
|
21 |
+
"peft_type": "LORA",
|
22 |
+
"r": 8,
|
23 |
+
"rank_pattern": {},
|
24 |
+
"revision": null,
|
25 |
+
"target_modules": [
|
26 |
+
"query",
|
27 |
+
"value"
|
28 |
+
],
|
29 |
+
"task_type": "SEQ_CLS",
|
30 |
+
"use_dora": false,
|
31 |
+
"use_rslora": false
|
32 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68ad9f6c556fa0cefdee2d3f1c058573923e8b0afefb79923f14fc1d96825ae9
|
3 |
+
size 1192672
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.10404532946217532}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.10966430521058565}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_la_kron_last_layer_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.11166418336918776}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/all_results_val.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.08209862418796109}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_la_kron_last_layer_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/eval_res_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/gpu_stats.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"memory_allocated": 448806400,
|
3 |
+
"max_memory_allocated": 450717184,
|
4 |
+
"memory_reserved": 499122176,
|
5 |
+
"max_memory_reserved": 499122176,
|
6 |
+
"memory_stats": {
|
7 |
+
"active.all.allocated": 1057794,
|
8 |
+
"active.all.current": 265,
|
9 |
+
"active.all.freed": 1057529,
|
10 |
+
"active.all.peak": 278,
|
11 |
+
"active.large_pool.allocated": 76,
|
12 |
+
"active.large_pool.current": 76,
|
13 |
+
"active.large_pool.freed": 0,
|
14 |
+
"active.large_pool.peak": 76,
|
15 |
+
"active.small_pool.allocated": 1057718,
|
16 |
+
"active.small_pool.current": 189,
|
17 |
+
"active.small_pool.freed": 1057529,
|
18 |
+
"active.small_pool.peak": 202,
|
19 |
+
"active_bytes.all.allocated": 241463920640,
|
20 |
+
"active_bytes.all.current": 448806400,
|
21 |
+
"active_bytes.all.freed": 241015114240,
|
22 |
+
"active_bytes.all.peak": 450717184,
|
23 |
+
"active_bytes.large_pool.allocated": 447086592,
|
24 |
+
"active_bytes.large_pool.current": 447086592,
|
25 |
+
"active_bytes.large_pool.freed": 0,
|
26 |
+
"active_bytes.large_pool.peak": 447086592,
|
27 |
+
"active_bytes.small_pool.allocated": 241016834048,
|
28 |
+
"active_bytes.small_pool.current": 1719808,
|
29 |
+
"active_bytes.small_pool.freed": 241015114240,
|
30 |
+
"active_bytes.small_pool.peak": 3630592,
|
31 |
+
"allocated_bytes.all.allocated": 241463920640,
|
32 |
+
"allocated_bytes.all.current": 448806400,
|
33 |
+
"allocated_bytes.all.freed": 241015114240,
|
34 |
+
"allocated_bytes.all.peak": 450717184,
|
35 |
+
"allocated_bytes.large_pool.allocated": 447086592,
|
36 |
+
"allocated_bytes.large_pool.current": 447086592,
|
37 |
+
"allocated_bytes.large_pool.freed": 0,
|
38 |
+
"allocated_bytes.large_pool.peak": 447086592,
|
39 |
+
"allocated_bytes.small_pool.allocated": 241016834048,
|
40 |
+
"allocated_bytes.small_pool.current": 1719808,
|
41 |
+
"allocated_bytes.small_pool.freed": 241015114240,
|
42 |
+
"allocated_bytes.small_pool.peak": 3630592,
|
43 |
+
"allocation.all.allocated": 1057794,
|
44 |
+
"allocation.all.current": 265,
|
45 |
+
"allocation.all.freed": 1057529,
|
46 |
+
"allocation.all.peak": 278,
|
47 |
+
"allocation.large_pool.allocated": 76,
|
48 |
+
"allocation.large_pool.current": 76,
|
49 |
+
"allocation.large_pool.freed": 0,
|
50 |
+
"allocation.large_pool.peak": 76,
|
51 |
+
"allocation.small_pool.allocated": 1057718,
|
52 |
+
"allocation.small_pool.current": 189,
|
53 |
+
"allocation.small_pool.freed": 1057529,
|
54 |
+
"allocation.small_pool.peak": 202,
|
55 |
+
"inactive_split.all.allocated": 676269,
|
56 |
+
"inactive_split.all.current": 21,
|
57 |
+
"inactive_split.all.freed": 676248,
|
58 |
+
"inactive_split.all.peak": 28,
|
59 |
+
"inactive_split.large_pool.allocated": 19,
|
60 |
+
"inactive_split.large_pool.current": 18,
|
61 |
+
"inactive_split.large_pool.freed": 1,
|
62 |
+
"inactive_split.large_pool.peak": 18,
|
63 |
+
"inactive_split.small_pool.allocated": 676250,
|
64 |
+
"inactive_split.small_pool.current": 3,
|
65 |
+
"inactive_split.small_pool.freed": 676247,
|
66 |
+
"inactive_split.small_pool.peak": 10,
|
67 |
+
"inactive_split_bytes.all.allocated": 285715726848,
|
68 |
+
"inactive_split_bytes.all.current": 46121472,
|
69 |
+
"inactive_split_bytes.all.freed": 285669605376,
|
70 |
+
"inactive_split_bytes.all.peak": 55617536,
|
71 |
+
"inactive_split_bytes.large_pool.allocated": 269484032,
|
72 |
+
"inactive_split_bytes.large_pool.current": 45744128,
|
73 |
+
"inactive_split_bytes.large_pool.freed": 223739904,
|
74 |
+
"inactive_split_bytes.large_pool.peak": 54263808,
|
75 |
+
"inactive_split_bytes.small_pool.allocated": 285446242816,
|
76 |
+
"inactive_split_bytes.small_pool.current": 377344,
|
77 |
+
"inactive_split_bytes.small_pool.freed": 285445865472,
|
78 |
+
"inactive_split_bytes.small_pool.peak": 2768384,
|
79 |
+
"max_split_size": -1,
|
80 |
+
"num_alloc_retries": 0,
|
81 |
+
"num_device_alloc": 23,
|
82 |
+
"num_device_free": 0,
|
83 |
+
"num_ooms": 0,
|
84 |
+
"num_sync_all_streams": 0,
|
85 |
+
"oversize_allocations.allocated": 0,
|
86 |
+
"oversize_allocations.current": 0,
|
87 |
+
"oversize_allocations.freed": 0,
|
88 |
+
"oversize_allocations.peak": 0,
|
89 |
+
"oversize_segments.allocated": 0,
|
90 |
+
"oversize_segments.current": 0,
|
91 |
+
"oversize_segments.freed": 0,
|
92 |
+
"oversize_segments.peak": 0,
|
93 |
+
"requested_bytes.all.allocated": 241366910219,
|
94 |
+
"requested_bytes.all.current": 447669276,
|
95 |
+
"requested_bytes.all.freed": 240919240943,
|
96 |
+
"requested_bytes.all.peak": 449579412,
|
97 |
+
"requested_bytes.large_pool.allocated": 445954048,
|
98 |
+
"requested_bytes.large_pool.current": 445954048,
|
99 |
+
"requested_bytes.large_pool.freed": 0,
|
100 |
+
"requested_bytes.large_pool.peak": 445954048,
|
101 |
+
"requested_bytes.small_pool.allocated": 240920956171,
|
102 |
+
"requested_bytes.small_pool.current": 1715228,
|
103 |
+
"requested_bytes.small_pool.freed": 240919240943,
|
104 |
+
"requested_bytes.small_pool.peak": 3625364,
|
105 |
+
"reserved_bytes.all.allocated": 499122176,
|
106 |
+
"reserved_bytes.all.current": 499122176,
|
107 |
+
"reserved_bytes.all.freed": 0,
|
108 |
+
"reserved_bytes.all.peak": 499122176,
|
109 |
+
"reserved_bytes.large_pool.allocated": 492830720,
|
110 |
+
"reserved_bytes.large_pool.current": 492830720,
|
111 |
+
"reserved_bytes.large_pool.freed": 0,
|
112 |
+
"reserved_bytes.large_pool.peak": 492830720,
|
113 |
+
"reserved_bytes.small_pool.allocated": 6291456,
|
114 |
+
"reserved_bytes.small_pool.current": 6291456,
|
115 |
+
"reserved_bytes.small_pool.freed": 0,
|
116 |
+
"reserved_bytes.small_pool.peak": 6291456,
|
117 |
+
"segment.all.allocated": 23,
|
118 |
+
"segment.all.current": 23,
|
119 |
+
"segment.all.freed": 0,
|
120 |
+
"segment.all.peak": 23,
|
121 |
+
"segment.large_pool.allocated": 20,
|
122 |
+
"segment.large_pool.current": 20,
|
123 |
+
"segment.large_pool.freed": 0,
|
124 |
+
"segment.large_pool.peak": 20,
|
125 |
+
"segment.small_pool.allocated": 3,
|
126 |
+
"segment.small_pool.current": 3,
|
127 |
+
"segment.small_pool.freed": 0,
|
128 |
+
"segment.small_pool.peak": 3
|
129 |
+
}
|
130 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"memory_allocated": 1390722048,
|
3 |
+
"max_memory_allocated": 1438867456,
|
4 |
+
"memory_reserved": 1535115264,
|
5 |
+
"max_memory_reserved": 1644167168,
|
6 |
+
"memory_stats": {
|
7 |
+
"active.all.allocated": 593915,
|
8 |
+
"active.all.current": 2760,
|
9 |
+
"active.all.freed": 591155,
|
10 |
+
"active.all.peak": 2768,
|
11 |
+
"active.large_pool.allocated": 49362,
|
12 |
+
"active.large_pool.current": 265,
|
13 |
+
"active.large_pool.freed": 49097,
|
14 |
+
"active.large_pool.peak": 271,
|
15 |
+
"active.small_pool.allocated": 544553,
|
16 |
+
"active.small_pool.current": 2495,
|
17 |
+
"active.small_pool.freed": 542058,
|
18 |
+
"active.small_pool.peak": 2503,
|
19 |
+
"active_bytes.all.allocated": 267919830016,
|
20 |
+
"active_bytes.all.current": 1390722048,
|
21 |
+
"active_bytes.all.freed": 266529107968,
|
22 |
+
"active_bytes.all.peak": 1438867456,
|
23 |
+
"active_bytes.large_pool.allocated": 153659284992,
|
24 |
+
"active_bytes.large_pool.current": 1385332736,
|
25 |
+
"active_bytes.large_pool.freed": 152273952256,
|
26 |
+
"active_bytes.large_pool.peak": 1434157056,
|
27 |
+
"active_bytes.small_pool.allocated": 114260545024,
|
28 |
+
"active_bytes.small_pool.current": 5389312,
|
29 |
+
"active_bytes.small_pool.freed": 114255155712,
|
30 |
+
"active_bytes.small_pool.peak": 6965248,
|
31 |
+
"allocated_bytes.all.allocated": 267919830016,
|
32 |
+
"allocated_bytes.all.current": 1390722048,
|
33 |
+
"allocated_bytes.all.freed": 266529107968,
|
34 |
+
"allocated_bytes.all.peak": 1438867456,
|
35 |
+
"allocated_bytes.large_pool.allocated": 153659284992,
|
36 |
+
"allocated_bytes.large_pool.current": 1385332736,
|
37 |
+
"allocated_bytes.large_pool.freed": 152273952256,
|
38 |
+
"allocated_bytes.large_pool.peak": 1434157056,
|
39 |
+
"allocated_bytes.small_pool.allocated": 114260545024,
|
40 |
+
"allocated_bytes.small_pool.current": 5389312,
|
41 |
+
"allocated_bytes.small_pool.freed": 114255155712,
|
42 |
+
"allocated_bytes.small_pool.peak": 6965248,
|
43 |
+
"allocation.all.allocated": 593915,
|
44 |
+
"allocation.all.current": 2760,
|
45 |
+
"allocation.all.freed": 591155,
|
46 |
+
"allocation.all.peak": 2768,
|
47 |
+
"allocation.large_pool.allocated": 49362,
|
48 |
+
"allocation.large_pool.current": 265,
|
49 |
+
"allocation.large_pool.freed": 49097,
|
50 |
+
"allocation.large_pool.peak": 271,
|
51 |
+
"allocation.small_pool.allocated": 544553,
|
52 |
+
"allocation.small_pool.current": 2495,
|
53 |
+
"allocation.small_pool.freed": 542058,
|
54 |
+
"allocation.small_pool.peak": 2503,
|
55 |
+
"inactive_split.all.allocated": 336711,
|
56 |
+
"inactive_split.all.current": 75,
|
57 |
+
"inactive_split.all.freed": 336636,
|
58 |
+
"inactive_split.all.peak": 84,
|
59 |
+
"inactive_split.large_pool.allocated": 42840,
|
60 |
+
"inactive_split.large_pool.current": 67,
|
61 |
+
"inactive_split.large_pool.freed": 42773,
|
62 |
+
"inactive_split.large_pool.peak": 75,
|
63 |
+
"inactive_split.small_pool.allocated": 293871,
|
64 |
+
"inactive_split.small_pool.current": 8,
|
65 |
+
"inactive_split.small_pool.freed": 293863,
|
66 |
+
"inactive_split.small_pool.peak": 14,
|
67 |
+
"inactive_split_bytes.all.allocated": 252283523072,
|
68 |
+
"inactive_split_bytes.all.current": 142296064,
|
69 |
+
"inactive_split_bytes.all.freed": 252141227008,
|
70 |
+
"inactive_split_bytes.all.peak": 179005952,
|
71 |
+
"inactive_split_bytes.large_pool.allocated": 137276645888,
|
72 |
+
"inactive_split_bytes.large_pool.current": 141393920,
|
73 |
+
"inactive_split_bytes.large_pool.freed": 137135251968,
|
74 |
+
"inactive_split_bytes.large_pool.peak": 177340416,
|
75 |
+
"inactive_split_bytes.small_pool.allocated": 115006877184,
|
76 |
+
"inactive_split_bytes.small_pool.current": 902144,
|
77 |
+
"inactive_split_bytes.small_pool.freed": 115005975040,
|
78 |
+
"inactive_split_bytes.small_pool.peak": 2908160,
|
79 |
+
"max_split_size": -1,
|
80 |
+
"num_alloc_retries": 0,
|
81 |
+
"num_device_alloc": 87,
|
82 |
+
"num_device_free": 8,
|
83 |
+
"num_ooms": 0,
|
84 |
+
"num_sync_all_streams": 1,
|
85 |
+
"oversize_allocations.allocated": 0,
|
86 |
+
"oversize_allocations.current": 0,
|
87 |
+
"oversize_allocations.freed": 0,
|
88 |
+
"oversize_allocations.peak": 0,
|
89 |
+
"oversize_segments.allocated": 0,
|
90 |
+
"oversize_segments.current": 0,
|
91 |
+
"oversize_segments.freed": 0,
|
92 |
+
"oversize_segments.peak": 0,
|
93 |
+
"requested_bytes.all.allocated": 263244243497,
|
94 |
+
"requested_bytes.all.current": 1386385184,
|
95 |
+
"requested_bytes.all.freed": 261857858313,
|
96 |
+
"requested_bytes.all.peak": 1435216624,
|
97 |
+
"requested_bytes.large_pool.allocated": 149065362052,
|
98 |
+
"requested_bytes.large_pool.current": 1382051840,
|
99 |
+
"requested_bytes.large_pool.freed": 147683310212,
|
100 |
+
"requested_bytes.large_pool.peak": 1430515712,
|
101 |
+
"requested_bytes.small_pool.allocated": 114178881445,
|
102 |
+
"requested_bytes.small_pool.current": 4333344,
|
103 |
+
"requested_bytes.small_pool.freed": 114174548101,
|
104 |
+
"requested_bytes.small_pool.peak": 6209040,
|
105 |
+
"reserved_bytes.all.allocated": 1646264320,
|
106 |
+
"reserved_bytes.all.current": 1535115264,
|
107 |
+
"reserved_bytes.all.freed": 111149056,
|
108 |
+
"reserved_bytes.all.peak": 1644167168,
|
109 |
+
"reserved_bytes.large_pool.allocated": 1635778560,
|
110 |
+
"reserved_bytes.large_pool.current": 1526726656,
|
111 |
+
"reserved_bytes.large_pool.freed": 109051904,
|
112 |
+
"reserved_bytes.large_pool.peak": 1635778560,
|
113 |
+
"reserved_bytes.small_pool.allocated": 10485760,
|
114 |
+
"reserved_bytes.small_pool.current": 8388608,
|
115 |
+
"reserved_bytes.small_pool.freed": 2097152,
|
116 |
+
"reserved_bytes.small_pool.peak": 8388608,
|
117 |
+
"segment.all.allocated": 87,
|
118 |
+
"segment.all.current": 79,
|
119 |
+
"segment.all.freed": 8,
|
120 |
+
"segment.all.peak": 86,
|
121 |
+
"segment.large_pool.allocated": 82,
|
122 |
+
"segment.large_pool.current": 75,
|
123 |
+
"segment.large_pool.freed": 7,
|
124 |
+
"segment.large_pool.peak": 82,
|
125 |
+
"segment.small_pool.allocated": 5,
|
126 |
+
"segment.small_pool.current": 4,
|
127 |
+
"segment.small_pool.freed": 1,
|
128 |
+
"segment.small_pool.peak": 4
|
129 |
+
}
|
130 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/val_res.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_0/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: bert-base-uncased
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.11.1
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/adapter_config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "bert-base-uncased",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": [
|
18 |
+
"classifier",
|
19 |
+
"score"
|
20 |
+
],
|
21 |
+
"peft_type": "LORA",
|
22 |
+
"r": 8,
|
23 |
+
"rank_pattern": {},
|
24 |
+
"revision": null,
|
25 |
+
"target_modules": [
|
26 |
+
"query",
|
27 |
+
"value"
|
28 |
+
],
|
29 |
+
"task_type": "SEQ_CLS",
|
30 |
+
"use_dora": false,
|
31 |
+
"use_rslora": false
|
32 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab16918f1e80b50c94ce0b8c607c7dfc8f071057850e4dacd500077862f35b18
|
3 |
+
size 1192672
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.39481313720106403}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.3581225660855234}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/all_results_val.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.38770266622658694}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/eval_res_val.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/gpu_stats.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"memory_allocated": 459692544,
|
3 |
+
"max_memory_allocated": 1249124352,
|
4 |
+
"memory_reserved": 1367343104,
|
5 |
+
"max_memory_reserved": 1367343104,
|
6 |
+
"memory_stats": {
|
7 |
+
"active.all.allocated": 7337693,
|
8 |
+
"active.all.current": 366,
|
9 |
+
"active.all.freed": 7337327,
|
10 |
+
"active.all.peak": 627,
|
11 |
+
"active.large_pool.allocated": 564754,
|
12 |
+
"active.large_pool.current": 77,
|
13 |
+
"active.large_pool.freed": 564677,
|
14 |
+
"active.large_pool.peak": 237,
|
15 |
+
"active.small_pool.allocated": 6772939,
|
16 |
+
"active.small_pool.current": 289,
|
17 |
+
"active.small_pool.freed": 6772650,
|
18 |
+
"active.small_pool.peak": 442,
|
19 |
+
"active_bytes.all.allocated": 3311136859136,
|
20 |
+
"active_bytes.all.current": 459692544,
|
21 |
+
"active_bytes.all.freed": 3310677166592,
|
22 |
+
"active_bytes.all.peak": 1249124352,
|
23 |
+
"active_bytes.large_pool.allocated": 1744599758848,
|
24 |
+
"active_bytes.large_pool.current": 455606272,
|
25 |
+
"active_bytes.large_pool.freed": 1744144152576,
|
26 |
+
"active_bytes.large_pool.peak": 1238638592,
|
27 |
+
"active_bytes.small_pool.allocated": 1566537100288,
|
28 |
+
"active_bytes.small_pool.current": 4086272,
|
29 |
+
"active_bytes.small_pool.freed": 1566533014016,
|
30 |
+
"active_bytes.small_pool.peak": 53204992,
|
31 |
+
"allocated_bytes.all.allocated": 3311136859136,
|
32 |
+
"allocated_bytes.all.current": 459692544,
|
33 |
+
"allocated_bytes.all.freed": 3310677166592,
|
34 |
+
"allocated_bytes.all.peak": 1249124352,
|
35 |
+
"allocated_bytes.large_pool.allocated": 1744599758848,
|
36 |
+
"allocated_bytes.large_pool.current": 455606272,
|
37 |
+
"allocated_bytes.large_pool.freed": 1744144152576,
|
38 |
+
"allocated_bytes.large_pool.peak": 1238638592,
|
39 |
+
"allocated_bytes.small_pool.allocated": 1566537100288,
|
40 |
+
"allocated_bytes.small_pool.current": 4086272,
|
41 |
+
"allocated_bytes.small_pool.freed": 1566533014016,
|
42 |
+
"allocated_bytes.small_pool.peak": 53204992,
|
43 |
+
"allocation.all.allocated": 7337693,
|
44 |
+
"allocation.all.current": 366,
|
45 |
+
"allocation.all.freed": 7337327,
|
46 |
+
"allocation.all.peak": 627,
|
47 |
+
"allocation.large_pool.allocated": 564754,
|
48 |
+
"allocation.large_pool.current": 77,
|
49 |
+
"allocation.large_pool.freed": 564677,
|
50 |
+
"allocation.large_pool.peak": 237,
|
51 |
+
"allocation.small_pool.allocated": 6772939,
|
52 |
+
"allocation.small_pool.current": 289,
|
53 |
+
"allocation.small_pool.freed": 6772650,
|
54 |
+
"allocation.small_pool.peak": 442,
|
55 |
+
"inactive_split.all.allocated": 3416705,
|
56 |
+
"inactive_split.all.current": 29,
|
57 |
+
"inactive_split.all.freed": 3416676,
|
58 |
+
"inactive_split.all.peak": 80,
|
59 |
+
"inactive_split.large_pool.allocated": 395610,
|
60 |
+
"inactive_split.large_pool.current": 19,
|
61 |
+
"inactive_split.large_pool.freed": 395591,
|
62 |
+
"inactive_split.large_pool.peak": 46,
|
63 |
+
"inactive_split.small_pool.allocated": 3021095,
|
64 |
+
"inactive_split.small_pool.current": 10,
|
65 |
+
"inactive_split.small_pool.freed": 3021085,
|
66 |
+
"inactive_split.small_pool.peak": 58,
|
67 |
+
"inactive_split_bytes.all.allocated": 3588253569024,
|
68 |
+
"inactive_split_bytes.all.current": 64595456,
|
69 |
+
"inactive_split_bytes.all.freed": 3588188973568,
|
70 |
+
"inactive_split_bytes.all.peak": 142978560,
|
71 |
+
"inactive_split_bytes.large_pool.allocated": 1962144710656,
|
72 |
+
"inactive_split_bytes.large_pool.current": 58195968,
|
73 |
+
"inactive_split_bytes.large_pool.freed": 1962086514688,
|
74 |
+
"inactive_split_bytes.large_pool.peak": 139067392,
|
75 |
+
"inactive_split_bytes.small_pool.allocated": 1626108858368,
|
76 |
+
"inactive_split_bytes.small_pool.current": 6399488,
|
77 |
+
"inactive_split_bytes.small_pool.freed": 1626102458880,
|
78 |
+
"inactive_split_bytes.small_pool.peak": 34997760,
|
79 |
+
"max_split_size": -1,
|
80 |
+
"num_alloc_retries": 0,
|
81 |
+
"num_device_alloc": 89,
|
82 |
+
"num_device_free": 0,
|
83 |
+
"num_ooms": 0,
|
84 |
+
"num_sync_all_streams": 0,
|
85 |
+
"oversize_allocations.allocated": 0,
|
86 |
+
"oversize_allocations.current": 0,
|
87 |
+
"oversize_allocations.freed": 0,
|
88 |
+
"oversize_allocations.peak": 0,
|
89 |
+
"oversize_segments.allocated": 0,
|
90 |
+
"oversize_segments.current": 0,
|
91 |
+
"oversize_segments.freed": 0,
|
92 |
+
"oversize_segments.peak": 0,
|
93 |
+
"requested_bytes.all.allocated": 3277613335107,
|
94 |
+
"requested_bytes.all.current": 458553772,
|
95 |
+
"requested_bytes.all.freed": 3277154781335,
|
96 |
+
"requested_bytes.all.peak": 1224985496,
|
97 |
+
"requested_bytes.large_pool.allocated": 1711670908928,
|
98 |
+
"requested_bytes.large_pool.current": 454473728,
|
99 |
+
"requested_bytes.large_pool.freed": 1711216435200,
|
100 |
+
"requested_bytes.large_pool.peak": 1214511104,
|
101 |
+
"requested_bytes.small_pool.allocated": 1565942426179,
|
102 |
+
"requested_bytes.small_pool.current": 4080044,
|
103 |
+
"requested_bytes.small_pool.freed": 1565938346135,
|
104 |
+
"requested_bytes.small_pool.peak": 53200664,
|
105 |
+
"reserved_bytes.all.allocated": 1367343104,
|
106 |
+
"reserved_bytes.all.current": 1367343104,
|
107 |
+
"reserved_bytes.all.freed": 0,
|
108 |
+
"reserved_bytes.all.peak": 1367343104,
|
109 |
+
"reserved_bytes.large_pool.allocated": 1312817152,
|
110 |
+
"reserved_bytes.large_pool.current": 1312817152,
|
111 |
+
"reserved_bytes.large_pool.freed": 0,
|
112 |
+
"reserved_bytes.large_pool.peak": 1312817152,
|
113 |
+
"reserved_bytes.small_pool.allocated": 54525952,
|
114 |
+
"reserved_bytes.small_pool.current": 54525952,
|
115 |
+
"reserved_bytes.small_pool.freed": 0,
|
116 |
+
"reserved_bytes.small_pool.peak": 54525952,
|
117 |
+
"segment.all.allocated": 89,
|
118 |
+
"segment.all.current": 89,
|
119 |
+
"segment.all.freed": 0,
|
120 |
+
"segment.all.peak": 89,
|
121 |
+
"segment.large_pool.allocated": 63,
|
122 |
+
"segment.large_pool.current": 63,
|
123 |
+
"segment.large_pool.freed": 0,
|
124 |
+
"segment.large_pool.peak": 63,
|
125 |
+
"segment.small_pool.allocated": 26,
|
126 |
+
"segment.small_pool.current": 26,
|
127 |
+
"segment.small_pool.freed": 0,
|
128 |
+
"segment.small_pool.peak": 26
|
129 |
+
}
|
130 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"padding_side": "left",
|
51 |
+
"sep_token": "[SEP]",
|
52 |
+
"strip_accents": null,
|
53 |
+
"tokenize_chinese_chars": true,
|
54 |
+
"tokenizer_class": "BertTokenizer",
|
55 |
+
"unk_token": "[UNK]"
|
56 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1069/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/README.md
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: bert-base-uncased
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
### Framework versions
|
201 |
+
|
202 |
+
- PEFT 0.11.1
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/adapter_config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "bert-base-uncased",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layer_replication": null,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 16,
|
14 |
+
"lora_dropout": 0.1,
|
15 |
+
"megatron_config": null,
|
16 |
+
"megatron_core": "megatron.core",
|
17 |
+
"modules_to_save": [
|
18 |
+
"classifier",
|
19 |
+
"score"
|
20 |
+
],
|
21 |
+
"peft_type": "LORA",
|
22 |
+
"r": 8,
|
23 |
+
"rank_pattern": {},
|
24 |
+
"revision": null,
|
25 |
+
"target_modules": [
|
26 |
+
"query",
|
27 |
+
"value"
|
28 |
+
],
|
29 |
+
"task_type": "SEQ_CLS",
|
30 |
+
"use_dora": false,
|
31 |
+
"use_rslora": false
|
32 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:170807390621cfa35a2ba755e0fa2ca2e160ad1ad34d5daf3c607389333cb913
|
3 |
+
size 1192672
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.5179821135635028}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.5153229695223019}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_la_kron_last_layer_homo_mc_corr_1000.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.5179821135635028}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/all_results_val.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eval_matthews_correlation": 0.4930429468927303}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/eval_res_la_kron_last_layer_homo_mc_corr_1000.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/gpu_stats.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"memory_allocated": 459701248,
|
3 |
+
"max_memory_allocated": 1249127424,
|
4 |
+
"memory_reserved": 1367343104,
|
5 |
+
"max_memory_reserved": 1367343104,
|
6 |
+
"memory_stats": {
|
7 |
+
"active.all.allocated": 3970721,
|
8 |
+
"active.all.current": 368,
|
9 |
+
"active.all.freed": 3970353,
|
10 |
+
"active.all.peak": 627,
|
11 |
+
"active.large_pool.allocated": 1056039,
|
12 |
+
"active.large_pool.current": 77,
|
13 |
+
"active.large_pool.freed": 1055962,
|
14 |
+
"active.large_pool.peak": 237,
|
15 |
+
"active.small_pool.allocated": 2914682,
|
16 |
+
"active.small_pool.current": 291,
|
17 |
+
"active.small_pool.freed": 2914391,
|
18 |
+
"active.small_pool.peak": 442,
|
19 |
+
"active_bytes.all.allocated": 3969839958528,
|
20 |
+
"active_bytes.all.current": 459701248,
|
21 |
+
"active_bytes.all.freed": 3969380257280,
|
22 |
+
"active_bytes.all.peak": 1249127424,
|
23 |
+
"active_bytes.large_pool.allocated": 3262617366016,
|
24 |
+
"active_bytes.large_pool.current": 455606272,
|
25 |
+
"active_bytes.large_pool.freed": 3262161759744,
|
26 |
+
"active_bytes.large_pool.peak": 1238638592,
|
27 |
+
"active_bytes.small_pool.allocated": 707222592512,
|
28 |
+
"active_bytes.small_pool.current": 4094976,
|
29 |
+
"active_bytes.small_pool.freed": 707218497536,
|
30 |
+
"active_bytes.small_pool.peak": 53208064,
|
31 |
+
"allocated_bytes.all.allocated": 3969839958528,
|
32 |
+
"allocated_bytes.all.current": 459701248,
|
33 |
+
"allocated_bytes.all.freed": 3969380257280,
|
34 |
+
"allocated_bytes.all.peak": 1249127424,
|
35 |
+
"allocated_bytes.large_pool.allocated": 3262617366016,
|
36 |
+
"allocated_bytes.large_pool.current": 455606272,
|
37 |
+
"allocated_bytes.large_pool.freed": 3262161759744,
|
38 |
+
"allocated_bytes.large_pool.peak": 1238638592,
|
39 |
+
"allocated_bytes.small_pool.allocated": 707222592512,
|
40 |
+
"allocated_bytes.small_pool.current": 4094976,
|
41 |
+
"allocated_bytes.small_pool.freed": 707218497536,
|
42 |
+
"allocated_bytes.small_pool.peak": 53208064,
|
43 |
+
"allocation.all.allocated": 3970721,
|
44 |
+
"allocation.all.current": 368,
|
45 |
+
"allocation.all.freed": 3970353,
|
46 |
+
"allocation.all.peak": 627,
|
47 |
+
"allocation.large_pool.allocated": 1056039,
|
48 |
+
"allocation.large_pool.current": 77,
|
49 |
+
"allocation.large_pool.freed": 1055962,
|
50 |
+
"allocation.large_pool.peak": 237,
|
51 |
+
"allocation.small_pool.allocated": 2914682,
|
52 |
+
"allocation.small_pool.current": 291,
|
53 |
+
"allocation.small_pool.freed": 2914391,
|
54 |
+
"allocation.small_pool.peak": 442,
|
55 |
+
"inactive_split.all.allocated": 2105270,
|
56 |
+
"inactive_split.all.current": 31,
|
57 |
+
"inactive_split.all.freed": 2105239,
|
58 |
+
"inactive_split.all.peak": 80,
|
59 |
+
"inactive_split.large_pool.allocated": 740193,
|
60 |
+
"inactive_split.large_pool.current": 19,
|
61 |
+
"inactive_split.large_pool.freed": 740174,
|
62 |
+
"inactive_split.large_pool.peak": 46,
|
63 |
+
"inactive_split.small_pool.allocated": 1365077,
|
64 |
+
"inactive_split.small_pool.current": 12,
|
65 |
+
"inactive_split.small_pool.freed": 1365065,
|
66 |
+
"inactive_split.small_pool.peak": 57,
|
67 |
+
"inactive_split_bytes.all.allocated": 4444347665408,
|
68 |
+
"inactive_split_bytes.all.current": 66683904,
|
69 |
+
"inactive_split_bytes.all.freed": 4444280981504,
|
70 |
+
"inactive_split_bytes.all.peak": 142978560,
|
71 |
+
"inactive_split_bytes.large_pool.allocated": 3667648128512,
|
72 |
+
"inactive_split_bytes.large_pool.current": 58195968,
|
73 |
+
"inactive_split_bytes.large_pool.freed": 3667589932544,
|
74 |
+
"inactive_split_bytes.large_pool.peak": 139067392,
|
75 |
+
"inactive_split_bytes.small_pool.allocated": 776699536896,
|
76 |
+
"inactive_split_bytes.small_pool.current": 8487936,
|
77 |
+
"inactive_split_bytes.small_pool.freed": 776691048960,
|
78 |
+
"inactive_split_bytes.small_pool.peak": 33155584,
|
79 |
+
"max_split_size": -1,
|
80 |
+
"num_alloc_retries": 0,
|
81 |
+
"num_device_alloc": 89,
|
82 |
+
"num_device_free": 0,
|
83 |
+
"num_ooms": 0,
|
84 |
+
"num_sync_all_streams": 0,
|
85 |
+
"oversize_allocations.allocated": 0,
|
86 |
+
"oversize_allocations.current": 0,
|
87 |
+
"oversize_allocations.freed": 0,
|
88 |
+
"oversize_allocations.peak": 0,
|
89 |
+
"oversize_segments.allocated": 0,
|
90 |
+
"oversize_segments.current": 0,
|
91 |
+
"oversize_segments.freed": 0,
|
92 |
+
"oversize_segments.peak": 0,
|
93 |
+
"requested_bytes.all.allocated": 3907020432961,
|
94 |
+
"requested_bytes.all.current": 458561584,
|
95 |
+
"requested_bytes.all.freed": 3906561871377,
|
96 |
+
"requested_bytes.all.peak": 1224986276,
|
97 |
+
"requested_bytes.large_pool.allocated": 3200025939968,
|
98 |
+
"requested_bytes.large_pool.current": 454473728,
|
99 |
+
"requested_bytes.large_pool.freed": 3199571466240,
|
100 |
+
"requested_bytes.large_pool.peak": 1214511104,
|
101 |
+
"requested_bytes.small_pool.allocated": 706994492993,
|
102 |
+
"requested_bytes.small_pool.current": 4087856,
|
103 |
+
"requested_bytes.small_pool.freed": 706990405137,
|
104 |
+
"requested_bytes.small_pool.peak": 53201444,
|
105 |
+
"reserved_bytes.all.allocated": 1367343104,
|
106 |
+
"reserved_bytes.all.current": 1367343104,
|
107 |
+
"reserved_bytes.all.freed": 0,
|
108 |
+
"reserved_bytes.all.peak": 1367343104,
|
109 |
+
"reserved_bytes.large_pool.allocated": 1312817152,
|
110 |
+
"reserved_bytes.large_pool.current": 1312817152,
|
111 |
+
"reserved_bytes.large_pool.freed": 0,
|
112 |
+
"reserved_bytes.large_pool.peak": 1312817152,
|
113 |
+
"reserved_bytes.small_pool.allocated": 54525952,
|
114 |
+
"reserved_bytes.small_pool.current": 54525952,
|
115 |
+
"reserved_bytes.small_pool.freed": 0,
|
116 |
+
"reserved_bytes.small_pool.peak": 54525952,
|
117 |
+
"segment.all.allocated": 89,
|
118 |
+
"segment.all.current": 89,
|
119 |
+
"segment.all.freed": 0,
|
120 |
+
"segment.all.peak": 89,
|
121 |
+
"segment.large_pool.allocated": 63,
|
122 |
+
"segment.large_pool.current": 63,
|
123 |
+
"segment.large_pool.freed": 0,
|
124 |
+
"segment.large_pool.peak": 63,
|
125 |
+
"segment.small_pool.allocated": 26,
|
126 |
+
"segment.small_pool.current": 26,
|
127 |
+
"segment.small_pool.freed": 0,
|
128 |
+
"segment.small_pool.peak": 26
|
129 |
+
}
|
130 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/gpu_stats_la.json
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"memory_allocated": 2317025792,
|
3 |
+
"max_memory_allocated": 2365171200,
|
4 |
+
"memory_reserved": 2537553920,
|
5 |
+
"max_memory_reserved": 2646605824,
|
6 |
+
"memory_stats": {
|
7 |
+
"active.all.allocated": 1187828,
|
8 |
+
"active.all.current": 3154,
|
9 |
+
"active.all.freed": 1184674,
|
10 |
+
"active.all.peak": 3162,
|
11 |
+
"active.large_pool.allocated": 98722,
|
12 |
+
"active.large_pool.current": 451,
|
13 |
+
"active.large_pool.freed": 98271,
|
14 |
+
"active.large_pool.peak": 457,
|
15 |
+
"active.small_pool.allocated": 1089106,
|
16 |
+
"active.small_pool.current": 2703,
|
17 |
+
"active.small_pool.freed": 1086403,
|
18 |
+
"active.small_pool.peak": 2711,
|
19 |
+
"active_bytes.all.allocated": 535694891008,
|
20 |
+
"active_bytes.all.current": 2317025792,
|
21 |
+
"active_bytes.all.freed": 533377865216,
|
22 |
+
"active_bytes.all.peak": 2365171200,
|
23 |
+
"active_bytes.large_pool.allocated": 307173800960,
|
24 |
+
"active_bytes.large_pool.current": 2309062656,
|
25 |
+
"active_bytes.large_pool.freed": 304864738304,
|
26 |
+
"active_bytes.large_pool.peak": 2357886976,
|
27 |
+
"active_bytes.small_pool.allocated": 228521090048,
|
28 |
+
"active_bytes.small_pool.current": 7963136,
|
29 |
+
"active_bytes.small_pool.freed": 228513126912,
|
30 |
+
"active_bytes.small_pool.peak": 9539072,
|
31 |
+
"allocated_bytes.all.allocated": 535694891008,
|
32 |
+
"allocated_bytes.all.current": 2317025792,
|
33 |
+
"allocated_bytes.all.freed": 533377865216,
|
34 |
+
"allocated_bytes.all.peak": 2365171200,
|
35 |
+
"allocated_bytes.large_pool.allocated": 307173800960,
|
36 |
+
"allocated_bytes.large_pool.current": 2309062656,
|
37 |
+
"allocated_bytes.large_pool.freed": 304864738304,
|
38 |
+
"allocated_bytes.large_pool.peak": 2357886976,
|
39 |
+
"allocated_bytes.small_pool.allocated": 228521090048,
|
40 |
+
"allocated_bytes.small_pool.current": 7963136,
|
41 |
+
"allocated_bytes.small_pool.freed": 228513126912,
|
42 |
+
"allocated_bytes.small_pool.peak": 9539072,
|
43 |
+
"allocation.all.allocated": 1187828,
|
44 |
+
"allocation.all.current": 3154,
|
45 |
+
"allocation.all.freed": 1184674,
|
46 |
+
"allocation.all.peak": 3162,
|
47 |
+
"allocation.large_pool.allocated": 98722,
|
48 |
+
"allocation.large_pool.current": 451,
|
49 |
+
"allocation.large_pool.freed": 98271,
|
50 |
+
"allocation.large_pool.peak": 457,
|
51 |
+
"allocation.small_pool.allocated": 1089106,
|
52 |
+
"allocation.small_pool.current": 2703,
|
53 |
+
"allocation.small_pool.freed": 1086403,
|
54 |
+
"allocation.small_pool.peak": 2711,
|
55 |
+
"inactive_split.all.allocated": 716026,
|
56 |
+
"inactive_split.all.current": 113,
|
57 |
+
"inactive_split.all.freed": 715913,
|
58 |
+
"inactive_split.all.peak": 585,
|
59 |
+
"inactive_split.large_pool.allocated": 87028,
|
60 |
+
"inactive_split.large_pool.current": 106,
|
61 |
+
"inactive_split.large_pool.freed": 86922,
|
62 |
+
"inactive_split.large_pool.peak": 114,
|
63 |
+
"inactive_split.small_pool.allocated": 628998,
|
64 |
+
"inactive_split.small_pool.current": 7,
|
65 |
+
"inactive_split.small_pool.freed": 628991,
|
66 |
+
"inactive_split.small_pool.peak": 516,
|
67 |
+
"inactive_split_bytes.all.allocated": 504578099200,
|
68 |
+
"inactive_split_bytes.all.current": 218430976,
|
69 |
+
"inactive_split_bytes.all.freed": 504359668224,
|
70 |
+
"inactive_split_bytes.all.peak": 258366464,
|
71 |
+
"inactive_split_bytes.large_pool.allocated": 272549037056,
|
72 |
+
"inactive_split_bytes.large_pool.current": 218005504,
|
73 |
+
"inactive_split_bytes.large_pool.freed": 272331031552,
|
74 |
+
"inactive_split_bytes.large_pool.peak": 257097728,
|
75 |
+
"inactive_split_bytes.small_pool.allocated": 232029062144,
|
76 |
+
"inactive_split_bytes.small_pool.current": 425472,
|
77 |
+
"inactive_split_bytes.small_pool.freed": 232028636672,
|
78 |
+
"inactive_split_bytes.small_pool.peak": 3717632,
|
79 |
+
"max_split_size": -1,
|
80 |
+
"num_alloc_retries": 0,
|
81 |
+
"num_device_alloc": 150,
|
82 |
+
"num_device_free": 17,
|
83 |
+
"num_ooms": 0,
|
84 |
+
"num_sync_all_streams": 3,
|
85 |
+
"oversize_allocations.allocated": 0,
|
86 |
+
"oversize_allocations.current": 0,
|
87 |
+
"oversize_allocations.freed": 0,
|
88 |
+
"oversize_allocations.peak": 0,
|
89 |
+
"oversize_segments.allocated": 0,
|
90 |
+
"oversize_segments.current": 0,
|
91 |
+
"oversize_segments.freed": 0,
|
92 |
+
"oversize_segments.peak": 0,
|
93 |
+
"requested_bytes.all.allocated": 526471447634,
|
94 |
+
"requested_bytes.all.current": 2311816512,
|
95 |
+
"requested_bytes.all.freed": 524159631122,
|
96 |
+
"requested_bytes.all.peak": 2360647952,
|
97 |
+
"requested_bytes.large_pool.allocated": 298113684744,
|
98 |
+
"requested_bytes.large_pool.current": 2304911360,
|
99 |
+
"requested_bytes.large_pool.freed": 295808773384,
|
100 |
+
"requested_bytes.large_pool.peak": 2353375232,
|
101 |
+
"requested_bytes.small_pool.allocated": 228357762890,
|
102 |
+
"requested_bytes.small_pool.current": 6905152,
|
103 |
+
"requested_bytes.small_pool.freed": 228350857738,
|
104 |
+
"requested_bytes.small_pool.peak": 8780848,
|
105 |
+
"reserved_bytes.all.allocated": 2761949184,
|
106 |
+
"reserved_bytes.all.current": 2537553920,
|
107 |
+
"reserved_bytes.all.freed": 224395264,
|
108 |
+
"reserved_bytes.all.peak": 2646605824,
|
109 |
+
"reserved_bytes.large_pool.allocated": 2745171968,
|
110 |
+
"reserved_bytes.large_pool.current": 2527068160,
|
111 |
+
"reserved_bytes.large_pool.freed": 218103808,
|
112 |
+
"reserved_bytes.large_pool.peak": 2636120064,
|
113 |
+
"reserved_bytes.small_pool.allocated": 16777216,
|
114 |
+
"reserved_bytes.small_pool.current": 10485760,
|
115 |
+
"reserved_bytes.small_pool.freed": 6291456,
|
116 |
+
"reserved_bytes.small_pool.peak": 10485760,
|
117 |
+
"segment.all.allocated": 150,
|
118 |
+
"segment.all.current": 133,
|
119 |
+
"segment.all.freed": 17,
|
120 |
+
"segment.all.peak": 140,
|
121 |
+
"segment.large_pool.allocated": 142,
|
122 |
+
"segment.large_pool.current": 128,
|
123 |
+
"segment.large_pool.freed": 14,
|
124 |
+
"segment.large_pool.peak": 135,
|
125 |
+
"segment.small_pool.allocated": 8,
|
126 |
+
"segment.small_pool.current": 5,
|
127 |
+
"segment.small_pool.freed": 3,
|
128 |
+
"segment.small_pool.peak": 5
|
129 |
+
}
|
130 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
outputs/cola/bert-base-uncased_loratrain_val_8_16_0.1_5e-05_42/step_1999/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|