StevenLimcorn commited on
Commit
d0cc5eb
·
1 Parent(s): 2d6804d

Upload 9 files

Browse files
config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
- "_name_or_path": "/content/content/result/semeval-unsup-promcse-bert-base-uncased-semeval2014-restaurants",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
@@ -18,8 +17,8 @@
18
  "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
- "transformers_version": "4.28.1",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
  "vocab_size": 30522
25
- }
 
1
  {
2
+ "_name_or_path": "/content/result/semeval-unsup-promcse-bert-base-uncased-semeval2014-restaurants",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
 
7
  "gradient_checkpointing": false,
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
 
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
20
+ "transformers_version": "4.2.1",
21
  "type_vocab_size": 2,
22
  "use_cache": true,
23
  "vocab_size": 30522
24
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ebb43a9e5c5abcb35347fd7ac50afa920310ad7dedbff67e448c81bc007b8d
3
+ size 7086823
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:becfe44fbd3459bd89d25773ebd50a5e7e6003526564e9a4d6ff927e8e0574a6
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "YuxinJiang/unsup-promcse-bert-base-uncased", "do_basic_tokenize": true, "never_split": null}
train_results.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epoch = 30.0
2
+ train_runtime = 1516.7011
3
+ train_samples_per_second = 1.899
trainer_state.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8253311880533157,
3
+ "best_model_checkpoint": "/content/result/semeval-unsup-promcse-bert-base-uncased-semeval2014-restaurants",
4
+ "epoch": 30.0,
5
+ "global_step": 2880,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.3,
12
+ "eval_avg_sts": 0.7734256836656246,
13
+ "eval_sickr_spearman": 0.7301818685217797,
14
+ "eval_stsb_spearman": 0.8166694988094696,
15
+ "step": 125
16
+ },
17
+ {
18
+ "epoch": 2.6,
19
+ "eval_avg_sts": 0.7703746643952976,
20
+ "eval_sickr_spearman": 0.7321891362819799,
21
+ "eval_stsb_spearman": 0.8085601925086153,
22
+ "step": 250
23
+ },
24
+ {
25
+ "epoch": 3.91,
26
+ "eval_avg_sts": 0.7788971740918453,
27
+ "eval_sickr_spearman": 0.7471303631307394,
28
+ "eval_stsb_spearman": 0.8106639850529513,
29
+ "step": 375
30
+ },
31
+ {
32
+ "epoch": 5.21,
33
+ "learning_rate": 0.024791666666666663,
34
+ "loss": 0.0007,
35
+ "step": 500
36
+ },
37
+ {
38
+ "epoch": 5.21,
39
+ "eval_avg_sts": 0.7773926482680351,
40
+ "eval_sickr_spearman": 0.7365530020031397,
41
+ "eval_stsb_spearman": 0.8182322945329304,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 6.51,
46
+ "eval_avg_sts": 0.775445904944555,
47
+ "eval_sickr_spearman": 0.740137130823553,
48
+ "eval_stsb_spearman": 0.8107546790655571,
49
+ "step": 625
50
+ },
51
+ {
52
+ "epoch": 7.81,
53
+ "eval_avg_sts": 0.7784724629491336,
54
+ "eval_sickr_spearman": 0.7476811838020316,
55
+ "eval_stsb_spearman": 0.8092637420962356,
56
+ "step": 750
57
+ },
58
+ {
59
+ "epoch": 9.11,
60
+ "eval_avg_sts": 0.7813093219093801,
61
+ "eval_sickr_spearman": 0.7487985313147865,
62
+ "eval_stsb_spearman": 0.8138201125039736,
63
+ "step": 875
64
+ },
65
+ {
66
+ "epoch": 10.42,
67
+ "learning_rate": 0.019583333333333335,
68
+ "loss": 0.0006,
69
+ "step": 1000
70
+ },
71
+ {
72
+ "epoch": 10.42,
73
+ "eval_avg_sts": 0.7821698012287537,
74
+ "eval_sickr_spearman": 0.7488977155392563,
75
+ "eval_stsb_spearman": 0.8154418869182509,
76
+ "step": 1000
77
+ },
78
+ {
79
+ "epoch": 11.72,
80
+ "eval_avg_sts": 0.7816201340578361,
81
+ "eval_sickr_spearman": 0.7485443026948746,
82
+ "eval_stsb_spearman": 0.8146959654207976,
83
+ "step": 1125
84
+ },
85
+ {
86
+ "epoch": 13.02,
87
+ "eval_avg_sts": 0.7749666873628911,
88
+ "eval_sickr_spearman": 0.7446217467337283,
89
+ "eval_stsb_spearman": 0.8053116279920538,
90
+ "step": 1250
91
+ },
92
+ {
93
+ "epoch": 14.32,
94
+ "eval_avg_sts": 0.7836477830344395,
95
+ "eval_sickr_spearman": 0.7419643780155633,
96
+ "eval_stsb_spearman": 0.8253311880533157,
97
+ "step": 1375
98
+ },
99
+ {
100
+ "epoch": 15.62,
101
+ "learning_rate": 0.014375,
102
+ "loss": 0.0006,
103
+ "step": 1500
104
+ },
105
+ {
106
+ "epoch": 15.62,
107
+ "eval_avg_sts": 0.7843268448693694,
108
+ "eval_sickr_spearman": 0.744167948887341,
109
+ "eval_stsb_spearman": 0.824485740851398,
110
+ "step": 1500
111
+ },
112
+ {
113
+ "epoch": 16.93,
114
+ "eval_avg_sts": 0.7833021826430621,
115
+ "eval_sickr_spearman": 0.7446480677773164,
116
+ "eval_stsb_spearman": 0.8219562975088077,
117
+ "step": 1625
118
+ },
119
+ {
120
+ "epoch": 18.23,
121
+ "eval_avg_sts": 0.7763940328282399,
122
+ "eval_sickr_spearman": 0.7362944505840984,
123
+ "eval_stsb_spearman": 0.8164936150723816,
124
+ "step": 1750
125
+ },
126
+ {
127
+ "epoch": 19.53,
128
+ "eval_avg_sts": 0.7798534729613469,
129
+ "eval_sickr_spearman": 0.7372460427657902,
130
+ "eval_stsb_spearman": 0.8224609031569036,
131
+ "step": 1875
132
+ },
133
+ {
134
+ "epoch": 20.83,
135
+ "learning_rate": 0.009166666666666667,
136
+ "loss": 0.0005,
137
+ "step": 2000
138
+ },
139
+ {
140
+ "epoch": 20.83,
141
+ "eval_avg_sts": 0.7800079514190481,
142
+ "eval_sickr_spearman": 0.7419313806488752,
143
+ "eval_stsb_spearman": 0.8180845221892211,
144
+ "step": 2000
145
+ },
146
+ {
147
+ "epoch": 22.14,
148
+ "eval_avg_sts": 0.775204360127723,
149
+ "eval_sickr_spearman": 0.7362090993168428,
150
+ "eval_stsb_spearman": 0.8141996209386033,
151
+ "step": 2125
152
+ },
153
+ {
154
+ "epoch": 23.44,
155
+ "eval_avg_sts": 0.7765059369521707,
156
+ "eval_sickr_spearman": 0.737198107726555,
157
+ "eval_stsb_spearman": 0.8158137661777864,
158
+ "step": 2250
159
+ },
160
+ {
161
+ "epoch": 24.74,
162
+ "eval_avg_sts": 0.7762751811566875,
163
+ "eval_sickr_spearman": 0.7387870245932292,
164
+ "eval_stsb_spearman": 0.8137633377201456,
165
+ "step": 2375
166
+ },
167
+ {
168
+ "epoch": 26.04,
169
+ "learning_rate": 0.003958333333333334,
170
+ "loss": 0.0004,
171
+ "step": 2500
172
+ },
173
+ {
174
+ "epoch": 26.04,
175
+ "eval_avg_sts": 0.7760480299115587,
176
+ "eval_sickr_spearman": 0.7409553886676528,
177
+ "eval_stsb_spearman": 0.8111406711554647,
178
+ "step": 2500
179
+ },
180
+ {
181
+ "epoch": 27.34,
182
+ "eval_avg_sts": 0.7772760835199366,
183
+ "eval_sickr_spearman": 0.7415818102926086,
184
+ "eval_stsb_spearman": 0.8129703567472646,
185
+ "step": 2625
186
+ },
187
+ {
188
+ "epoch": 28.65,
189
+ "eval_avg_sts": 0.7773403818371579,
190
+ "eval_sickr_spearman": 0.7416166328411512,
191
+ "eval_stsb_spearman": 0.8130641308331646,
192
+ "step": 2750
193
+ },
194
+ {
195
+ "epoch": 29.95,
196
+ "eval_avg_sts": 0.7772868899414305,
197
+ "eval_sickr_spearman": 0.7413437859462035,
198
+ "eval_stsb_spearman": 0.8132299939366575,
199
+ "step": 2875
200
+ },
201
+ {
202
+ "epoch": 30.0,
203
+ "step": 2880,
204
+ "train_runtime": 1516.7011,
205
+ "train_samples_per_second": 1.899
206
+ }
207
+ ],
208
+ "max_steps": 2880,
209
+ "num_train_epochs": 30,
210
+ "total_flos": 3837671792925696,
211
+ "trial_name": null,
212
+ "trial_params": null
213
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247902bef5ea1090ba0bc27182e6799820174bf65be968546a9a310f73d7d5b0
3
+ size 2107
vocab.txt ADDED
The diff for this file is too large to render. See raw diff