jonny9f commited on
Commit
eab6132
·
verified ·
1 Parent(s): 2a0f942

Upload folder using huggingface_hub

Browse files
Files changed (42) hide show
  1. README.md +75 -0
  2. checkpoint-1500/config.json +26 -0
  3. checkpoint-1500/model.safetensors +3 -0
  4. checkpoint-1500/optimizer.pt +3 -0
  5. checkpoint-1500/rng_state.pth +3 -0
  6. checkpoint-1500/scheduler.pt +3 -0
  7. checkpoint-1500/special_tokens_map.json +37 -0
  8. checkpoint-1500/tokenizer.json +0 -0
  9. checkpoint-1500/tokenizer_config.json +58 -0
  10. checkpoint-1500/trainer_state.json +310 -0
  11. checkpoint-1500/training_args.bin +3 -0
  12. checkpoint-1500/vocab.txt +0 -0
  13. checkpoint-2000/config.json +26 -0
  14. checkpoint-2000/model.safetensors +3 -0
  15. checkpoint-2000/optimizer.pt +3 -0
  16. checkpoint-2000/rng_state.pth +3 -0
  17. checkpoint-2000/scheduler.pt +3 -0
  18. checkpoint-2000/special_tokens_map.json +37 -0
  19. checkpoint-2000/tokenizer.json +0 -0
  20. checkpoint-2000/tokenizer_config.json +58 -0
  21. checkpoint-2000/trainer_state.json +399 -0
  22. checkpoint-2000/training_args.bin +3 -0
  23. checkpoint-2000/vocab.txt +0 -0
  24. checkpoint-2280/config.json +26 -0
  25. checkpoint-2280/model.safetensors +3 -0
  26. checkpoint-2280/optimizer.pt +3 -0
  27. checkpoint-2280/rng_state.pth +3 -0
  28. checkpoint-2280/scheduler.pt +3 -0
  29. checkpoint-2280/special_tokens_map.json +37 -0
  30. checkpoint-2280/tokenizer.json +0 -0
  31. checkpoint-2280/tokenizer_config.json +58 -0
  32. checkpoint-2280/trainer_state.json +434 -0
  33. checkpoint-2280/training_args.bin +3 -0
  34. checkpoint-2280/vocab.txt +0 -0
  35. config.json +26 -0
  36. eval_metrics.json +18 -0
  37. model.safetensors +3 -0
  38. special_tokens_map.json +37 -0
  39. tokenizer.json +0 -0
  40. tokenizer_config.json +58 -0
  41. training_args.bin +3 -0
  42. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - bge
5
+ - food
6
+ - reranking
7
+ - sequence-classification
8
+ - sentence-similarity
9
+ library_name: transformers
10
+ pipeline_tag: text-classification
11
+ license: mit
12
+ ---
13
+
14
+ # Food Re-ranker Model
15
+
16
+ This is a fine-tuned BGE (BAAI General Embedding) model trained for binary classification of food description pairs. The model determines whether two food descriptions refer to the same item, enabling accurate re-ranking of search results.
17
+
18
+ ## Model Details
19
+
20
+ ### Model Description
21
+ - **Base Model**: BAAI/bge-base-en-v1.5
22
+ - **Model Type**: bert
23
+ - **Task**: Binary classification (food description matching)
24
+ - **Output**: Binary classification scores (0 = different foods, 1 = same food)
25
+
26
+ ### Architecture
27
+ The model uses the BGE architecture with the following specifications:
28
+ - **Hidden Size**: 768
29
+ - **Number of Layers**: 12
30
+ - **Number of Attention Heads**: 12
31
+ - **Intermediate Size**: 3072
32
+ - **Maximum Position Embeddings**: 512
33
+ - **Vocabulary Size**: 30522
34
+ - **Hidden Act**: gelu
35
+
36
+ ### Performance Metrics
37
+ Key evaluation metrics on the test set (best checkpoint, step 2000 — from `checkpoint-2000/trainer_state.json`):
38
+ - **Accuracy**: 0.9170
39
+ - **F1**: 0.8264
+ - **AUC**: 0.9588
+ - **Precision**: 0.8457
+ - **Recall**: 0.8081
40
+ ## Use Case
41
+
42
+ Designed for improving food search accuracy by re-ranking initial search results, this model:
43
+ - Takes pairs of food descriptions as input
44
+ - Determines if they refer to the same food item
45
+ - Enables more accurate matching of food descriptions
46
+ - Helps surface the most relevant matches in search results
47
+
48
+ ## Training Configuration
49
+ - **Batch Size**: 32
50
+ - **Learning Rate**: 2e-05
51
+ - **Number of Epochs**: 10
52
+ - **Warmup Steps**: 0
53
+ - **Weight Decay**: 0.01
54
+ - **Dropout**: 0.1
55
+ - **Attention Dropout**: 0.1
56
+ - **Layer Norm Eps**: 1e-12
57
+
58
+ ## Example Usage
59
+
60
+ ```python
61
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
62
+
63
+ # Load model
64
+ tokenizer = AutoTokenizer.from_pretrained("jonny9f/food_reranker2")
65
+ model = AutoModelForSequenceClassification.from_pretrained("jonny9f/food_reranker2")
66
+
67
+ # Prepare input
68
+ query = "chicken breast"
69
+ candidate = "grilled chicken breast"
70
+ inputs = tokenizer(query, candidate, padding=True, truncation=True, return_tensors="pt")
71
+
72
+ # Get prediction
73
+ outputs = model(**inputs)
74
+ score = outputs.logits.softmax(dim=1)[0][1].item() # Score for positive class
75
+ ```
checkpoint-1500/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.51.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85999e6c07543e7d00356ae72be27a068b634dc8ff23562e10dd2160a6e053ea
3
+ size 437958648
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f96f1b0ccd7d27d9734883b475135b5db96bc60358adbb1642e2d45ec87f5f
3
+ size 876038795
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867ad06cb65bf1f3cd5c95e4e703f60dcd05051c966fa678f0b8e3e69b269614
3
+ size 14645
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4246e9af2fbe4b4997361ef8d0c0da7b2e1264a2467019754c916a12a724518a
3
+ size 1465
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1500,
3
+ "best_metric": 0.8241525423728814,
4
+ "best_model_checkpoint": "./models/checkpoints_v4/checkpoint-1500",
5
+ "epoch": 6.578947368421053,
6
+ "eval_steps": 500,
7
+ "global_step": 1500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.21929824561403508,
14
+ "grad_norm": 3.511173725128174,
15
+ "learning_rate": 4.298245614035088e-06,
16
+ "loss": 0.8133,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.43859649122807015,
21
+ "grad_norm": 5.231376647949219,
22
+ "learning_rate": 8.68421052631579e-06,
23
+ "loss": 0.5079,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.6578947368421053,
28
+ "grad_norm": 4.2739081382751465,
29
+ "learning_rate": 1.3070175438596493e-05,
30
+ "loss": 0.3543,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.8771929824561403,
35
+ "grad_norm": 4.163576602935791,
36
+ "learning_rate": 1.7456140350877195e-05,
37
+ "loss": 0.3143,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 1.0964912280701755,
42
+ "grad_norm": 3.981715202331543,
43
+ "learning_rate": 1.9795321637426903e-05,
44
+ "loss": 0.3005,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 1.3157894736842106,
49
+ "grad_norm": 4.984482288360596,
50
+ "learning_rate": 1.9307992202729045e-05,
51
+ "loss": 0.2521,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 1.5350877192982457,
56
+ "grad_norm": 7.214444637298584,
57
+ "learning_rate": 1.882066276803119e-05,
58
+ "loss": 0.2481,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 1.7543859649122808,
63
+ "grad_norm": 3.6749589443206787,
64
+ "learning_rate": 1.8333333333333333e-05,
65
+ "loss": 0.2475,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 1.973684210526316,
70
+ "grad_norm": 5.445246696472168,
71
+ "learning_rate": 1.784600389863548e-05,
72
+ "loss": 0.2289,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 2.192982456140351,
77
+ "grad_norm": 12.260554313659668,
78
+ "learning_rate": 1.7358674463937624e-05,
79
+ "loss": 0.1811,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 2.192982456140351,
84
+ "eval_accuracy": 0.8987154150197628,
85
+ "eval_auc": 0.9507858176268903,
86
+ "eval_f1": 0.7724750277469479,
87
+ "eval_false_negatives": 147,
88
+ "eval_false_positives": 58,
89
+ "eval_loss": 0.29392391443252563,
90
+ "eval_npv": 0.9091470951792336,
91
+ "eval_precision": 0.8571428571428571,
92
+ "eval_recall": 0.703030303030303,
93
+ "eval_runtime": 7.4602,
94
+ "eval_samples_per_second": 271.308,
95
+ "eval_specificity": 0.9620667102681492,
96
+ "eval_steps_per_second": 4.289,
97
+ "eval_true_negatives": 1471,
98
+ "eval_true_positives": 348,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 2.412280701754386,
103
+ "grad_norm": 3.9946436882019043,
104
+ "learning_rate": 1.6871345029239766e-05,
105
+ "loss": 0.1934,
106
+ "step": 550
107
+ },
108
+ {
109
+ "epoch": 2.6315789473684212,
110
+ "grad_norm": 6.733097553253174,
111
+ "learning_rate": 1.638401559454191e-05,
112
+ "loss": 0.1759,
113
+ "step": 600
114
+ },
115
+ {
116
+ "epoch": 2.8508771929824563,
117
+ "grad_norm": 12.989212036132812,
118
+ "learning_rate": 1.5896686159844057e-05,
119
+ "loss": 0.1953,
120
+ "step": 650
121
+ },
122
+ {
123
+ "epoch": 3.0701754385964914,
124
+ "grad_norm": 4.068276882171631,
125
+ "learning_rate": 1.5409356725146202e-05,
126
+ "loss": 0.1248,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 3.2894736842105265,
131
+ "grad_norm": 0.6126830577850342,
132
+ "learning_rate": 1.4922027290448344e-05,
133
+ "loss": 0.1289,
134
+ "step": 750
135
+ },
136
+ {
137
+ "epoch": 3.5087719298245617,
138
+ "grad_norm": 6.3761796951293945,
139
+ "learning_rate": 1.4434697855750488e-05,
140
+ "loss": 0.1117,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 3.7280701754385968,
145
+ "grad_norm": 4.546126365661621,
146
+ "learning_rate": 1.3947368421052631e-05,
147
+ "loss": 0.1491,
148
+ "step": 850
149
+ },
150
+ {
151
+ "epoch": 3.9473684210526314,
152
+ "grad_norm": 4.006205081939697,
153
+ "learning_rate": 1.3460038986354777e-05,
154
+ "loss": 0.1345,
155
+ "step": 900
156
+ },
157
+ {
158
+ "epoch": 4.166666666666667,
159
+ "grad_norm": 2.2420225143432617,
160
+ "learning_rate": 1.297270955165692e-05,
161
+ "loss": 0.0904,
162
+ "step": 950
163
+ },
164
+ {
165
+ "epoch": 4.385964912280702,
166
+ "grad_norm": 3.2088565826416016,
167
+ "learning_rate": 1.2485380116959064e-05,
168
+ "loss": 0.0996,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 4.385964912280702,
173
+ "eval_accuracy": 0.9115612648221344,
174
+ "eval_auc": 0.9578519002979434,
175
+ "eval_f1": 0.8182741116751269,
176
+ "eval_false_negatives": 92,
177
+ "eval_false_positives": 87,
178
+ "eval_loss": 0.3153094947338104,
179
+ "eval_npv": 0.940026075619296,
180
+ "eval_precision": 0.8224489795918367,
181
+ "eval_recall": 0.8141414141414142,
182
+ "eval_runtime": 7.4628,
183
+ "eval_samples_per_second": 271.212,
184
+ "eval_specificity": 0.9431000654022237,
185
+ "eval_steps_per_second": 4.288,
186
+ "eval_true_negatives": 1442,
187
+ "eval_true_positives": 403,
188
+ "step": 1000
189
+ },
190
+ {
191
+ "epoch": 4.605263157894737,
192
+ "grad_norm": 10.743696212768555,
193
+ "learning_rate": 1.1998050682261208e-05,
194
+ "loss": 0.0909,
195
+ "step": 1050
196
+ },
197
+ {
198
+ "epoch": 4.824561403508772,
199
+ "grad_norm": 1.6505861282348633,
200
+ "learning_rate": 1.1510721247563355e-05,
201
+ "loss": 0.088,
202
+ "step": 1100
203
+ },
204
+ {
205
+ "epoch": 5.043859649122807,
206
+ "grad_norm": 6.567928314208984,
207
+ "learning_rate": 1.1023391812865499e-05,
208
+ "loss": 0.1129,
209
+ "step": 1150
210
+ },
211
+ {
212
+ "epoch": 5.2631578947368425,
213
+ "grad_norm": 3.445005416870117,
214
+ "learning_rate": 1.0536062378167643e-05,
215
+ "loss": 0.0605,
216
+ "step": 1200
217
+ },
218
+ {
219
+ "epoch": 5.482456140350878,
220
+ "grad_norm": 1.5516074895858765,
221
+ "learning_rate": 1.0048732943469786e-05,
222
+ "loss": 0.0642,
223
+ "step": 1250
224
+ },
225
+ {
226
+ "epoch": 5.701754385964913,
227
+ "grad_norm": 9.6685209274292,
228
+ "learning_rate": 9.56140350877193e-06,
229
+ "loss": 0.0759,
230
+ "step": 1300
231
+ },
232
+ {
233
+ "epoch": 5.921052631578947,
234
+ "grad_norm": 12.226219177246094,
235
+ "learning_rate": 9.074074074074075e-06,
236
+ "loss": 0.0806,
237
+ "step": 1350
238
+ },
239
+ {
240
+ "epoch": 6.140350877192983,
241
+ "grad_norm": 1.530657172203064,
242
+ "learning_rate": 8.586744639376219e-06,
243
+ "loss": 0.0636,
244
+ "step": 1400
245
+ },
246
+ {
247
+ "epoch": 6.359649122807017,
248
+ "grad_norm": 4.697056293487549,
249
+ "learning_rate": 8.099415204678363e-06,
250
+ "loss": 0.0402,
251
+ "step": 1450
252
+ },
253
+ {
254
+ "epoch": 6.578947368421053,
255
+ "grad_norm": 0.09656322002410889,
256
+ "learning_rate": 7.612085769980507e-06,
257
+ "loss": 0.0629,
258
+ "step": 1500
259
+ },
260
+ {
261
+ "epoch": 6.578947368421053,
262
+ "eval_accuracy": 0.9179841897233202,
263
+ "eval_auc": 0.9618949468524354,
264
+ "eval_f1": 0.8241525423728814,
265
+ "eval_false_negatives": 106,
266
+ "eval_false_positives": 60,
267
+ "eval_loss": 0.38592642545700073,
268
+ "eval_npv": 0.9326984126984127,
269
+ "eval_precision": 0.8663697104677061,
270
+ "eval_recall": 0.7858585858585858,
271
+ "eval_runtime": 7.4828,
272
+ "eval_samples_per_second": 270.486,
273
+ "eval_specificity": 0.960758665794637,
274
+ "eval_steps_per_second": 4.276,
275
+ "eval_true_negatives": 1469,
276
+ "eval_true_positives": 389,
277
+ "step": 1500
278
+ }
279
+ ],
280
+ "logging_steps": 50,
281
+ "max_steps": 2280,
282
+ "num_input_tokens_seen": 0,
283
+ "num_train_epochs": 10,
284
+ "save_steps": 500,
285
+ "stateful_callbacks": {
286
+ "EarlyStoppingCallback": {
287
+ "args": {
288
+ "early_stopping_patience": 3,
289
+ "early_stopping_threshold": 0.001
290
+ },
291
+ "attributes": {
292
+ "early_stopping_patience_counter": 0
293
+ }
294
+ },
295
+ "TrainerControl": {
296
+ "args": {
297
+ "should_epoch_stop": false,
298
+ "should_evaluate": false,
299
+ "should_log": false,
300
+ "should_save": true,
301
+ "should_training_stop": false
302
+ },
303
+ "attributes": {}
304
+ }
305
+ },
306
+ "total_flos": 6303614664314880.0,
307
+ "train_batch_size": 32,
308
+ "trial_name": null,
309
+ "trial_params": null
310
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa0b6e71cf97dcf125328dcde4d9907006c97167c4f41af8ac98ea5f88ac6b4
3
+ size 5713
checkpoint-1500/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.51.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41590102e903624f664df64252a13ee88e911b1cb5be074c17b4957d02712d72
3
+ size 437958648
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9540b36b35a13d5702f7e16c9325860ffefc231e7ff5bcfd89f94e7f795172fe
3
+ size 876038795
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f129fc5e5d59c080e9fd48b5d303d31b63156f53c9c6d279333ff06b06681a
3
+ size 14645
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fab9a9e4e1a8e90792d09d3ab98391d1f33f696a4645f21acb71aadc7c4ebe0
3
+ size 1465
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.8264462809917356,
4
+ "best_model_checkpoint": "./models/checkpoints_v4/checkpoint-2000",
5
+ "epoch": 8.771929824561404,
6
+ "eval_steps": 500,
7
+ "global_step": 2000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.21929824561403508,
14
+ "grad_norm": 3.511173725128174,
15
+ "learning_rate": 4.298245614035088e-06,
16
+ "loss": 0.8133,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.43859649122807015,
21
+ "grad_norm": 5.231376647949219,
22
+ "learning_rate": 8.68421052631579e-06,
23
+ "loss": 0.5079,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.6578947368421053,
28
+ "grad_norm": 4.2739081382751465,
29
+ "learning_rate": 1.3070175438596493e-05,
30
+ "loss": 0.3543,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.8771929824561403,
35
+ "grad_norm": 4.163576602935791,
36
+ "learning_rate": 1.7456140350877195e-05,
37
+ "loss": 0.3143,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 1.0964912280701755,
42
+ "grad_norm": 3.981715202331543,
43
+ "learning_rate": 1.9795321637426903e-05,
44
+ "loss": 0.3005,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 1.3157894736842106,
49
+ "grad_norm": 4.984482288360596,
50
+ "learning_rate": 1.9307992202729045e-05,
51
+ "loss": 0.2521,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 1.5350877192982457,
56
+ "grad_norm": 7.214444637298584,
57
+ "learning_rate": 1.882066276803119e-05,
58
+ "loss": 0.2481,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 1.7543859649122808,
63
+ "grad_norm": 3.6749589443206787,
64
+ "learning_rate": 1.8333333333333333e-05,
65
+ "loss": 0.2475,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 1.973684210526316,
70
+ "grad_norm": 5.445246696472168,
71
+ "learning_rate": 1.784600389863548e-05,
72
+ "loss": 0.2289,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 2.192982456140351,
77
+ "grad_norm": 12.260554313659668,
78
+ "learning_rate": 1.7358674463937624e-05,
79
+ "loss": 0.1811,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 2.192982456140351,
84
+ "eval_accuracy": 0.8987154150197628,
85
+ "eval_auc": 0.9507858176268903,
86
+ "eval_f1": 0.7724750277469479,
87
+ "eval_false_negatives": 147,
88
+ "eval_false_positives": 58,
89
+ "eval_loss": 0.29392391443252563,
90
+ "eval_npv": 0.9091470951792336,
91
+ "eval_precision": 0.8571428571428571,
92
+ "eval_recall": 0.703030303030303,
93
+ "eval_runtime": 7.4602,
94
+ "eval_samples_per_second": 271.308,
95
+ "eval_specificity": 0.9620667102681492,
96
+ "eval_steps_per_second": 4.289,
97
+ "eval_true_negatives": 1471,
98
+ "eval_true_positives": 348,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 2.412280701754386,
103
+ "grad_norm": 3.9946436882019043,
104
+ "learning_rate": 1.6871345029239766e-05,
105
+ "loss": 0.1934,
106
+ "step": 550
107
+ },
108
+ {
109
+ "epoch": 2.6315789473684212,
110
+ "grad_norm": 6.733097553253174,
111
+ "learning_rate": 1.638401559454191e-05,
112
+ "loss": 0.1759,
113
+ "step": 600
114
+ },
115
+ {
116
+ "epoch": 2.8508771929824563,
117
+ "grad_norm": 12.989212036132812,
118
+ "learning_rate": 1.5896686159844057e-05,
119
+ "loss": 0.1953,
120
+ "step": 650
121
+ },
122
+ {
123
+ "epoch": 3.0701754385964914,
124
+ "grad_norm": 4.068276882171631,
125
+ "learning_rate": 1.5409356725146202e-05,
126
+ "loss": 0.1248,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 3.2894736842105265,
131
+ "grad_norm": 0.6126830577850342,
132
+ "learning_rate": 1.4922027290448344e-05,
133
+ "loss": 0.1289,
134
+ "step": 750
135
+ },
136
+ {
137
+ "epoch": 3.5087719298245617,
138
+ "grad_norm": 6.3761796951293945,
139
+ "learning_rate": 1.4434697855750488e-05,
140
+ "loss": 0.1117,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 3.7280701754385968,
145
+ "grad_norm": 4.546126365661621,
146
+ "learning_rate": 1.3947368421052631e-05,
147
+ "loss": 0.1491,
148
+ "step": 850
149
+ },
150
+ {
151
+ "epoch": 3.9473684210526314,
152
+ "grad_norm": 4.006205081939697,
153
+ "learning_rate": 1.3460038986354777e-05,
154
+ "loss": 0.1345,
155
+ "step": 900
156
+ },
157
+ {
158
+ "epoch": 4.166666666666667,
159
+ "grad_norm": 2.2420225143432617,
160
+ "learning_rate": 1.297270955165692e-05,
161
+ "loss": 0.0904,
162
+ "step": 950
163
+ },
164
+ {
165
+ "epoch": 4.385964912280702,
166
+ "grad_norm": 3.2088565826416016,
167
+ "learning_rate": 1.2485380116959064e-05,
168
+ "loss": 0.0996,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 4.385964912280702,
173
+ "eval_accuracy": 0.9115612648221344,
174
+ "eval_auc": 0.9578519002979434,
175
+ "eval_f1": 0.8182741116751269,
176
+ "eval_false_negatives": 92,
177
+ "eval_false_positives": 87,
178
+ "eval_loss": 0.3153094947338104,
179
+ "eval_npv": 0.940026075619296,
180
+ "eval_precision": 0.8224489795918367,
181
+ "eval_recall": 0.8141414141414142,
182
+ "eval_runtime": 7.4628,
183
+ "eval_samples_per_second": 271.212,
184
+ "eval_specificity": 0.9431000654022237,
185
+ "eval_steps_per_second": 4.288,
186
+ "eval_true_negatives": 1442,
187
+ "eval_true_positives": 403,
188
+ "step": 1000
189
+ },
190
+ {
191
+ "epoch": 4.605263157894737,
192
+ "grad_norm": 10.743696212768555,
193
+ "learning_rate": 1.1998050682261208e-05,
194
+ "loss": 0.0909,
195
+ "step": 1050
196
+ },
197
+ {
198
+ "epoch": 4.824561403508772,
199
+ "grad_norm": 1.6505861282348633,
200
+ "learning_rate": 1.1510721247563355e-05,
201
+ "loss": 0.088,
202
+ "step": 1100
203
+ },
204
+ {
205
+ "epoch": 5.043859649122807,
206
+ "grad_norm": 6.567928314208984,
207
+ "learning_rate": 1.1023391812865499e-05,
208
+ "loss": 0.1129,
209
+ "step": 1150
210
+ },
211
+ {
212
+ "epoch": 5.2631578947368425,
213
+ "grad_norm": 3.445005416870117,
214
+ "learning_rate": 1.0536062378167643e-05,
215
+ "loss": 0.0605,
216
+ "step": 1200
217
+ },
218
+ {
219
+ "epoch": 5.482456140350878,
220
+ "grad_norm": 1.5516074895858765,
221
+ "learning_rate": 1.0048732943469786e-05,
222
+ "loss": 0.0642,
223
+ "step": 1250
224
+ },
225
+ {
226
+ "epoch": 5.701754385964913,
227
+ "grad_norm": 9.6685209274292,
228
+ "learning_rate": 9.56140350877193e-06,
229
+ "loss": 0.0759,
230
+ "step": 1300
231
+ },
232
+ {
233
+ "epoch": 5.921052631578947,
234
+ "grad_norm": 12.226219177246094,
235
+ "learning_rate": 9.074074074074075e-06,
236
+ "loss": 0.0806,
237
+ "step": 1350
238
+ },
239
+ {
240
+ "epoch": 6.140350877192983,
241
+ "grad_norm": 1.530657172203064,
242
+ "learning_rate": 8.586744639376219e-06,
243
+ "loss": 0.0636,
244
+ "step": 1400
245
+ },
246
+ {
247
+ "epoch": 6.359649122807017,
248
+ "grad_norm": 4.697056293487549,
249
+ "learning_rate": 8.099415204678363e-06,
250
+ "loss": 0.0402,
251
+ "step": 1450
252
+ },
253
+ {
254
+ "epoch": 6.578947368421053,
255
+ "grad_norm": 0.09656322002410889,
256
+ "learning_rate": 7.612085769980507e-06,
257
+ "loss": 0.0629,
258
+ "step": 1500
259
+ },
260
+ {
261
+ "epoch": 6.578947368421053,
262
+ "eval_accuracy": 0.9179841897233202,
263
+ "eval_auc": 0.9618949468524354,
264
+ "eval_f1": 0.8241525423728814,
265
+ "eval_false_negatives": 106,
266
+ "eval_false_positives": 60,
267
+ "eval_loss": 0.38592642545700073,
268
+ "eval_npv": 0.9326984126984127,
269
+ "eval_precision": 0.8663697104677061,
270
+ "eval_recall": 0.7858585858585858,
271
+ "eval_runtime": 7.4828,
272
+ "eval_samples_per_second": 270.486,
273
+ "eval_specificity": 0.960758665794637,
274
+ "eval_steps_per_second": 4.276,
275
+ "eval_true_negatives": 1469,
276
+ "eval_true_positives": 389,
277
+ "step": 1500
278
+ },
279
+ {
280
+ "epoch": 6.798245614035087,
281
+ "grad_norm": 36.65667724609375,
282
+ "learning_rate": 7.124756335282652e-06,
283
+ "loss": 0.0606,
284
+ "step": 1550
285
+ },
286
+ {
287
+ "epoch": 7.017543859649122,
288
+ "grad_norm": 2.3685500621795654,
289
+ "learning_rate": 6.637426900584796e-06,
290
+ "loss": 0.0513,
291
+ "step": 1600
292
+ },
293
+ {
294
+ "epoch": 7.2368421052631575,
295
+ "grad_norm": 2.3388023376464844,
296
+ "learning_rate": 6.15009746588694e-06,
297
+ "loss": 0.0328,
298
+ "step": 1650
299
+ },
300
+ {
301
+ "epoch": 7.456140350877193,
302
+ "grad_norm": 0.18014875054359436,
303
+ "learning_rate": 5.662768031189084e-06,
304
+ "loss": 0.0526,
305
+ "step": 1700
306
+ },
307
+ {
308
+ "epoch": 7.675438596491228,
309
+ "grad_norm": 5.354085445404053,
310
+ "learning_rate": 5.175438596491229e-06,
311
+ "loss": 0.033,
312
+ "step": 1750
313
+ },
314
+ {
315
+ "epoch": 7.894736842105263,
316
+ "grad_norm": 0.14219924807548523,
317
+ "learning_rate": 4.688109161793373e-06,
318
+ "loss": 0.0286,
319
+ "step": 1800
320
+ },
321
+ {
322
+ "epoch": 8.114035087719298,
323
+ "grad_norm": 0.8848229646682739,
324
+ "learning_rate": 4.200779727095517e-06,
325
+ "loss": 0.023,
326
+ "step": 1850
327
+ },
328
+ {
329
+ "epoch": 8.333333333333334,
330
+ "grad_norm": 0.09083296358585358,
331
+ "learning_rate": 3.713450292397661e-06,
332
+ "loss": 0.0342,
333
+ "step": 1900
334
+ },
335
+ {
336
+ "epoch": 8.552631578947368,
337
+ "grad_norm": 5.818343162536621,
338
+ "learning_rate": 3.2261208576998056e-06,
339
+ "loss": 0.0317,
340
+ "step": 1950
341
+ },
342
+ {
343
+ "epoch": 8.771929824561404,
344
+ "grad_norm": 0.027245348319411278,
345
+ "learning_rate": 2.7387914230019497e-06,
346
+ "loss": 0.0344,
347
+ "step": 2000
348
+ },
349
+ {
350
+ "epoch": 8.771929824561404,
351
+ "eval_accuracy": 0.9169960474308301,
352
+ "eval_auc": 0.9587609251441821,
353
+ "eval_f1": 0.8264462809917356,
354
+ "eval_false_negatives": 95,
355
+ "eval_false_positives": 73,
356
+ "eval_loss": 0.44914552569389343,
357
+ "eval_npv": 0.938749194068343,
358
+ "eval_precision": 0.8456659619450317,
359
+ "eval_recall": 0.8080808080808081,
360
+ "eval_runtime": 7.4877,
361
+ "eval_samples_per_second": 270.31,
362
+ "eval_specificity": 0.9522563767168084,
363
+ "eval_steps_per_second": 4.274,
364
+ "eval_true_negatives": 1456,
365
+ "eval_true_positives": 400,
366
+ "step": 2000
367
+ }
368
+ ],
369
+ "logging_steps": 50,
370
+ "max_steps": 2280,
371
+ "num_input_tokens_seen": 0,
372
+ "num_train_epochs": 10,
373
+ "save_steps": 500,
374
+ "stateful_callbacks": {
375
+ "EarlyStoppingCallback": {
376
+ "args": {
377
+ "early_stopping_patience": 3,
378
+ "early_stopping_threshold": 0.001
379
+ },
380
+ "attributes": {
381
+ "early_stopping_patience_counter": 0
382
+ }
383
+ },
384
+ "TrainerControl": {
385
+ "args": {
386
+ "should_epoch_stop": false,
387
+ "should_evaluate": false,
388
+ "should_log": false,
389
+ "should_save": true,
390
+ "should_training_stop": false
391
+ },
392
+ "attributes": {}
393
+ }
394
+ },
395
+ "total_flos": 8404819552419840.0,
396
+ "train_batch_size": 32,
397
+ "trial_name": null,
398
+ "trial_params": null
399
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa0b6e71cf97dcf125328dcde4d9907006c97167c4f41af8ac98ea5f88ac6b4
3
+ size 5713
checkpoint-2000/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2280/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.51.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
checkpoint-2280/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:898191b3113c997f08feb51d48ffff8478f23c749d621d6ec03b43acea202416
3
+ size 437958648
checkpoint-2280/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec925b71bf77e7f6c8815d63a7b3484f3b38ba069d09ccc30e4a15db317c4cdb
3
+ size 876038795
checkpoint-2280/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd6b56f462107b8bd25375a943da9b9dfe7f453a3380ae0c6204366bc7628a0
3
+ size 14645
checkpoint-2280/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:319d0be530cda2200e1d0a0824971ef0e84d372efc641975cd1f3d064cf6b46a
3
+ size 1465
checkpoint-2280/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-2280/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2280/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-2280/trainer_state.json ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2000,
3
+ "best_metric": 0.8264462809917356,
4
+ "best_model_checkpoint": "./models/checkpoints_v4/checkpoint-2000",
5
+ "epoch": 10.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2280,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.21929824561403508,
14
+ "grad_norm": 3.511173725128174,
15
+ "learning_rate": 4.298245614035088e-06,
16
+ "loss": 0.8133,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.43859649122807015,
21
+ "grad_norm": 5.231376647949219,
22
+ "learning_rate": 8.68421052631579e-06,
23
+ "loss": 0.5079,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.6578947368421053,
28
+ "grad_norm": 4.2739081382751465,
29
+ "learning_rate": 1.3070175438596493e-05,
30
+ "loss": 0.3543,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.8771929824561403,
35
+ "grad_norm": 4.163576602935791,
36
+ "learning_rate": 1.7456140350877195e-05,
37
+ "loss": 0.3143,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 1.0964912280701755,
42
+ "grad_norm": 3.981715202331543,
43
+ "learning_rate": 1.9795321637426903e-05,
44
+ "loss": 0.3005,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 1.3157894736842106,
49
+ "grad_norm": 4.984482288360596,
50
+ "learning_rate": 1.9307992202729045e-05,
51
+ "loss": 0.2521,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 1.5350877192982457,
56
+ "grad_norm": 7.214444637298584,
57
+ "learning_rate": 1.882066276803119e-05,
58
+ "loss": 0.2481,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 1.7543859649122808,
63
+ "grad_norm": 3.6749589443206787,
64
+ "learning_rate": 1.8333333333333333e-05,
65
+ "loss": 0.2475,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 1.973684210526316,
70
+ "grad_norm": 5.445246696472168,
71
+ "learning_rate": 1.784600389863548e-05,
72
+ "loss": 0.2289,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 2.192982456140351,
77
+ "grad_norm": 12.260554313659668,
78
+ "learning_rate": 1.7358674463937624e-05,
79
+ "loss": 0.1811,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 2.192982456140351,
84
+ "eval_accuracy": 0.8987154150197628,
85
+ "eval_auc": 0.9507858176268903,
86
+ "eval_f1": 0.7724750277469479,
87
+ "eval_false_negatives": 147,
88
+ "eval_false_positives": 58,
89
+ "eval_loss": 0.29392391443252563,
90
+ "eval_npv": 0.9091470951792336,
91
+ "eval_precision": 0.8571428571428571,
92
+ "eval_recall": 0.703030303030303,
93
+ "eval_runtime": 7.4602,
94
+ "eval_samples_per_second": 271.308,
95
+ "eval_specificity": 0.9620667102681492,
96
+ "eval_steps_per_second": 4.289,
97
+ "eval_true_negatives": 1471,
98
+ "eval_true_positives": 348,
99
+ "step": 500
100
+ },
101
+ {
102
+ "epoch": 2.412280701754386,
103
+ "grad_norm": 3.9946436882019043,
104
+ "learning_rate": 1.6871345029239766e-05,
105
+ "loss": 0.1934,
106
+ "step": 550
107
+ },
108
+ {
109
+ "epoch": 2.6315789473684212,
110
+ "grad_norm": 6.733097553253174,
111
+ "learning_rate": 1.638401559454191e-05,
112
+ "loss": 0.1759,
113
+ "step": 600
114
+ },
115
+ {
116
+ "epoch": 2.8508771929824563,
117
+ "grad_norm": 12.989212036132812,
118
+ "learning_rate": 1.5896686159844057e-05,
119
+ "loss": 0.1953,
120
+ "step": 650
121
+ },
122
+ {
123
+ "epoch": 3.0701754385964914,
124
+ "grad_norm": 4.068276882171631,
125
+ "learning_rate": 1.5409356725146202e-05,
126
+ "loss": 0.1248,
127
+ "step": 700
128
+ },
129
+ {
130
+ "epoch": 3.2894736842105265,
131
+ "grad_norm": 0.6126830577850342,
132
+ "learning_rate": 1.4922027290448344e-05,
133
+ "loss": 0.1289,
134
+ "step": 750
135
+ },
136
+ {
137
+ "epoch": 3.5087719298245617,
138
+ "grad_norm": 6.3761796951293945,
139
+ "learning_rate": 1.4434697855750488e-05,
140
+ "loss": 0.1117,
141
+ "step": 800
142
+ },
143
+ {
144
+ "epoch": 3.7280701754385968,
145
+ "grad_norm": 4.546126365661621,
146
+ "learning_rate": 1.3947368421052631e-05,
147
+ "loss": 0.1491,
148
+ "step": 850
149
+ },
150
+ {
151
+ "epoch": 3.9473684210526314,
152
+ "grad_norm": 4.006205081939697,
153
+ "learning_rate": 1.3460038986354777e-05,
154
+ "loss": 0.1345,
155
+ "step": 900
156
+ },
157
+ {
158
+ "epoch": 4.166666666666667,
159
+ "grad_norm": 2.2420225143432617,
160
+ "learning_rate": 1.297270955165692e-05,
161
+ "loss": 0.0904,
162
+ "step": 950
163
+ },
164
+ {
165
+ "epoch": 4.385964912280702,
166
+ "grad_norm": 3.2088565826416016,
167
+ "learning_rate": 1.2485380116959064e-05,
168
+ "loss": 0.0996,
169
+ "step": 1000
170
+ },
171
+ {
172
+ "epoch": 4.385964912280702,
173
+ "eval_accuracy": 0.9115612648221344,
174
+ "eval_auc": 0.9578519002979434,
175
+ "eval_f1": 0.8182741116751269,
176
+ "eval_false_negatives": 92,
177
+ "eval_false_positives": 87,
178
+ "eval_loss": 0.3153094947338104,
179
+ "eval_npv": 0.940026075619296,
180
+ "eval_precision": 0.8224489795918367,
181
+ "eval_recall": 0.8141414141414142,
182
+ "eval_runtime": 7.4628,
183
+ "eval_samples_per_second": 271.212,
184
+ "eval_specificity": 0.9431000654022237,
185
+ "eval_steps_per_second": 4.288,
186
+ "eval_true_negatives": 1442,
187
+ "eval_true_positives": 403,
188
+ "step": 1000
189
+ },
190
+ {
191
+ "epoch": 4.605263157894737,
192
+ "grad_norm": 10.743696212768555,
193
+ "learning_rate": 1.1998050682261208e-05,
194
+ "loss": 0.0909,
195
+ "step": 1050
196
+ },
197
+ {
198
+ "epoch": 4.824561403508772,
199
+ "grad_norm": 1.6505861282348633,
200
+ "learning_rate": 1.1510721247563355e-05,
201
+ "loss": 0.088,
202
+ "step": 1100
203
+ },
204
+ {
205
+ "epoch": 5.043859649122807,
206
+ "grad_norm": 6.567928314208984,
207
+ "learning_rate": 1.1023391812865499e-05,
208
+ "loss": 0.1129,
209
+ "step": 1150
210
+ },
211
+ {
212
+ "epoch": 5.2631578947368425,
213
+ "grad_norm": 3.445005416870117,
214
+ "learning_rate": 1.0536062378167643e-05,
215
+ "loss": 0.0605,
216
+ "step": 1200
217
+ },
218
+ {
219
+ "epoch": 5.482456140350878,
220
+ "grad_norm": 1.5516074895858765,
221
+ "learning_rate": 1.0048732943469786e-05,
222
+ "loss": 0.0642,
223
+ "step": 1250
224
+ },
225
+ {
226
+ "epoch": 5.701754385964913,
227
+ "grad_norm": 9.6685209274292,
228
+ "learning_rate": 9.56140350877193e-06,
229
+ "loss": 0.0759,
230
+ "step": 1300
231
+ },
232
+ {
233
+ "epoch": 5.921052631578947,
234
+ "grad_norm": 12.226219177246094,
235
+ "learning_rate": 9.074074074074075e-06,
236
+ "loss": 0.0806,
237
+ "step": 1350
238
+ },
239
+ {
240
+ "epoch": 6.140350877192983,
241
+ "grad_norm": 1.530657172203064,
242
+ "learning_rate": 8.586744639376219e-06,
243
+ "loss": 0.0636,
244
+ "step": 1400
245
+ },
246
+ {
247
+ "epoch": 6.359649122807017,
248
+ "grad_norm": 4.697056293487549,
249
+ "learning_rate": 8.099415204678363e-06,
250
+ "loss": 0.0402,
251
+ "step": 1450
252
+ },
253
+ {
254
+ "epoch": 6.578947368421053,
255
+ "grad_norm": 0.09656322002410889,
256
+ "learning_rate": 7.612085769980507e-06,
257
+ "loss": 0.0629,
258
+ "step": 1500
259
+ },
260
+ {
261
+ "epoch": 6.578947368421053,
262
+ "eval_accuracy": 0.9179841897233202,
263
+ "eval_auc": 0.9618949468524354,
264
+ "eval_f1": 0.8241525423728814,
265
+ "eval_false_negatives": 106,
266
+ "eval_false_positives": 60,
267
+ "eval_loss": 0.38592642545700073,
268
+ "eval_npv": 0.9326984126984127,
269
+ "eval_precision": 0.8663697104677061,
270
+ "eval_recall": 0.7858585858585858,
271
+ "eval_runtime": 7.4828,
272
+ "eval_samples_per_second": 270.486,
273
+ "eval_specificity": 0.960758665794637,
274
+ "eval_steps_per_second": 4.276,
275
+ "eval_true_negatives": 1469,
276
+ "eval_true_positives": 389,
277
+ "step": 1500
278
+ },
279
+ {
280
+ "epoch": 6.798245614035087,
281
+ "grad_norm": 36.65667724609375,
282
+ "learning_rate": 7.124756335282652e-06,
283
+ "loss": 0.0606,
284
+ "step": 1550
285
+ },
286
+ {
287
+ "epoch": 7.017543859649122,
288
+ "grad_norm": 2.3685500621795654,
289
+ "learning_rate": 6.637426900584796e-06,
290
+ "loss": 0.0513,
291
+ "step": 1600
292
+ },
293
+ {
294
+ "epoch": 7.2368421052631575,
295
+ "grad_norm": 2.3388023376464844,
296
+ "learning_rate": 6.15009746588694e-06,
297
+ "loss": 0.0328,
298
+ "step": 1650
299
+ },
300
+ {
301
+ "epoch": 7.456140350877193,
302
+ "grad_norm": 0.18014875054359436,
303
+ "learning_rate": 5.662768031189084e-06,
304
+ "loss": 0.0526,
305
+ "step": 1700
306
+ },
307
+ {
308
+ "epoch": 7.675438596491228,
309
+ "grad_norm": 5.354085445404053,
310
+ "learning_rate": 5.175438596491229e-06,
311
+ "loss": 0.033,
312
+ "step": 1750
313
+ },
314
+ {
315
+ "epoch": 7.894736842105263,
316
+ "grad_norm": 0.14219924807548523,
317
+ "learning_rate": 4.688109161793373e-06,
318
+ "loss": 0.0286,
319
+ "step": 1800
320
+ },
321
+ {
322
+ "epoch": 8.114035087719298,
323
+ "grad_norm": 0.8848229646682739,
324
+ "learning_rate": 4.200779727095517e-06,
325
+ "loss": 0.023,
326
+ "step": 1850
327
+ },
328
+ {
329
+ "epoch": 8.333333333333334,
330
+ "grad_norm": 0.09083296358585358,
331
+ "learning_rate": 3.713450292397661e-06,
332
+ "loss": 0.0342,
333
+ "step": 1900
334
+ },
335
+ {
336
+ "epoch": 8.552631578947368,
337
+ "grad_norm": 5.818343162536621,
338
+ "learning_rate": 3.2261208576998056e-06,
339
+ "loss": 0.0317,
340
+ "step": 1950
341
+ },
342
+ {
343
+ "epoch": 8.771929824561404,
344
+ "grad_norm": 0.027245348319411278,
345
+ "learning_rate": 2.7387914230019497e-06,
346
+ "loss": 0.0344,
347
+ "step": 2000
348
+ },
349
+ {
350
+ "epoch": 8.771929824561404,
351
+ "eval_accuracy": 0.9169960474308301,
352
+ "eval_auc": 0.9587609251441821,
353
+ "eval_f1": 0.8264462809917356,
354
+ "eval_false_negatives": 95,
355
+ "eval_false_positives": 73,
356
+ "eval_loss": 0.44914552569389343,
357
+ "eval_npv": 0.938749194068343,
358
+ "eval_precision": 0.8456659619450317,
359
+ "eval_recall": 0.8080808080808081,
360
+ "eval_runtime": 7.4877,
361
+ "eval_samples_per_second": 270.31,
362
+ "eval_specificity": 0.9522563767168084,
363
+ "eval_steps_per_second": 4.274,
364
+ "eval_true_negatives": 1456,
365
+ "eval_true_positives": 400,
366
+ "step": 2000
367
+ },
368
+ {
369
+ "epoch": 8.991228070175438,
370
+ "grad_norm": 6.446545124053955,
371
+ "learning_rate": 2.2514619883040934e-06,
372
+ "loss": 0.031,
373
+ "step": 2050
374
+ },
375
+ {
376
+ "epoch": 9.210526315789474,
377
+ "grad_norm": 3.8243560791015625,
378
+ "learning_rate": 1.7641325536062378e-06,
379
+ "loss": 0.0322,
380
+ "step": 2100
381
+ },
382
+ {
383
+ "epoch": 9.429824561403509,
384
+ "grad_norm": 0.14315390586853027,
385
+ "learning_rate": 1.2768031189083821e-06,
386
+ "loss": 0.025,
387
+ "step": 2150
388
+ },
389
+ {
390
+ "epoch": 9.649122807017545,
391
+ "grad_norm": 0.08034974336624146,
392
+ "learning_rate": 7.894736842105263e-07,
393
+ "loss": 0.0143,
394
+ "step": 2200
395
+ },
396
+ {
397
+ "epoch": 9.868421052631579,
398
+ "grad_norm": 0.1442982703447342,
399
+ "learning_rate": 3.021442495126706e-07,
400
+ "loss": 0.0121,
401
+ "step": 2250
402
+ }
403
+ ],
404
+ "logging_steps": 50,
405
+ "max_steps": 2280,
406
+ "num_input_tokens_seen": 0,
407
+ "num_train_epochs": 10,
408
+ "save_steps": 500,
409
+ "stateful_callbacks": {
410
+ "EarlyStoppingCallback": {
411
+ "args": {
412
+ "early_stopping_patience": 3,
413
+ "early_stopping_threshold": 0.001
414
+ },
415
+ "attributes": {
416
+ "early_stopping_patience_counter": 0
417
+ }
418
+ },
419
+ "TrainerControl": {
420
+ "args": {
421
+ "should_epoch_stop": false,
422
+ "should_evaluate": false,
423
+ "should_log": false,
424
+ "should_save": true,
425
+ "should_training_stop": true
426
+ },
427
+ "attributes": {}
428
+ }
429
+ },
430
+ "total_flos": 9579873525657600.0,
431
+ "train_batch_size": 32,
432
+ "trial_name": null,
433
+ "trial_params": null
434
+ }
checkpoint-2280/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa0b6e71cf97dcf125328dcde4d9907006c97167c4f41af8ac98ea5f88ac6b4
3
+ size 5713
checkpoint-2280/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.51.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
eval_metrics.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_loss": 0.44914552569389343,
3
+ "eval_accuracy": 0.9169960474308301,
4
+ "eval_precision": 0.8456659619450317,
5
+ "eval_recall": 0.8080808080808081,
6
+ "eval_f1": 0.8264462809917356,
7
+ "eval_auc": 0.9587609251441821,
8
+ "eval_specificity": 0.9522563767168084,
9
+ "eval_npv": 0.938749194068343,
10
+ "eval_true_positives": 400,
11
+ "eval_true_negatives": 1456,
12
+ "eval_false_positives": 73,
13
+ "eval_false_negatives": 95,
14
+ "eval_runtime": 7.4496,
15
+ "eval_samples_per_second": 271.691,
16
+ "eval_steps_per_second": 4.296,
17
+ "epoch": 10.0
18
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41590102e903624f664df64252a13ee88e911b1cb5be074c17b4957d02712d72
3
+ size 437958648
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa0b6e71cf97dcf125328dcde4d9907006c97167c4f41af8ac98ea5f88ac6b4
3
+ size 5713
vocab.txt ADDED
The diff for this file is too large to render. See raw diff