quanxuantruong committed (verified)
Commit d01e81c · 1 Parent(s): 499d4e0

Model save

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,109 @@
+ ---
+ library_name: transformers
+ license: mit
+ base_model: FacebookAI/xlm-roberta-large
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ - precision
+ - recall
+ - f1
+ model-index:
+ - name: xlm-large-finetuned-ner-covidmed-v5
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # xlm-large-finetuned-ner-covidmed-v5
+
+ This model is a fine-tuned version of [FacebookAI/xlm-roberta-large](https://huggingface.co/FacebookAI/xlm-roberta-large) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0778
+ - Accuracy: 0.9818
+ - Precision: 0.9105
+ - Recall: 0.9395
+ - F1: 0.9241
+ - Age Precision: 0.9692
+ - Age Recall: 0.9725
+ - Age F1-score: 0.9708
+ - Date Precision: 0.9832
+ - Date Recall: 0.9927
+ - Date F1-score: 0.9880
+ - Gender Precision: 0.9539
+ - Gender Recall: 0.9848
+ - Gender F1-score: 0.9691
+ - Job Precision: 0.6667
+ - Job Recall: 0.8208
+ - Job F1-score: 0.7358
+ - Location Precision: 0.9394
+ - Location Recall: 0.9532
+ - Location F1-score: 0.9462
+ - Name Precision: 0.9128
+ - Name Recall: 0.9214
+ - Name F1-score: 0.9171
+ - Organization Precision: 0.8692
+ - Organization Recall: 0.8962
+ - Organization F1-score: 0.8825
+ - Patient Id Precision: 0.9786
+ - Patient Id Recall: 0.9796
+ - Patient Id F1-score: 0.9791
+ - Symptom And Disease Precision: 0.8632
+ - Symptom And Disease Recall: 0.8944
+ - Symptom And Disease F1-score: 0.8785
+ - Transportation Precision: 0.9692
+ - Transportation Recall: 0.9793
+ - Transportation F1-score: 0.9742
+ - Micro avg Precision: 0.9369
+ - Micro avg Recall: 0.9536
+ - Micro avg F1-score: 0.9452
+ - Macro avg Precision: 0.9105
+ - Macro avg Recall: 0.9395
+ - Macro avg F1-score: 0.9241
+ - Weighted avg Precision: 0.9381
+ - Weighted avg Recall: 0.9536
+ - Weighted avg F1-score: 0.9457
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2e-05
+ - train_batch_size: 32
+ - eval_batch_size: 32
+ - seed: 42
+ - optimizer: AdamW (torch) with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+ - lr_scheduler_type: linear
+ - num_epochs: 4
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 | Age Precision | Age Recall | Age F1-score | Date Precision | Date Recall | Date F1-score | Gender Precision | Gender Recall | Gender F1-score | Job Precision | Job Recall | Job F1-score | Location Precision | Location Recall | Location F1-score | Name Precision | Name Recall | Name F1-score | Organization Precision | Organization Recall | Organization F1-score | Patient Id Precision | Patient Id Recall | Patient Id F1-score | Symptom And Disease Precision | Symptom And Disease Recall | Symptom And Disease F1-score | Transportation Precision | Transportation Recall | Transportation F1-score | Micro avg Precision | Micro avg Recall | Micro avg F1-score | Macro avg Precision | Macro avg Recall | Macro avg F1-score | Weighted avg Precision | Weighted avg Recall | Weighted avg F1-score |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|:-------------:|:----------:|:------------:|:--------------:|:-----------:|:-------------:|:----------------:|:-------------:|:---------------:|:-------------:|:----------:|:------------:|:------------------:|:---------------:|:-----------------:|:--------------:|:-----------:|:-------------:|:----------------------:|:-------------------:|:---------------------:|:--------------------:|:-----------------:|:-------------------:|:-----------------------------:|:--------------------------:|:----------------------------:|:------------------------:|:---------------------:|:-----------------------:|:-------------------:|:----------------:|:------------------:|:-------------------:|:----------------:|:------------------:|:----------------------:|:-------------------:|:---------------------:|
+ | No log | 1.0 | 158 | 0.0963 | 0.9733 | 0.7903 | 0.9007 | 0.8387 | 0.8174 | 0.9845 | 0.8932 | 0.9814 | 0.9915 | 0.9865 | 0.7897 | 0.9913 | 0.8791 | 0.4495 | 0.7457 | 0.5609 | 0.9119 | 0.9302 | 0.9210 | 0.8216 | 0.8836 | 0.8515 | 0.7558 | 0.8911 | 0.8179 | 0.9286 | 0.9791 | 0.9531 | 0.7831 | 0.8741 | 0.8261 | 0.6636 | 0.7358 | 0.6978 | 0.8717 | 0.9371 | 0.9032 | 0.7903 | 0.9007 | 0.8387 | 0.8790 | 0.9371 | 0.9059 |
+ | No log | 2.0 | 316 | 0.0762 | 0.9797 | 0.8970 | 0.9193 | 0.9078 | 0.9659 | 0.9725 | 0.9692 | 0.9791 | 0.9897 | 0.9844 | 0.9479 | 0.9848 | 0.9660 | 0.6392 | 0.7168 | 0.6757 | 0.9368 | 0.9514 | 0.9440 | 0.8627 | 0.9088 | 0.8851 | 0.8793 | 0.8885 | 0.8839 | 0.9752 | 0.9791 | 0.9771 | 0.8352 | 0.8477 | 0.8414 | 0.9485 | 0.9534 | 0.9509 | 0.9308 | 0.9451 | 0.9379 | 0.8970 | 0.9193 | 0.9078 | 0.9314 | 0.9451 | 0.9382 |
+ | No log | 3.0 | 474 | 0.0761 | 0.9812 | 0.9018 | 0.9405 | 0.9199 | 0.9468 | 0.9794 | 0.9628 | 0.9844 | 0.9933 | 0.9889 | 0.9459 | 0.9848 | 0.9650 | 0.6606 | 0.8439 | 0.7411 | 0.9222 | 0.9525 | 0.9371 | 0.8981 | 0.9151 | 0.9065 | 0.8672 | 0.8807 | 0.8739 | 0.9729 | 0.9850 | 0.9789 | 0.8555 | 0.8908 | 0.8728 | 0.9643 | 0.9793 | 0.9717 | 0.9266 | 0.9536 | 0.9399 | 0.9018 | 0.9405 | 0.9199 | 0.9279 | 0.9536 | 0.9404 |
+ | 0.1535 | 4.0 | 632 | 0.0778 | 0.9818 | 0.9105 | 0.9395 | 0.9241 | 0.9692 | 0.9725 | 0.9708 | 0.9832 | 0.9927 | 0.9880 | 0.9539 | 0.9848 | 0.9691 | 0.6667 | 0.8208 | 0.7358 | 0.9394 | 0.9532 | 0.9462 | 0.9128 | 0.9214 | 0.9171 | 0.8692 | 0.8962 | 0.8825 | 0.9786 | 0.9796 | 0.9791 | 0.8632 | 0.8944 | 0.8785 | 0.9692 | 0.9793 | 0.9742 | 0.9369 | 0.9536 | 0.9452 | 0.9105 | 0.9395 | 0.9241 | 0.9381 | 0.9536 | 0.9457 |
+
+
+ ### Framework versions
+
+ - Transformers 4.51.3
+ - Pytorch 2.6.0+cu124
+ - Datasets 3.6.0
+ - Tokenizers 0.21.1
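
For reference, a minimal inference sketch using the transformers token-classification pipeline. The Hub repo id `quanxuantruong/xlm-large-finetuned-ner-covidmed-v5` is assumed from the committer and model name in this commit, and the example sentence is made up:

```python
# Minimal inference sketch; repo id and example text are assumptions, not from the commit.
from transformers import pipeline

ner = pipeline(
    "token-classification",
    model="quanxuantruong/xlm-large-finetuned-ner-covidmed-v5",  # assumed repo id
    aggregation_strategy="simple",  # merge B-/I- subword predictions into whole entity spans
)

text = "Patient 1440, a 52-year-old male driver, was treated at Bach Mai Hospital."
for ent in ner(text):
    print(ent["entity_group"], ent["word"], round(ent["score"], 3))
```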
config.json ADDED
@@ -0,0 +1,73 @@
+ {
+   "architectures": [
+     "XLMRobertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "B-AGE",
+     "1": "B-DATE",
+     "2": "B-GENDER",
+     "3": "B-JOB",
+     "4": "B-LOCATION",
+     "5": "B-NAME",
+     "6": "B-ORGANIZATION",
+     "7": "B-PATIENT_ID",
+     "8": "B-SYMPTOM_AND_DISEASE",
+     "9": "B-TRANSPORTATION",
+     "10": "I-AGE",
+     "11": "I-DATE",
+     "12": "I-GENDER",
+     "13": "I-JOB",
+     "14": "I-LOCATION",
+     "15": "I-NAME",
+     "16": "I-ORGANIZATION",
+     "17": "I-PATIENT_ID",
+     "18": "I-SYMPTOM_AND_DISEASE",
+     "19": "I-TRANSPORTATION",
+     "20": "O"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "B-AGE": 0,
+     "B-DATE": 1,
+     "B-GENDER": 2,
+     "B-JOB": 3,
+     "B-LOCATION": 4,
+     "B-NAME": 5,
+     "B-ORGANIZATION": 6,
+     "B-PATIENT_ID": 7,
+     "B-SYMPTOM_AND_DISEASE": 8,
+     "B-TRANSPORTATION": 9,
+     "I-AGE": 10,
+     "I-DATE": 11,
+     "I-GENDER": 12,
+     "I-JOB": 13,
+     "I-LOCATION": 14,
+     "I-NAME": 15,
+     "I-ORGANIZATION": 16,
+     "I-PATIENT_ID": 17,
+     "I-SYMPTOM_AND_DISEASE": 18,
+     "I-TRANSPORTATION": 19,
+     "O": 20
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "xlm-roberta",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "output_past": true,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.3",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 250002
+ }
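
The `id2label` / `label2id` maps above define a BIO tag set over 10 entity types plus `O`, 21 classes in total. A short sketch, assuming the same repo id as above, that recovers the entity types from the config:

```python
# Sketch: read the label space defined in config.json via AutoConfig (repo id assumed).
from transformers import AutoConfig

config = AutoConfig.from_pretrained("quanxuantruong/xlm-large-finetuned-ner-covidmed-v5")

print(config.num_labels)  # 21 (10 entity types in BIO format plus "O")
entity_types = sorted({tag.split("-", 1)[1] for tag in config.label2id if tag != "O"})
print(entity_types)
# ['AGE', 'DATE', 'GENDER', 'JOB', 'LOCATION', 'NAME', 'ORGANIZATION',
#  'PATIENT_ID', 'SYMPTOM_AND_DISEASE', 'TRANSPORTATION']
```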
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:908753a619cd45f75c6a5bcf0cfea0f07c9bf87e82163b6f2052e01b28b0fdcc
+ size 2235497956
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
+ size 17082832
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "250001": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "extra_special_tokens": {},
+   "mask_token": "<mask>",
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "XLMRobertaTokenizer",
+   "unk_token": "<unk>"
+ }
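
Because tokenizer.json is shipped, AutoTokenizer loads the fast XLM-R tokenizer, whose `word_ids()` makes it straightforward to align word-level BIO tags to sentencepiece subwords, the usual preprocessing step for token-classification fine-tuning. A sketch with hypothetical example words and tags (the repo id is assumed, as above):

```python
# Illustrative preprocessing sketch: align word-level BIO tags to subword tokens.
# Repo id is assumed; the words/tags below are made-up examples, not training data.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "quanxuantruong/xlm-large-finetuned-ner-covidmed-v5",
    use_fast=True,  # word_ids() requires the fast (Rust) tokenizer
)

words = ["Patient", "1440", "works", "as", "a", "nurse", "."]
tags  = ["O", "B-PATIENT_ID", "O", "O", "O", "B-JOB", "O"]

enc = tokenizer(words, is_split_into_words=True, truncation=True, max_length=512)
aligned = []
for word_idx in enc.word_ids():
    # Special tokens (<s>, </s>) get -100 so the loss ignores them; every subword
    # of a word inherits that word's tag here (one common convention).
    aligned.append(-100 if word_idx is None else tags[word_idx])
print(aligned)  # for training, string tags would be mapped through config.label2id
```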
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4b09782c2ebb143e4e184d40c734e163010a554a3df43c7a81fc3f4b3a63c7a
+ size 5368