init
Browse files- config.json +102 -0
- parameter.json +1 -0
- pytorch_model.bin +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +1 -0
- test_bc5cdr_span.json +1 -0
- test_bionlp2004_span.json +1 -0
- test_conll2003_span.json +1 -0
- test_fin_span.json +1 -0
- test_ontonotes5.json +1 -0
- test_ontonotes5_span.json +1 -0
- test_panx_dataset-en_span.json +1 -0
- test_wnut2017_span.json +1 -0
- tokenizer_config.json +1 -0
config.json
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "xlm-roberta-base",
|
3 |
+
"architectures": [
|
4 |
+
"XLMRobertaForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"gradient_checkpointing": false,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"id2label": {
|
14 |
+
"0": "O",
|
15 |
+
"1": "B-cardinal number",
|
16 |
+
"2": "B-date",
|
17 |
+
"3": "I-date",
|
18 |
+
"4": "B-person",
|
19 |
+
"5": "I-person",
|
20 |
+
"6": "B-group",
|
21 |
+
"7": "B-geopolitical area",
|
22 |
+
"8": "I-geopolitical area",
|
23 |
+
"9": "B-law",
|
24 |
+
"10": "I-law",
|
25 |
+
"11": "B-organization",
|
26 |
+
"12": "I-organization",
|
27 |
+
"13": "B-percent",
|
28 |
+
"14": "I-percent",
|
29 |
+
"15": "B-ordinal number",
|
30 |
+
"16": "B-money",
|
31 |
+
"17": "I-money",
|
32 |
+
"18": "B-work of art",
|
33 |
+
"19": "I-work of art",
|
34 |
+
"20": "B-facility",
|
35 |
+
"21": "B-time",
|
36 |
+
"22": "I-cardinal number",
|
37 |
+
"23": "B-location",
|
38 |
+
"24": "B-quantity",
|
39 |
+
"25": "I-quantity",
|
40 |
+
"26": "I-group",
|
41 |
+
"27": "I-location",
|
42 |
+
"28": "B-product",
|
43 |
+
"29": "I-time",
|
44 |
+
"30": "B-event",
|
45 |
+
"31": "I-event",
|
46 |
+
"32": "I-facility",
|
47 |
+
"33": "B-language",
|
48 |
+
"34": "I-product",
|
49 |
+
"35": "I-ordinal number",
|
50 |
+
"36": "I-language"
|
51 |
+
},
|
52 |
+
"initializer_range": 0.02,
|
53 |
+
"intermediate_size": 3072,
|
54 |
+
"label2id": {
|
55 |
+
"B-cardinal number": 1,
|
56 |
+
"B-date": 2,
|
57 |
+
"B-event": 30,
|
58 |
+
"B-facility": 20,
|
59 |
+
"B-geopolitical area": 7,
|
60 |
+
"B-group": 6,
|
61 |
+
"B-language": 33,
|
62 |
+
"B-law": 9,
|
63 |
+
"B-location": 23,
|
64 |
+
"B-money": 16,
|
65 |
+
"B-ordinal number": 15,
|
66 |
+
"B-organization": 11,
|
67 |
+
"B-percent": 13,
|
68 |
+
"B-person": 4,
|
69 |
+
"B-product": 28,
|
70 |
+
"B-quantity": 24,
|
71 |
+
"B-time": 21,
|
72 |
+
"B-work of art": 18,
|
73 |
+
"I-cardinal number": 22,
|
74 |
+
"I-date": 3,
|
75 |
+
"I-event": 31,
|
76 |
+
"I-facility": 32,
|
77 |
+
"I-geopolitical area": 8,
|
78 |
+
"I-group": 26,
|
79 |
+
"I-language": 36,
|
80 |
+
"I-law": 10,
|
81 |
+
"I-location": 27,
|
82 |
+
"I-money": 17,
|
83 |
+
"I-ordinal number": 35,
|
84 |
+
"I-organization": 12,
|
85 |
+
"I-percent": 14,
|
86 |
+
"I-person": 5,
|
87 |
+
"I-product": 34,
|
88 |
+
"I-quantity": 25,
|
89 |
+
"I-time": 29,
|
90 |
+
"I-work of art": 19,
|
91 |
+
"O": 0
|
92 |
+
},
|
93 |
+
"layer_norm_eps": 1e-05,
|
94 |
+
"max_position_embeddings": 514,
|
95 |
+
"model_type": "xlm-roberta",
|
96 |
+
"num_attention_heads": 12,
|
97 |
+
"num_hidden_layers": 12,
|
98 |
+
"output_past": true,
|
99 |
+
"pad_token_id": 1,
|
100 |
+
"type_vocab_size": 1,
|
101 |
+
"vocab_size": 250002
|
102 |
+
}
|
parameter.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"dataset": ["ontonotes5"], "transformers_model": "xlm-roberta-base", "random_seed": 1234, "lr": 1e-05, "total_step": 13000, "warmup_step": 700, "weight_decay": 1e-07, "batch_size": 16, "max_seq_length": 128, "fp16": false, "max_grad_norm": 1.0, "lower_case": false}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13c17f9d9b4742ac5aa7f0e56886c3b4fef04737f67efb63730616a17cf39682
|
3 |
+
size 1110011650
|
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
3 |
+
size 5069051
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
|
test_bc5cdr_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 0.0, "recall": 0.0, "precision": 0.0, "summary": ""}, "test": {"f1": 0.0, "recall": 0.0, "precision": 0.0, "summary": ""}}
|
test_bionlp2004_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 0.0, "recall": 0.0, "precision": 0.0, "summary": ""}}
|
test_conll2003_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 64.09021794221997, "recall": 50.54967019788127, "precision": 87.53894080996885, "summary": " precision recall f1-score support\n\n entity 0.88 0.51 0.64 5003\n\n micro avg 0.88 0.51 0.64 5003\n macro avg 0.88 0.51 0.64 5003\nweighted avg 0.88 0.51 0.64 5003\n"}, "test": {"f1": 62.21701795472286, "recall": 48.48915027377814, "precision": 86.78765880217786, "summary": " precision recall f1-score support\n\n entity 0.87 0.48 0.62 4931\n\n micro avg 0.87 0.48 0.62 4931\n macro avg 0.87 0.48 0.62 4931\nweighted avg 0.87 0.48 0.62 4931\n"}}
|
test_fin_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 31.753554502369667, "recall": 26.58730158730159, "precision": 39.411764705882355, "summary": " precision recall f1-score support\n\n entity 0.39 0.27 0.32 252\n\n micro avg 0.39 0.27 0.32 252\n macro avg 0.39 0.27 0.32 252\nweighted avg 0.39 0.27 0.32 252\n"}}
|
test_ontonotes5.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 86.98359051896652, "recall": 88.57893306765692, "precision": 85.4446968373231, "summary": " precision recall f1-score support\n\n cardinal number 0.82 0.88 0.85 937\n date 0.83 0.87 0.85 1507\n event 0.63 0.55 0.59 143\n facility 0.54 0.64 0.59 115\ngeopolitical area 0.94 0.93 0.94 2262\n group 0.90 0.93 0.91 847\n language 0.86 0.73 0.79 33\n law 0.46 0.70 0.55 40\n location 0.67 0.74 0.70 204\n money 0.90 0.93 0.92 274\n ordinal number 0.83 0.86 0.85 232\n organization 0.86 0.86 0.86 1728\n percent 0.89 0.89 0.89 177\n person 0.91 0.96 0.93 2014\n product 0.55 0.67 0.60 72\n quantity 0.79 0.81 0.80 100\n time 0.68 0.79 0.73 214\n work of art 0.42 0.54 0.48 142\n\n micro avg 0.85 0.89 0.87 11041\n macro avg 0.75 0.79 0.77 11041\n weighted avg 0.86 0.89 0.87 11041\n"}, "test": {"f1": 89.0153671030165, "recall": 90.35641276330992, "precision": 87.71354616048318, "summary": " precision recall f1-score support\n\n cardinal number 0.85 0.88 0.86 934\n date 0.84 0.89 0.86 1601\n event 0.61 0.65 0.63 63\n facility 0.76 0.74 0.75 135\ngeopolitical area 0.96 0.96 0.96 2240\n group 0.89 0.94 0.92 841\n language 0.75 0.55 0.63 22\n law 0.63 0.60 0.62 40\n location 0.70 0.80 0.74 179\n money 0.85 0.90 0.87 314\n ordinal number 0.81 0.92 0.86 195\n organization 0.89 0.89 0.89 1792\n percent 0.89 0.92 0.90 348\n person 0.93 0.96 0.95 1988\n product 0.65 0.72 0.69 76\n quantity 0.77 0.81 0.79 105\n time 0.60 0.66 0.63 212\n work of art 0.60 0.60 0.60 166\n\n micro avg 0.88 0.90 0.89 11251\n macro avg 0.78 0.80 0.79 11251\n weighted avg 0.88 0.90 0.89 11251\n"}}
|
test_ontonotes5_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 91.05923313841896, "recall": 92.38293632823114, "precision": 89.77292730153142, "summary": " precision recall f1-score support\n\n entity 0.90 0.92 0.91 11041\n\n micro avg 0.90 0.92 0.91 11041\n macro avg 0.90 0.92 0.91 11041\nweighted avg 0.90 0.92 0.91 11041\n"}, "test": {"f1": 91.83754116355654, "recall": 92.95173762332237, "precision": 90.7497396737244, "summary": " precision recall f1-score support\n\n entity 0.91 0.93 0.92 11251\n\n micro avg 0.91 0.93 0.92 11251\n macro avg 0.91 0.93 0.92 11251\nweighted avg 0.91 0.93 0.92 11251\n"}}
|
test_panx_dataset-en_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 44.32529043789097, "recall": 35.13992206872122, "precision": 60.01209921355112, "summary": " precision recall f1-score support\n\n entity 0.60 0.35 0.44 14115\n\n micro avg 0.60 0.35 0.44 14115\n macro avg 0.60 0.35 0.44 14115\nweighted avg 0.60 0.35 0.44 14115\n"}, "test": {"f1": 44.73243010557796, "recall": 35.526126385490144, "precision": 60.379204892966364, "summary": " precision recall f1-score support\n\n entity 0.60 0.36 0.45 13894\n\n micro avg 0.60 0.36 0.45 13894\n macro avg 0.60 0.36 0.45 13894\nweighted avg 0.60 0.36 0.45 13894\n"}}
|
test_wnut2017_span.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"valid": {"f1": 62.40928882438316, "recall": 53.61596009975062, "precision": 74.65277777777779, "summary": " precision recall f1-score support\n\n entity 0.75 0.54 0.62 802\n\n micro avg 0.75 0.54 0.62 802\n macro avg 0.75 0.54 0.62 802\nweighted avg 0.75 0.54 0.62 802\n"}, "test": {"f1": 51.70387779083432, "recall": 43.52126607319486, "precision": 63.67583212735166, "summary": " precision recall f1-score support\n\n entity 0.64 0.44 0.52 1011\n\n micro avg 0.64 0.44 0.52 1011\n macro avg 0.64 0.44 0.52 1011\nweighted avg 0.64 0.44 0.52 1011\n"}}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 512, "name_or_path": "xlm-roberta-base"}
|