maiurilorenzo committed
Commit e4b10fc · verified · 1 Parent(s): de117f7

Upload tokenizer

Files changed (5):
  1. README.md +36 -36
  2. special_tokens_map.json +7 -0
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +58 -0
  5. vocab.txt +0 -0
README.md CHANGED
@@ -1,66 +1,66 @@
 ---
+base_model:
+- dbmdz/bert-base-italian-xxl-uncased
+datasets:
+- sapienzanlp/ami
+language:
+- it
+library_name: transformers
+license: cc-by-nc-sa-4.0
+metrics:
+- accuracy
+- f1
+- recall
+- precision
+pipeline_tag: text-classification
+tags:
+- misogyny
+- italian
 model-index:
 - name: misogyny-detection-it
   results:
   - task:
       type: classification
     dataset:
-      type: text-classification
       name: sapienzanlp/ami
+      type: text-classification
     metrics:
-    - name: test_loss
-      type: loss
+    - type: loss
       value: 0.2205
+      name: test_loss
       verified: false
-    - name: test_accuracy
-      type: accuracy
+    - type: accuracy
       value: 0.9412
+      name: test_accuracy
       verified: false
-    - name: test_f1
-      type: f1
+    - type: f1
       value: 0.942
+      name: test_f1
       verified: false
-    - name: test_precision
-      type: precision
+    - type: precision
       value: 0.9291
+      name: test_precision
       verified: false
-    - name: test_recall
-      type: recall
+    - type: recall
       value: 0.9553
+      name: test_recall
       verified: false
-    - name: test_runtime
-      type: runtime
+    - type: runtime
       value: 13.0069
+      name: test_runtime
       verified: false
-    - name: test_samples_per_second
-      type: samples_per_second
+    - type: samples_per_second
       value: 223.573
+      name: test_samples_per_second
       verified: false
-    - name: test_steps_per_second
-      type: steps_per_second
+    - type: steps_per_second
       value: 6.996
+      name: test_steps_per_second
       verified: false
-    - name: epoch
-      type: epoch
+    - type: epoch
       value: 5
+      name: epoch
       verified: false
-license: cc-by-nc-sa-4.0
-library_name: transformers
-datasets:
-- sapienzanlp/ami
-language:
-- it
-base_model:
-- dbmdz/bert-base-italian-xxl-uncased
-pipeline_tag: text-classification
-tags:
-- misogyny
-- italian
-metrics:
-- accuracy
-- f1
-- recall
-- precision
 ---
 
 # Model Card for Misogyny Detection in Italian
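
The reordered metadata keeps pipeline_tag: text-classification, so the model can be driven through the standard transformers pipeline. A minimal usage sketch, assuming the Hub repo id maiurilorenzo/misogyny-detection-it (inferred from the committer and the model-index name; the actual path may differ):

from transformers import pipeline

# Assumed repo id, inferred from committer + model name; adjust if needed.
model_id = "maiurilorenzo/misogyny-detection-it"
classifier = pipeline("text-classification", model=model_id)

# Each input returns a dict like {"label": ..., "score": ...}.
print(classifier("Frase di esempio da classificare."))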
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
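
These five entries are the stock BERT special tokens. Once loaded, transformers exposes this file through the tokenizer's special_tokens_map property; a minimal check, assuming the uploaded files sit in a hypothetical local directory ./misogyny-detection-it:

from transformers import AutoTokenizer

# Hypothetical local checkout of this repo; any directory holding these files works.
tok = AutoTokenizer.from_pretrained("./misogyny-detection-it")

# special_tokens_map mirrors special_tokens_map.json above.
assert tok.special_tokens_map == {
    "cls_token": "[CLS]",
    "mask_token": "[MASK]",
    "pad_token": "[PAD]",
    "sep_token": "[SEP]",
    "unk_token": "[UNK]",
}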
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "104": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
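
The config pins the class to BertTokenizer with lowercasing and a 512-token limit, and added_tokens_decoder places [PAD] at id 0 with the remaining special tokens at 101-104 (note [UNK] is 101 here, not 100 as in English bert-base-uncased). A quick sanity-check sketch under the same assumed local path as above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./misogyny-detection-it")  # assumed path

# Values taken directly from tokenizer_config.json above.
assert tok.model_max_length == 512
assert tok.do_lower_case is True
assert tok.convert_tokens_to_ids("[PAD]") == 0
assert tok.convert_tokens_to_ids("[MASK]") == 104

# Longer inputs must be truncated to model_max_length before inference.
enc = tok("Frase di esempio", truncation=True, max_length=512)
print(enc["input_ids"])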
vocab.txt ADDED
The diff for this file is too large to render. See raw diff