nahiar committed on
Commit
053f34b
·
verified ·
1 Parent(s): c7becd6

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: cahya/NusaBert-v1.3
4
+ tags:
5
+ - generated_from_trainer
6
+ datasets:
7
+ - grit-id/id_nergrit_corpus
8
+ metrics:
9
+ - precision
10
+ - recall
11
+ - f1
12
+ - accuracy
13
+ model-index:
14
+ - name: nusabert_nergrit_1.3
15
+ results:
16
+ - task:
17
+ name: Token Classification
18
+ type: token-classification
19
+ dataset:
20
+ name: grit-id/id_nergrit_corpus ner
21
+ type: grit-id/id_nergrit_corpus
22
+ config: ner
23
+ split: validation
24
+ args: ner
25
+ metrics:
26
+ - name: Precision
27
+ type: precision
28
+ value: 0.8010483135824977
29
+ - name: Recall
30
+ type: recall
31
+ value: 0.8338275412169375
32
+ - name: F1
33
+ type: f1
34
+ value: 0.8171093159760562
35
+ - name: Accuracy
36
+ type: accuracy
37
+ value: 0.9476653696498054
38
+ pipeline_tag: token-classification
39
+ license: mit
40
+ language:
41
+ - id
42
+ ---
43
+
44
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
45
+ should probably proofread and complete it, then remove this comment. -->
46
+
47
+ # NusaBert-ner-v1.3
48
+
49
+ This model is a fine-tuned version of [cahya/NusaBert-v1.3](https://huggingface.co/cahya/NusaBert-v1.3) on the grit-id/id_nergrit_corpus ner dataset.
50
+ It supports a context length of 8192, the same as the base model *cahya/NusaBert-v1.3*, which was pre-trained from scratch using the ModernBERT architecture.
51
+ It achieves the following results on the evaluation set:
52
+ - Loss: 0.2174
53
+ - Precision: 0.8010
54
+ - Recall: 0.8338
55
+ - F1: 0.8171
56
+ - Accuracy: 0.9477
57
+
58
+ ## Model description
59
+
60
+ The dataset contains the following 19 entity types:
61
+ ```
62
+ 'CRD': Cardinal
63
+ 'DAT': Date
64
+ 'EVT': Event
65
+ 'FAC': Facility
66
+ 'GPE': Geopolitical Entity
67
+ 'LAW': Law Entity (such as Undang-Undang)
68
+ 'LOC': Location
69
+ 'MON': Money
70
+ 'NOR': Political Organization
71
+ 'ORD': Ordinal
72
+ 'ORG': Organization
73
+ 'PER': Person
74
+ 'PRC': Percent
75
+ 'PRD': Product
76
+ 'QTY': Quantity
77
+ 'REG': Religion
78
+ 'TIM': Time
79
+ 'WOA': Work of Art
80
+ 'LAN': Language
81
+ ```
82
+
83
+ ## Intended uses & limitations
84
+
85
+ More information needed
86
+
87
+ ## Training and evaluation data
88
+
89
+ More information needed
90
+
91
+ ## Training procedure
92
+
93
+ ### Training hyperparameters
94
+
95
+ The following hyperparameters were used during training:
96
+ - learning_rate: 5e-05
97
+ - train_batch_size: 32
98
+ - eval_batch_size: 32
99
+ - seed: 42
100
+ - distributed_type: multi-GPU
101
+ - num_devices: 2
102
+ - total_train_batch_size: 64
103
+ - total_eval_batch_size: 64
104
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
105
+ - lr_scheduler_type: linear
106
+ - num_epochs: 3.0
107
+
108
+ ### Training results
109
+
110
+
111
+
112
+ ### Framework versions
113
+
114
+ - Transformers 4.49.0
115
+ - Pytorch 2.5.1+cu124
116
+ - Datasets 2.19.2
117
+ - Tokenizers 0.21.0
118
+
119
+ ## Usage
120
+ ```python
121
+ from transformers import pipeline
122
+ ner = pipeline("ner", model="cahya/NusaBert-ner-v1.3", grouped_entities=True)
123
+ text = "Jakarta, April 2025 - Polisi mengungkap sosok teman pemberi uang palsu kepada artis Sekar Arum Widara. Sosok tersebut ternyata adalah Bayu Setio Aribowo (BS), pegawai nonaktif Garuda yang ditangkap Polsek Tanah Abang di kasus serupa."
124
+ result = ner(text)
125
+ print(result)
126
+ ```
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9476653696498054,
4
+ "eval_f1": 0.8171093159760562,
5
+ "eval_loss": 0.2173503339290619,
6
+ "eval_precision": 0.8010483135824977,
7
+ "eval_recall": 0.8338275412169375,
8
+ "eval_runtime": 2.1971,
9
+ "eval_samples": 2521,
10
+ "eval_samples_per_second": 1147.437,
11
+ "eval_steps_per_second": 18.206,
12
+ "total_flos": 2986592784351232.0,
13
+ "train_loss": 0.1634680819349224,
14
+ "train_runtime": 152.3657,
15
+ "train_samples": 12532,
16
+ "train_samples_per_second": 246.748,
17
+ "train_steps_per_second": 3.859
18
+ }
config.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cahya/NusaBert-v1.3",
3
+ "architectures": [
4
+ "ModernBertForTokenClassification"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 65003,
9
+ "classifier_activation": "gelu",
10
+ "classifier_bias": false,
11
+ "classifier_dropout": 0.0,
12
+ "classifier_pooling": "cls",
13
+ "cls_token_id": 65003,
14
+ "decoder_bias": true,
15
+ "deterministic_flash_attn": false,
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 65001,
18
+ "finetuning_task": "ner",
19
+ "global_attn_every_n_layers": 3,
20
+ "global_rope_theta": 160000.0,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "B-CRD",
25
+ "1": "B-DAT",
26
+ "2": "B-EVT",
27
+ "3": "B-FAC",
28
+ "4": "B-GPE",
29
+ "5": "B-LAN",
30
+ "6": "B-LAW",
31
+ "7": "B-LOC",
32
+ "8": "B-MON",
33
+ "9": "B-NOR",
34
+ "10": "B-ORD",
35
+ "11": "B-ORG",
36
+ "12": "B-PER",
37
+ "13": "B-PRC",
38
+ "14": "B-PRD",
39
+ "15": "B-QTY",
40
+ "16": "B-REG",
41
+ "17": "B-TIM",
42
+ "18": "B-WOA",
43
+ "19": "I-CRD",
44
+ "20": "I-DAT",
45
+ "21": "I-EVT",
46
+ "22": "I-FAC",
47
+ "23": "I-GPE",
48
+ "24": "I-LAN",
49
+ "25": "I-LAW",
50
+ "26": "I-LOC",
51
+ "27": "I-MON",
52
+ "28": "I-NOR",
53
+ "29": "I-ORD",
54
+ "30": "I-ORG",
55
+ "31": "I-PER",
56
+ "32": "I-PRC",
57
+ "33": "I-PRD",
58
+ "34": "I-QTY",
59
+ "35": "I-REG",
60
+ "36": "I-TIM",
61
+ "37": "I-WOA",
62
+ "38": "O"
63
+ },
64
+ "initializer_cutoff_factor": 2.0,
65
+ "initializer_range": 0.02,
66
+ "intermediate_size": 1152,
67
+ "label2id": {
68
+ "B-CRD": 0,
69
+ "B-DAT": 1,
70
+ "B-EVT": 2,
71
+ "B-FAC": 3,
72
+ "B-GPE": 4,
73
+ "B-LAN": 5,
74
+ "B-LAW": 6,
75
+ "B-LOC": 7,
76
+ "B-MON": 8,
77
+ "B-NOR": 9,
78
+ "B-ORD": 10,
79
+ "B-ORG": 11,
80
+ "B-PER": 12,
81
+ "B-PRC": 13,
82
+ "B-PRD": 14,
83
+ "B-QTY": 15,
84
+ "B-REG": 16,
85
+ "B-TIM": 17,
86
+ "B-WOA": 18,
87
+ "I-CRD": 19,
88
+ "I-DAT": 20,
89
+ "I-EVT": 21,
90
+ "I-FAC": 22,
91
+ "I-GPE": 23,
92
+ "I-LAN": 24,
93
+ "I-LAW": 25,
94
+ "I-LOC": 26,
95
+ "I-MON": 27,
96
+ "I-NOR": 28,
97
+ "I-ORD": 29,
98
+ "I-ORG": 30,
99
+ "I-PER": 31,
100
+ "I-PRC": 32,
101
+ "I-PRD": 33,
102
+ "I-QTY": 34,
103
+ "I-REG": 35,
104
+ "I-TIM": 36,
105
+ "I-WOA": 37,
106
+ "O": 38
107
+ },
108
+ "local_attention": 128,
109
+ "local_rope_theta": 10000.0,
110
+ "max_position_embeddings": 8192,
111
+ "mlp_bias": false,
112
+ "mlp_dropout": 0.0,
113
+ "model_type": "modernbert",
114
+ "norm_bias": false,
115
+ "norm_eps": 1e-05,
116
+ "num_attention_heads": 12,
117
+ "num_hidden_layers": 22,
118
+ "pad_token_id": 65002,
119
+ "reference_compile": false,
120
+ "repad_logits_with_grad": false,
121
+ "sep_token_id": 65001,
122
+ "sparse_pred_ignore_index": -100,
123
+ "sparse_prediction": false,
124
+ "torch_dtype": "float32",
125
+ "transformers_version": "4.49.0",
126
+ "vocab_size": 65107
127
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9476653696498054,
4
+ "eval_f1": 0.8171093159760562,
5
+ "eval_loss": 0.2173503339290619,
6
+ "eval_precision": 0.8010483135824977,
7
+ "eval_recall": 0.8338275412169375,
8
+ "eval_runtime": 2.1971,
9
+ "eval_samples": 2521,
10
+ "eval_samples_per_second": 1147.437,
11
+ "eval_steps_per_second": 18.206
12
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60cab610f37b5f22886b4d92115969fd27d9efa9ed6b4a3ae0c1dd95af4dab97
3
+ size 643831812
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,873 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "65000": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "65001": {
12
+ "content": "[SEP]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "65002": {
20
+ "content": "[PAD]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "65003": {
28
+ "content": "[CLS]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "65004": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "65005": {
44
+ "content": "<|endoftext|>",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "65006": {
52
+ "content": "<|padding|>",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "65007": {
60
+ "content": "[unused000]",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "65008": {
68
+ "content": "[unused001]",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "65009": {
76
+ "content": "[unused002]",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "65010": {
84
+ "content": "[unused003]",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "65011": {
92
+ "content": "[unused004]",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "65012": {
100
+ "content": "[unused005]",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "65013": {
108
+ "content": "[unused006]",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "65014": {
116
+ "content": "[unused007]",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "65015": {
124
+ "content": "[unused008]",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "65016": {
132
+ "content": "[unused009]",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "65017": {
140
+ "content": "[unused010]",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "65018": {
148
+ "content": "[unused011]",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "65019": {
156
+ "content": "[unused012]",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "65020": {
164
+ "content": "[unused013]",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "65021": {
172
+ "content": "[unused014]",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "65022": {
180
+ "content": "[unused015]",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "65023": {
188
+ "content": "[unused016]",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "65024": {
196
+ "content": "[unused017]",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "65025": {
204
+ "content": "[unused018]",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "65026": {
212
+ "content": "[unused019]",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "65027": {
220
+ "content": "[unused020]",
221
+ "lstrip": false,
222
+ "normalized": true,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": false
226
+ },
227
+ "65028": {
228
+ "content": "[unused021]",
229
+ "lstrip": false,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": false
234
+ },
235
+ "65029": {
236
+ "content": "[unused022]",
237
+ "lstrip": false,
238
+ "normalized": true,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": false
242
+ },
243
+ "65030": {
244
+ "content": "[unused023]",
245
+ "lstrip": false,
246
+ "normalized": true,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": false
250
+ },
251
+ "65031": {
252
+ "content": "[unused024]",
253
+ "lstrip": false,
254
+ "normalized": true,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": false
258
+ },
259
+ "65032": {
260
+ "content": "[unused025]",
261
+ "lstrip": false,
262
+ "normalized": true,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": false
266
+ },
267
+ "65033": {
268
+ "content": "[unused026]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "65034": {
276
+ "content": "[unused027]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "65035": {
284
+ "content": "[unused028]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "65036": {
292
+ "content": "[unused029]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "65037": {
300
+ "content": "[unused030]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "65038": {
308
+ "content": "[unused031]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "65039": {
316
+ "content": "[unused032]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "65040": {
324
+ "content": "[unused033]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "65041": {
332
+ "content": "[unused034]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "65042": {
340
+ "content": "[unused035]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "65043": {
348
+ "content": "[unused036]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "65044": {
356
+ "content": "[unused037]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "65045": {
364
+ "content": "[unused038]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "65046": {
372
+ "content": "[unused039]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "65047": {
380
+ "content": "[unused040]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "65048": {
388
+ "content": "[unused041]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "65049": {
396
+ "content": "[unused042]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "65050": {
404
+ "content": "[unused043]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "65051": {
412
+ "content": "[unused044]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "65052": {
420
+ "content": "[unused045]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "65053": {
428
+ "content": "[unused046]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "65054": {
436
+ "content": "[unused047]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "65055": {
444
+ "content": "[unused048]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "65056": {
452
+ "content": "[unused049]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "65057": {
460
+ "content": "[unused050]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "65058": {
468
+ "content": "[unused051]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "65059": {
476
+ "content": "[unused052]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "65060": {
484
+ "content": "[unused053]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "65061": {
492
+ "content": "[unused054]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "65062": {
500
+ "content": "[unused055]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "65063": {
508
+ "content": "[unused056]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "65064": {
516
+ "content": "[unused057]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "65065": {
524
+ "content": "[unused058]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "65066": {
532
+ "content": "[unused059]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "65067": {
540
+ "content": "[unused060]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "65068": {
548
+ "content": "[unused061]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "65069": {
556
+ "content": "[unused062]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "65070": {
564
+ "content": "[unused063]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "65071": {
572
+ "content": "[unused064]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "65072": {
580
+ "content": "[unused065]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "65073": {
588
+ "content": "[unused066]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "65074": {
596
+ "content": "[unused067]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "65075": {
604
+ "content": "[unused068]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "65076": {
612
+ "content": "[unused069]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "65077": {
620
+ "content": "[unused070]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "65078": {
628
+ "content": "[unused071]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "65079": {
636
+ "content": "[unused072]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "65080": {
644
+ "content": "[unused073]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "65081": {
652
+ "content": "[unused074]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "65082": {
660
+ "content": "[unused075]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "65083": {
668
+ "content": "[unused076]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "65084": {
676
+ "content": "[unused077]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "65085": {
684
+ "content": "[unused078]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "65086": {
692
+ "content": "[unused079]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "65087": {
700
+ "content": "[unused080]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "65088": {
708
+ "content": "[unused081]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "65089": {
716
+ "content": "[unused082]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "65090": {
724
+ "content": "[unused083]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "65091": {
732
+ "content": "[unused084]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "65092": {
740
+ "content": "[unused085]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "65093": {
748
+ "content": "[unused086]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "65094": {
756
+ "content": "[unused087]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "65095": {
764
+ "content": "[unused088]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "65096": {
772
+ "content": "[unused089]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "65097": {
780
+ "content": "[unused090]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "65098": {
788
+ "content": "[unused091]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "65099": {
796
+ "content": "[unused092]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "65100": {
804
+ "content": "[unused093]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "65101": {
812
+ "content": "[unused094]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "65102": {
820
+ "content": "[unused095]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "65103": {
828
+ "content": "[unused096]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "65104": {
836
+ "content": "[unused097]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "65105": {
844
+ "content": "[unused098]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "65106": {
852
+ "content": "[unused099]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ }
859
+ },
860
+ "clean_up_tokenization_spaces": false,
861
+ "cls_token": "[CLS]",
862
+ "extra_special_tokens": {},
863
+ "mask_token": "[MASK]",
864
+ "model_input_names": [
865
+ "input_ids",
866
+ "attention_mask"
867
+ ],
868
+ "model_max_length": 8192,
869
+ "pad_token": "[PAD]",
870
+ "sep_token": "[SEP]",
871
+ "tokenizer_class": "PreTrainedTokenizer",
872
+ "unk_token": "[UNK]"
873
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 2986592784351232.0,
4
+ "train_loss": 0.1634680819349224,
5
+ "train_runtime": 152.3657,
6
+ "train_samples": 12532,
7
+ "train_samples_per_second": 246.748,
8
+ "train_steps_per_second": 3.859
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 588,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 2.5510204081632653,
13
+ "grad_norm": 1.0083805322647095,
14
+ "learning_rate": 7.482993197278912e-06,
15
+ "loss": 0.1852,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 3.0,
20
+ "step": 588,
21
+ "total_flos": 2986592784351232.0,
22
+ "train_loss": 0.1634680819349224,
23
+ "train_runtime": 152.3657,
24
+ "train_samples_per_second": 246.748,
25
+ "train_steps_per_second": 3.859
26
+ }
27
+ ],
28
+ "logging_steps": 500,
29
+ "max_steps": 588,
30
+ "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 3,
32
+ "save_steps": 500,
33
+ "stateful_callbacks": {
34
+ "TrainerControl": {
35
+ "args": {
36
+ "should_epoch_stop": false,
37
+ "should_evaluate": false,
38
+ "should_log": false,
39
+ "should_save": true,
40
+ "should_training_stop": true
41
+ },
42
+ "attributes": {}
43
+ }
44
+ },
45
+ "total_flos": 2986592784351232.0,
46
+ "train_batch_size": 32,
47
+ "trial_name": null,
48
+ "trial_params": null
49
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84072a1b7f8c55ad76da5b704b6d031fbad8cfbaaf99f4c9b9605981df66296a
3
+ size 5368