Files changed (1) hide show
  1. deepset_roberta-base-squad2.json +237 -0
deepset_roberta-base-squad2.json ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bomFormat": "CycloneDX",
3
+ "specVersion": "1.6",
4
+ "serialNumber": "urn:uuid:cef2615a-e860-42b2-b351-7f8a5f49e535",
5
+ "version": 1,
6
+ "metadata": {
7
+ "timestamp": "2025-07-10T08:45:16.787707+00:00",
8
+ "component": {
9
+ "type": "machine-learning-model",
10
+ "bom-ref": "deepset/roberta-base-squad2-12395755-d71a-5489-a970-16cfa514aa95",
11
+ "name": "deepset/roberta-base-squad2",
12
+ "externalReferences": [
13
+ {
14
+ "url": "https://huggingface.co/deepset/roberta-base-squad2",
15
+ "type": "documentation"
16
+ }
17
+ ],
18
+ "modelCard": {
19
+ "modelParameters": {
20
+ "task": "question-answering",
21
+ "architectureFamily": "roberta",
22
+ "modelArchitecture": "RobertaForQuestionAnswering",
23
+ "datasets": [
24
+ {
25
+ "ref": "squad_v2-9c72005c-340e-5f42-8f7a-ae0c57af7584"
26
+ }
27
+ ]
28
+ },
29
+ "properties": [
30
+ {
31
+ "name": "library_name",
32
+ "value": "transformers"
33
+ },
34
+ {
35
+ "name": "base_model",
36
+ "value": "FacebookAI/roberta-base"
37
+ }
38
+ ],
39
+ "quantitativeAnalysis": {
40
+ "performanceMetrics": [
41
+ {
42
+ "slice": "dataset: squad_v2, split: validation, config: squad_v2",
43
+ "type": "exact_match",
44
+ "value": 79.9309
45
+ },
46
+ {
47
+ "slice": "dataset: squad_v2, split: validation, config: squad_v2",
48
+ "type": "f1",
49
+ "value": 82.9501
50
+ },
51
+ {
52
+ "slice": "dataset: squad_v2, split: validation, config: squad_v2",
53
+ "type": "total",
54
+ "value": 11869
55
+ },
56
+ {
57
+ "slice": "dataset: squad, split: validation, config: plain_text",
58
+ "type": "exact_match",
59
+ "value": 85.289
60
+ },
61
+ {
62
+ "slice": "dataset: squad, split: validation, config: plain_text",
63
+ "type": "f1",
64
+ "value": 91.841
65
+ },
66
+ {
67
+ "slice": "dataset: adversarial_qa, split: validation, config: adversarialQA",
68
+ "type": "exact_match",
69
+ "value": 29.5
70
+ },
71
+ {
72
+ "slice": "dataset: adversarial_qa, split: validation, config: adversarialQA",
73
+ "type": "f1",
74
+ "value": 40.367
75
+ },
76
+ {
77
+ "slice": "dataset: squad_adversarial, split: validation, config: AddOneSent",
78
+ "type": "exact_match",
79
+ "value": 78.567
80
+ },
81
+ {
82
+ "slice": "dataset: squad_adversarial, split: validation, config: AddOneSent",
83
+ "type": "f1",
84
+ "value": 84.469
85
+ },
86
+ {
87
+ "slice": "dataset: squadshifts, split: test, config: amazon",
88
+ "type": "exact_match",
89
+ "value": 69.924
90
+ },
91
+ {
92
+ "slice": "dataset: squadshifts, split: test, config: amazon",
93
+ "type": "f1",
94
+ "value": 83.284
95
+ },
96
+ {
97
+ "slice": "dataset: squadshifts, split: test, config: new_wiki",
98
+ "type": "exact_match",
99
+ "value": 81.204
100
+ },
101
+ {
102
+ "slice": "dataset: squadshifts, split: test, config: new_wiki",
103
+ "type": "f1",
104
+ "value": 90.595
105
+ },
106
+ {
107
+ "slice": "dataset: squadshifts, split: test, config: nyt",
108
+ "type": "exact_match",
109
+ "value": 82.931
110
+ },
111
+ {
112
+ "slice": "dataset: squadshifts, split: test, config: nyt",
113
+ "type": "f1",
114
+ "value": 90.756
115
+ },
116
+ {
117
+ "slice": "dataset: squadshifts, split: test, config: reddit",
118
+ "type": "exact_match",
119
+ "value": 71.55
120
+ },
121
+ {
122
+ "slice": "dataset: squadshifts, split: test, config: reddit",
123
+ "type": "f1",
124
+ "value": 82.939
125
+ }
126
+ ]
127
+ }
128
+ },
129
+ "authors": [
130
+ {
131
+ "name": "deepset"
132
+ }
133
+ ],
134
+ "licenses": [
135
+ {
136
+ "license": {
137
+ "id": "CC-BY-4.0",
138
+ "url": "https://spdx.org/licenses/CC-BY-4.0.html"
139
+ }
140
+ }
141
+ ],
142
+ "description": "**Language model:** roberta-base\n**Language:** English\n**Downstream-task:** Extractive QA\n**Training data:** SQuAD 2.0\n**Eval data:** SQuAD 2.0\n**Code:** See [an example extractive QA pipeline built with Haystack](https://haystack.deepset.ai/tutorials/34_extractive_qa_pipeline)\n**Infrastructure:** 4x Tesla v100",
143
+ "tags": [
144
+ "transformers",
145
+ "pytorch",
146
+ "tf",
147
+ "jax",
148
+ "rust",
149
+ "safetensors",
150
+ "roberta",
151
+ "question-answering",
152
+ "en",
153
+ "dataset:squad_v2",
154
+ "base_model:FacebookAI/roberta-base",
155
+ "base_model:finetune:FacebookAI/roberta-base",
156
+ "license:cc-by-4.0",
157
+ "model-index",
158
+ "endpoints_compatible",
159
+ "region:us"
160
+ ]
161
+ }
162
+ },
163
+ "components": [
164
+ {
165
+ "type": "data",
166
+ "bom-ref": "squad_v2-9c72005c-340e-5f42-8f7a-ae0c57af7584",
167
+ "name": "squad_v2",
168
+ "data": [
169
+ {
170
+ "type": "dataset",
171
+ "bom-ref": "dataset-squad_v2-9c72005c-340e-5f42-8f7a-ae0c57af7584",
172
+ "name": "squad_v2",
173
+ "contents": {
174
+ "url": "https://huggingface.co/datasets/rajpurkar/squad_v2",
175
+ "properties": [
176
+ {
177
+ "name": "task_categories",
178
+ "value": "question-answering"
179
+ },
180
+ {
181
+ "name": "task_ids",
182
+ "value": "open-domain-qa, extractive-qa"
183
+ },
184
+ {
185
+ "name": "language",
186
+ "value": "en"
187
+ },
188
+ {
189
+ "name": "size_categories",
190
+ "value": "100K<n<1M"
191
+ },
192
+ {
193
+ "name": "annotations_creators",
194
+ "value": "crowdsourced"
195
+ },
196
+ {
197
+ "name": "language_creators",
198
+ "value": "crowdsourced"
199
+ },
200
+ {
201
+ "name": "pretty_name",
202
+ "value": "SQuAD2.0"
203
+ },
204
+ {
205
+ "name": "source_datasets",
206
+ "value": "original"
207
+ },
208
+ {
209
+ "name": "paperswithcode_id",
210
+ "value": "squad"
211
+ },
212
+ {
213
+ "name": "configs",
214
+ "value": "Name of the dataset subset: squad_v2 {\"split\": \"train\", \"path\": \"squad_v2/train-*\"}, {\"split\": \"validation\", \"path\": \"squad_v2/validation-*\"}"
215
+ },
216
+ {
217
+ "name": "license",
218
+ "value": "cc-by-sa-4.0"
219
+ }
220
+ ]
221
+ },
222
+ "governance": {
223
+ "owners": [
224
+ {
225
+ "organization": {
226
+ "name": "rajpurkar",
227
+ "url": "https://huggingface.co/rajpurkar"
228
+ }
229
+ }
230
+ ]
231
+ },
232
+ "description": "Dataset Card for SQuAD 2.0\n\nDataset Summary\n\nStanford Question Answering Dataset (SQuAD) is a reading comprehension dataset, consisting of questions posed by crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span, from the corresponding reading passage, or the question might be unanswerable.\nSQuAD 2.0 combines the 100,000 questions in SQuAD1.1 with over 50,000 unanswerable questions written adversarially by crowdworkers\u2026 See the full description on the dataset page: https://huggingface.co/datasets/rajpurkar/squad_v2."
233
+ }
234
+ ]
235
+ }
236
+ ]
237
+ }