vprelovac committed on
Commit
7cff39a
·
1 Parent(s): 7b95524

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +451 -0
README.md CHANGED
@@ -4,6 +4,71 @@ tags:
4
  model-index:
5
  - name: universal-sentence-encoder-multilingual-large-3
6
  results:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  - task:
8
  type: STS
9
  dataset:
@@ -25,6 +90,165 @@ model-index:
25
  value: 79.0536298599996
26
  - type: manhattan_spearman
27
  value: 79.15240595090333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  - task:
29
  type: STS
30
  dataset:
@@ -214,6 +438,83 @@ model-index:
214
  value: 81.20700319509191
215
  - type: manhattan_spearman
216
  value: 80.56078137874846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  - task:
218
  type: Summarization
219
  dataset:
@@ -231,7 +532,157 @@ model-index:
231
  value: 30.459236115198866
232
  - type: dot_spearman
233
  value: 29.714606257782066
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  ---
 
235
  This is a part of the [MTEB test](https://huggingface.co/spaces/mteb/leaderboard).
236
 
237
  ```
 
4
  model-index:
5
  - name: universal-sentence-encoder-multilingual-large-3
6
  results:
7
+ - task:
8
+ type: Classification
9
+ dataset:
10
+ type: mteb/amazon_counterfactual
11
+ name: MTEB AmazonCounterfactualClassification (en)
12
+ config: en
13
+ split: test
14
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
15
+ metrics:
16
+ - type: accuracy
17
+ value: 70.80597014925372
18
+ - type: ap
19
+ value: 32.82048192776259
20
+ - type: f1
21
+ value: 64.5323001151201
22
+ - task:
23
+ type: Classification
24
+ dataset:
25
+ type: mteb/amazon_polarity
26
+ name: MTEB AmazonPolarityClassification
27
+ config: default
28
+ split: test
29
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
30
+ metrics:
31
+ - type: accuracy
32
+ value: 67.04549999999999
33
+ - type: ap
34
+ value: 61.7344066191823
35
+ - type: f1
36
+ value: 66.66233213924507
37
+ - task:
38
+ type: Classification
39
+ dataset:
40
+ type: mteb/amazon_reviews_multi
41
+ name: MTEB AmazonReviewsClassification (en)
42
+ config: en
43
+ split: test
44
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
45
+ metrics:
46
+ - type: accuracy
47
+ value: 35.85
48
+ - type: f1
49
+ value: 35.332188148679464
50
+ - task:
51
+ type: Clustering
52
+ dataset:
53
+ type: mteb/arxiv-clustering-p2p
54
+ name: MTEB ArxivClusteringP2P
55
+ config: default
56
+ split: test
57
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
58
+ metrics:
59
+ - type: v_measure
60
+ value: 34.745135349238126
61
+ - task:
62
+ type: Clustering
63
+ dataset:
64
+ type: mteb/arxiv-clustering-s2s
65
+ name: MTEB ArxivClusteringS2S
66
+ config: default
67
+ split: test
68
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
69
+ metrics:
70
+ - type: v_measure
71
+ value: 22.620886813816306
72
  - task:
73
  type: STS
74
  dataset:
 
90
  value: 79.0536298599996
91
  - type: manhattan_spearman
92
  value: 79.15240595090333
93
+ - task:
94
+ type: Classification
95
+ dataset:
96
+ type: mteb/banking77
97
+ name: MTEB Banking77Classification
98
+ config: default
99
+ split: test
100
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
101
+ metrics:
102
+ - type: accuracy
103
+ value: 74.66883116883116
104
+ - type: f1
105
+ value: 73.79377347715479
106
+ - task:
107
+ type: Clustering
108
+ dataset:
109
+ type: mteb/biorxiv-clustering-p2p
110
+ name: MTEB BiorxivClusteringP2P
111
+ config: default
112
+ split: test
113
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
114
+ metrics:
115
+ - type: v_measure
116
+ value: 28.750702236182818
117
+ - task:
118
+ type: Clustering
119
+ dataset:
120
+ type: mteb/biorxiv-clustering-s2s
121
+ name: MTEB BiorxivClusteringS2S
122
+ config: default
123
+ split: test
124
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
125
+ metrics:
126
+ - type: v_measure
127
+ value: 20.142702408387194
128
+ - task:
129
+ type: Classification
130
+ dataset:
131
+ type: mteb/emotion
132
+ name: MTEB EmotionClassification
133
+ config: default
134
+ split: test
135
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
136
+ metrics:
137
+ - type: accuracy
138
+ value: 42.30500000000001
139
+ - type: f1
140
+ value: 38.547388314307206
141
+ - task:
142
+ type: Classification
143
+ dataset:
144
+ type: mteb/imdb
145
+ name: MTEB ImdbClassification
146
+ config: default
147
+ split: test
148
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
149
+ metrics:
150
+ - type: accuracy
151
+ value: 63.690000000000005
152
+ - type: ap
153
+ value: 59.157513278784734
154
+ - type: f1
155
+ value: 63.35865572988864
156
+ - task:
157
+ type: Classification
158
+ dataset:
159
+ type: mteb/mtop_domain
160
+ name: MTEB MTOPDomainClassification (en)
161
+ config: en
162
+ split: test
163
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
164
+ metrics:
165
+ - type: accuracy
166
+ value: 92.48062015503875
167
+ - type: f1
168
+ value: 92.14919344822017
169
+ - task:
170
+ type: Classification
171
+ dataset:
172
+ type: mteb/mtop_intent
173
+ name: MTEB MTOPIntentClassification (en)
174
+ config: en
175
+ split: test
176
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
177
+ metrics:
178
+ - type: accuracy
179
+ value: 70.26675786593708
180
+ - type: f1
181
+ value: 47.72003620900994
182
+ - task:
183
+ type: Classification
184
+ dataset:
185
+ type: mteb/amazon_massive_intent
186
+ name: MTEB MassiveIntentClassification (en)
187
+ config: en
188
+ split: test
189
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
190
+ metrics:
191
+ - type: accuracy
192
+ value: 69.04505716207129
193
+ - type: f1
194
+ value: 65.75319040584333
195
+ - task:
196
+ type: Classification
197
+ dataset:
198
+ type: mteb/amazon_massive_scenario
199
+ name: MTEB MassiveScenarioClassification (en)
200
+ config: en
201
+ split: test
202
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
203
+ metrics:
204
+ - type: accuracy
205
+ value: 75.80363147276395
206
+ - type: f1
207
+ value: 74.16118757920125
208
+ - task:
209
+ type: Clustering
210
+ dataset:
211
+ type: mteb/medrxiv-clustering-p2p
212
+ name: MTEB MedrxivClusteringP2P
213
+ config: default
214
+ split: test
215
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
216
+ metrics:
217
+ - type: v_measure
218
+ value: 31.197732425855694
219
+ - task:
220
+ type: Clustering
221
+ dataset:
222
+ type: mteb/medrxiv-clustering-s2s
223
+ name: MTEB MedrxivClusteringS2S
224
+ config: default
225
+ split: test
226
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
227
+ metrics:
228
+ - type: v_measure
229
+ value: 25.802309075396522
230
+ - task:
231
+ type: Clustering
232
+ dataset:
233
+ type: mteb/reddit-clustering
234
+ name: MTEB RedditClustering
235
+ config: default
236
+ split: test
237
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
238
+ metrics:
239
+ - type: v_measure
240
+ value: 46.17008358584782
241
+ - task:
242
+ type: Clustering
243
+ dataset:
244
+ type: mteb/reddit-clustering-p2p
245
+ name: MTEB RedditClusteringP2P
246
+ config: default
247
+ split: test
248
+ revision: 282350215ef01743dc01b456c7f5241fa8937f16
249
+ metrics:
250
+ - type: v_measure
251
+ value: 56.53148530944687
252
  - task:
253
  type: STS
254
  dataset:
 
438
  value: 81.20700319509191
439
  - type: manhattan_spearman
440
  value: 80.56078137874846
441
+ - task:
442
+ type: PairClassification
443
+ dataset:
444
+ type: mteb/sprintduplicatequestions-pairclassification
445
+ name: MTEB SprintDuplicateQuestions
446
+ config: default
447
+ split: test
448
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
449
+ metrics:
450
+ - type: cos_sim_accuracy
451
+ value: 99.71089108910891
452
+ - type: cos_sim_ap
453
+ value: 90.8870929231928
454
+ - type: cos_sim_f1
455
+ value: 85.3719420868697
456
+ - type: cos_sim_precision
457
+ value: 85.24426719840478
458
+ - type: cos_sim_recall
459
+ value: 85.5
460
+ - type: dot_accuracy
461
+ value: 99.71089108910891
462
+ - type: dot_ap
463
+ value: 90.88709292319278
464
+ - type: dot_f1
465
+ value: 85.3719420868697
466
+ - type: dot_precision
467
+ value: 85.24426719840478
468
+ - type: dot_recall
469
+ value: 85.5
470
+ - type: euclidean_accuracy
471
+ value: 99.71089108910891
472
+ - type: euclidean_ap
473
+ value: 90.8870929231928
474
+ - type: euclidean_f1
475
+ value: 85.3719420868697
476
+ - type: euclidean_precision
477
+ value: 85.24426719840478
478
+ - type: euclidean_recall
479
+ value: 85.5
480
+ - type: manhattan_accuracy
481
+ value: 99.72871287128713
482
+ - type: manhattan_ap
483
+ value: 91.50016707647607
484
+ - type: manhattan_f1
485
+ value: 86.21700879765396
486
+ - type: manhattan_precision
487
+ value: 84.32122370936902
488
+ - type: manhattan_recall
489
+ value: 88.2
490
+ - type: max_accuracy
491
+ value: 99.72871287128713
492
+ - type: max_ap
493
+ value: 91.50016707647607
494
+ - type: max_f1
495
+ value: 86.21700879765396
496
+ - task:
497
+ type: Clustering
498
+ dataset:
499
+ type: mteb/stackexchange-clustering
500
+ name: MTEB StackExchangeClustering
501
+ config: default
502
+ split: test
503
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
504
+ metrics:
505
+ - type: v_measure
506
+ value: 49.339384566987555
507
+ - task:
508
+ type: Clustering
509
+ dataset:
510
+ type: mteb/stackexchange-clustering-p2p
511
+ name: MTEB StackExchangeClusteringP2P
512
+ config: default
513
+ split: test
514
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
515
+ metrics:
516
+ - type: v_measure
517
+ value: 33.39729645390336
518
  - task:
519
  type: Summarization
520
  dataset:
 
532
  value: 30.459236115198866
533
  - type: dot_spearman
534
  value: 29.714606257782066
535
+ - task:
536
+ type: Classification
537
+ dataset:
538
+ type: mteb/toxic_conversations_50k
539
+ name: MTEB ToxicConversationsClassification
540
+ config: default
541
+ split: test
542
+ revision: d7c0de2777da35d6aae2200a62c6e0e5af397c4c
543
+ metrics:
544
+ - type: accuracy
545
+ value: 68.223
546
+ - type: ap
547
+ value: 13.10327282975004
548
+ - type: f1
549
+ value: 52.52588280152648
550
+ - task:
551
+ type: Classification
552
+ dataset:
553
+ type: mteb/tweet_sentiment_extraction
554
+ name: MTEB TweetSentimentExtractionClassification
555
+ config: default
556
+ split: test
557
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
558
+ metrics:
559
+ - type: accuracy
560
+ value: 59.18788907753254
561
+ - type: f1
562
+ value: 59.47679105840768
563
+ - task:
564
+ type: Clustering
565
+ dataset:
566
+ type: mteb/twentynewsgroups-clustering
567
+ name: MTEB TwentyNewsgroupsClustering
568
+ config: default
569
+ split: test
570
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
571
+ metrics:
572
+ - type: v_measure
573
+ value: 36.93253191095803
574
+ - task:
575
+ type: PairClassification
576
+ dataset:
577
+ type: mteb/twittersemeval2015-pairclassification
578
+ name: MTEB TwitterSemEval2015
579
+ config: default
580
+ split: test
581
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
582
+ metrics:
583
+ - type: cos_sim_accuracy
584
+ value: 83.37009000417238
585
+ - type: cos_sim_ap
586
+ value: 63.75973129735431
587
+ - type: cos_sim_f1
588
+ value: 59.62504595025121
589
+ - type: cos_sim_precision
590
+ value: 55.66231983527798
591
+ - type: cos_sim_recall
592
+ value: 64.1952506596306
593
+ - type: dot_accuracy
594
+ value: 83.37009000417238
595
+ - type: dot_ap
596
+ value: 63.759728820348414
597
+ - type: dot_f1
598
+ value: 59.62504595025121
599
+ - type: dot_precision
600
+ value: 55.66231983527798
601
+ - type: dot_recall
602
+ value: 64.1952506596306
603
+ - type: euclidean_accuracy
604
+ value: 83.37009000417238
605
+ - type: euclidean_ap
606
+ value: 63.75972622477462
607
+ - type: euclidean_f1
608
+ value: 59.62504595025121
609
+ - type: euclidean_precision
610
+ value: 55.66231983527798
611
+ - type: euclidean_recall
612
+ value: 64.1952506596306
613
+ - type: manhattan_accuracy
614
+ value: 83.28068188591524
615
+ - type: manhattan_ap
616
+ value: 63.109413220673375
617
+ - type: manhattan_f1
618
+ value: 59.085923217550274
619
+ - type: manhattan_precision
620
+ value: 54.903737259343146
621
+ - type: manhattan_recall
622
+ value: 63.95778364116095
623
+ - type: max_accuracy
624
+ value: 83.37009000417238
625
+ - type: max_ap
626
+ value: 63.75973129735431
627
+ - type: max_f1
628
+ value: 59.62504595025121
629
+ - task:
630
+ type: PairClassification
631
+ dataset:
632
+ type: mteb/twitterurlcorpus-pairclassification
633
+ name: MTEB TwitterURLCorpus
634
+ config: default
635
+ split: test
636
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
637
+ metrics:
638
+ - type: cos_sim_accuracy
639
+ value: 88.34167733923235
640
+ - type: cos_sim_ap
641
+ value: 84.20066403502292
642
+ - type: cos_sim_f1
643
+ value: 76.64693381906498
644
+ - type: cos_sim_precision
645
+ value: 75.56869200838072
646
+ - type: cos_sim_recall
647
+ value: 77.75639051432091
648
+ - type: dot_accuracy
649
+ value: 88.34167733923235
650
+ - type: dot_ap
651
+ value: 84.20066476075668
652
+ - type: dot_f1
653
+ value: 76.64693381906498
654
+ - type: dot_precision
655
+ value: 75.56869200838072
656
+ - type: dot_recall
657
+ value: 77.75639051432091
658
+ - type: euclidean_accuracy
659
+ value: 88.34167733923235
660
+ - type: euclidean_ap
661
+ value: 84.20066533105057
662
+ - type: euclidean_f1
663
+ value: 76.64693381906498
664
+ - type: euclidean_precision
665
+ value: 75.56869200838072
666
+ - type: euclidean_recall
667
+ value: 77.75639051432091
668
+ - type: manhattan_accuracy
669
+ value: 88.32809407381535
670
+ - type: manhattan_ap
671
+ value: 84.17666758732113
672
+ - type: manhattan_f1
673
+ value: 76.6911654417279
674
+ - type: manhattan_precision
675
+ value: 74.75146198830409
676
+ - type: manhattan_recall
677
+ value: 78.73421619956883
678
+ - type: max_accuracy
679
+ value: 88.34167733923235
680
+ - type: max_ap
681
+ value: 84.20066533105057
682
+ - type: max_f1
683
+ value: 76.6911654417279
684
  ---
685
+
686
  This is a part of the [MTEB test](https://huggingface.co/spaces/mteb/leaderboard).
687
 
688
  ```