kaitos255 committed 6 days ago

Commit

d25181b

1 Parent(s): 98db8b2

add results

Browse files

Files changed (17) hide show

results/Classification/scores_amazon_counterfactual_classification.json +23 -0
results/Classification/scores_amazon_review_classification.json +23 -0
results/Classification/scores_massive_intent_classification.json +23 -0
results/Classification/scores_massive_scenario_classification.json +23 -0
results/Clustering/scores_livedoor_news.json +36 -0
results/Clustering/scores_mewsc16.json +36 -0
results/PairClassification/scores_paws_x_ja.json +41 -0
results/Reranking/scores_esci.json +31 -0
results/Retrieval/scores_jagovfaqs_22k.json +43 -0
results/Retrieval/scores_jaqket.json +43 -0
results/Retrieval/scores_mrtydi.json +43 -0
results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
results/STS/scores_jsick.json +31 -0
results/STS/scores_jsts.json +31 -0
results/summary.json +62 -0

results/Classification/scores_amazon_counterfactual_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.7719949223708622,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8969957081545065,
+                "macro_f1": 0.619281045751634
+            },
+            "logreg": {
+                "accuracy": 0.8948497854077253,
+                "macro_f1": 0.7073577095108103
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.9143468950749465,
+                "macro_f1": 0.7719949223708622
+            }
+        }
+    }
+}

results/Classification/scores_amazon_review_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.5925073173503907,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.4426,
+                "macro_f1": 0.4315855430386996
+            },
+            "logreg": {
+                "accuracy": 0.5996,
+                "macro_f1": 0.5954262204317589
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.5942,
+                "macro_f1": 0.5925073173503907
+            }
+        }
+    }
+}

results/Classification/scores_massive_intent_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8253141488047901,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.778160354156419,
+                "macro_f1": 0.745326126234686
+            },
+            "logreg": {
+                "accuracy": 0.8622725036891293,
+                "macro_f1": 0.8676531684090193
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.851714862138534,
+                "macro_f1": 0.8253141488047901
+            }
+        }
+    }
+}

results/Classification/scores_massive_scenario_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8980428060442256,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8735858337432366,
+                "macro_f1": 0.8686101645781477
+            },
+            "logreg": {
+                "accuracy": 0.8981800295130349,
+                "macro_f1": 0.892948610024518
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8997982515131137,
+                "macro_f1": 0.8980428060442256
+            }
+        }
+    }
+}

results/Clustering/scores_livedoor_news.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.5739739304506893,
+    "details": {
+        "optimal_clustering_model_name": "BisectingKMeans",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5515315083996958,
+                "homogeneity_score": 0.540996148835059,
+                "completeness_score": 0.5624853486641362
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5851108907607359,
+                "homogeneity_score": 0.5574200221929413,
+                "completeness_score": 0.615696764056103
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.5992730974248482,
+                "homogeneity_score": 0.5944306847450772,
+                "completeness_score": 0.6041950536154801
+            },
+            "Birch": {
+                "v_measure_score": 0.5851848221376154,
+                "homogeneity_score": 0.5574904547047808,
+                "completeness_score": 0.6157745600948122
+            }
+        },
+        "test_scores": {
+            "BisectingKMeans": {
+                "v_measure_score": 0.5739739304506893,
+                "homogeneity_score": 0.5568637819003157,
+                "completeness_score": 0.5921688597800226
+            }
+        }
+    }
+}

results/Clustering/scores_mewsc16.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.4954965558606599,
+    "details": {
+        "optimal_clustering_model_name": "AgglomerativeClustering",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.4721527268664167,
+                "homogeneity_score": 0.5136266029271387,
+                "completeness_score": 0.4368762239322192
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5470131408248342,
+                "homogeneity_score": 0.5855979961031388,
+                "completeness_score": 0.5131986538882831
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.3922707477594705,
+                "homogeneity_score": 0.4278917603414956,
+                "completeness_score": 0.3621246934158807
+            },
+            "Birch": {
+                "v_measure_score": 0.5470131408248342,
+                "homogeneity_score": 0.5855979961031388,
+                "completeness_score": 0.5131986538882831
+            }
+        },
+        "test_scores": {
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.4954965558606599,
+                "homogeneity_score": 0.522186969924259,
+                "completeness_score": 0.47140190625618494
+            }
+        }
+    }
+}

results/PairClassification/scores_paws_x_ja.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "metric_name": "binary_f1",
+    "metric_value": 0.6236711552090716,
+    "details": {
+        "optimal_distance_metric": "dot_similarities",
+        "val_scores": {
+            "cosine_distances": {
+                "accuracy": 0.573,
+                "accuracy_threshold": 0.36769089102745056,
+                "binary_f1": 0.5979670522257273,
+                "binary_f1_threshold": 1.0
+            },
+            "manhatten_distances": {
+                "accuracy": 0.604,
+                "accuracy_threshold": 789.7718505859375,
+                "binary_f1": 0.6019760056457304,
+                "binary_f1_threshold": 6190.7177734375
+            },
+            "euclidean_distances": {
+                "accuracy": 0.6045,
+                "accuracy_threshold": 22.832660675048828,
+                "binary_f1": 0.6019760056457304,
+                "binary_f1_threshold": 175.00918579101562
+            },
+            "dot_similarities": {
+                "accuracy": 0.577,
+                "accuracy_threshold": 28862.74609375,
+                "binary_f1": 0.603399433427762,
+                "binary_f1_threshold": 18507.880859375
+            }
+        },
+        "test_scores": {
+            "dot_similarities": {
+                "accuracy": 0.5635,
+                "accuracy_threshold": 28862.74609375,
+                "binary_f1": 0.6236711552090716,
+                "binary_f1_threshold": 18507.880859375
+            }
+        }
+    }
+}

results/Reranking/scores_esci.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9357116238638268,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9500953468155826,
+                "ndcg@20": 0.9602754278591416,
+                "ndcg@40": 0.9675518837921879
+            },
+            "dot_score": {
+                "ndcg@10": 0.9427565724100708,
+                "ndcg@20": 0.9545726032461535,
+                "ndcg@40": 0.9624774804074006
+            },
+            "euclidean_distance": {
+                "ndcg@10": 0.9501614180478811,
+                "ndcg@20": 0.9600756632519867,
+                "ndcg@40": 0.9672878521654014
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "ndcg@10": 0.9357116238638268,
+                "ndcg@20": 0.9514444389357,
+                "ndcg@40": 0.9602841337071619
+            }
+        }
+    }
+}

results/Retrieval/scores_jagovfaqs_22k.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7936169854294086,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6741737350102369,
+                "accuracy@3": 0.8370868675051184,
+                "accuracy@5": 0.8792044457443697,
+                "accuracy@10": 0.9157648435214976,
+                "ndcg@10": 0.8007799864324369,
+                "mrr@10": 0.7632489078306575
+            },
+            "dot_score": {
+                "accuracy@1": 0.5633226089499854,
+                "accuracy@3": 0.7505118455688798,
+                "accuracy@5": 0.8113483474700205,
+                "accuracy@10": 0.8754021643755484,
+                "ndcg@10": 0.7208761856444875,
+                "mrr@10": 0.6712681699373708
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.6744662181924539,
+                "accuracy@3": 0.8356244515940333,
+                "accuracy@5": 0.8751096811933314,
+                "accuracy@10": 0.9116700789704592,
+                "ndcg@10": 0.7987606332412315,
+                "mrr@10": 0.7619904176938394
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6646198830409357,
+                "accuracy@3": 0.82953216374269,
+                "accuracy@5": 0.8707602339181286,
+                "accuracy@10": 0.9128654970760234,
+                "ndcg@10": 0.7936169854294086,
+                "mrr@10": 0.7548609950802945
+            }
+        }
+    }
+}

results/Retrieval/scores_jaqket.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.6851508044977195,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5346733668341709,
+                "accuracy@3": 0.714572864321608,
+                "accuracy@5": 0.7698492462311558,
+                "accuracy@10": 0.828140703517588,
+                "ndcg@10": 0.682570463755056,
+                "mrr@10": 0.6357936507936508
+            },
+            "dot_score": {
+                "accuracy@1": 0.4442211055276382,
+                "accuracy@3": 0.6090452261306533,
+                "accuracy@5": 0.6753768844221105,
+                "accuracy@10": 0.7467336683417085,
+                "ndcg@10": 0.5922822430922994,
+                "mrr@10": 0.5432818856185687
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5085427135678392,
+                "accuracy@3": 0.6984924623115578,
+                "accuracy@5": 0.7557788944723618,
+                "accuracy@10": 0.807035175879397,
+                "ndcg@10": 0.6603163598862779,
+                "mrr@10": 0.612886256680226
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5336008024072216,
+                "accuracy@3": 0.7261785356068204,
+                "accuracy@5": 0.7773319959879639,
+                "accuracy@10": 0.827482447342026,
+                "ndcg@10": 0.6851508044977195,
+                "mrr@10": 0.638956154176816
+            }
+        }
+    }
+}

results/Retrieval/scores_mrtydi.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.4192795726565468,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.3125,
+                "accuracy@3": 0.49137931034482757,
+                "accuracy@5": 0.5614224137931034,
+                "accuracy@10": 0.6443965517241379,
+                "ndcg@10": 0.47285001680703276,
+                "mrr@10": 0.41843955254515547
+            },
+            "dot_score": {
+                "accuracy@1": 0.05387931034482758,
+                "accuracy@3": 0.11961206896551724,
+                "accuracy@5": 0.16056034482758622,
+                "accuracy@10": 0.23060344827586207,
+                "ndcg@10": 0.1307756460179494,
+                "mrr@10": 0.1003318281335523
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.33405172413793105,
+                "accuracy@3": 0.509698275862069,
+                "accuracy@5": 0.5818965517241379,
+                "accuracy@10": 0.6605603448275862,
+                "ndcg@10": 0.4910721986056566,
+                "mrr@10": 0.4374769088669945
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.26944444444444443,
+                "accuracy@3": 0.4583333333333333,
+                "accuracy@5": 0.5541666666666667,
+                "accuracy@10": 0.6388888888888888,
+                "ndcg@10": 0.4192795726565468,
+                "mrr@10": 0.38793099647266294
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_abs_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9752397041145251,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9672131147540983,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9810863611902975,
+                "mrr@10": 0.9774590163934426
+            },
+            "dot_score": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9780611952359652,
+                "mrr@10": 0.9733606557377049
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9672131147540983,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9800131664888921,
+                "mrr@10": 0.9760928961748634
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9491869918699187,
+                "accuracy@3": 0.9817073170731707,
+                "accuracy@5": 0.9898373983739838,
+                "accuracy@10": 0.9979674796747967,
+                "ndcg@10": 0.9752397041145251,
+                "mrr@10": 0.9678095560717512
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_abs.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.98790129680604,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9780611952359652,
+                "mrr@10": 0.9733606557377049
+            },
+            "dot_score": {
+                "accuracy@1": 0.9426229508196722,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9698644739244898,
+                "mrr@10": 0.9624316939890709
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9672131147540983,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9810863611902975,
+                "mrr@10": 0.9774590163934426
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.975609756097561,
+                "accuracy@3": 0.9959349593495935,
+                "accuracy@5": 0.9959349593495935,
+                "accuracy@10": 0.9959349593495935,
+                "ndcg@10": 0.98790129680604,
+                "mrr@10": 0.9850948509485096
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.935404667723217,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.8852459016393442,
+                "accuracy@3": 0.9590163934426229,
+                "accuracy@5": 0.9672131147540983,
+                "accuracy@10": 0.9836065573770492,
+                "ndcg@10": 0.9379859904574276,
+                "mrr@10": 0.923087431693989
+            },
+            "dot_score": {
+                "accuracy@1": 0.7786885245901639,
+                "accuracy@3": 0.9180327868852459,
+                "accuracy@5": 0.9508196721311475,
+                "accuracy@10": 0.9754098360655737,
+                "ndcg@10": 0.8867916105948846,
+                "mrr@10": 0.8573185011709602
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.860655737704918,
+                "accuracy@3": 0.9590163934426229,
+                "accuracy@5": 0.9672131147540983,
+                "accuracy@10": 0.9754098360655737,
+                "ndcg@10": 0.9264675321320149,
+                "mrr@10": 0.9099726775956284
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.8760162601626016,
+                "accuracy@3": 0.9532520325203252,
+                "accuracy@5": 0.9695121951219512,
+                "accuracy@10": 0.9878048780487805,
+                "ndcg@10": 0.935404667723217,
+                "mrr@10": 0.9182717124790296
+            }
+        }
+    }
+}

results/STS/scores_jsick.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.818348421573557,
+    "details": {
+        "optimal_similarity_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.835308064752953,
+                "spearman": 0.8272512935151636
+            },
+            "manhatten_distance": {
+                "pearson": 0.8389303465274891,
+                "spearman": 0.8219151654579893
+            },
+            "euclidean_distance": {
+                "pearson": 0.8389303465274891,
+                "spearman": 0.8219151654579893
+            },
+            "dot_score": {
+                "pearson": 0.8246951338051995,
+                "spearman": 0.8068593942621228
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8249749618012283,
+                "spearman": 0.818348421573557
+            }
+        }
+    }
+}

results/STS/scores_jsts.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.8444160195227215,
+    "details": {
+        "optimal_similarity_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8474526069337475,
+                "spearman": 0.8084969817527035
+            },
+            "manhatten_distance": {
+                "pearson": 0.8399416970995852,
+                "spearman": 0.8066658111776235
+            },
+            "euclidean_distance": {
+                "pearson": 0.8399416970995852,
+                "spearman": 0.8066658111776235
+            },
+            "dot_score": {
+                "pearson": 0.8298431009782351,
+                "spearman": 0.7888123463133008
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8792858102276115,
+                "spearman": 0.8444160195227215
+            }
+        }
+    }
+}

results/summary.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+    "Classification": {
+        "amazon_counterfactual_classification": {
+            "macro_f1": 0.7719949223708622
+        },
+        "amazon_review_classification": {
+            "macro_f1": 0.5925073173503907
+        },
+        "massive_intent_classification": {
+            "macro_f1": 0.8253141488047901
+        },
+        "massive_scenario_classification": {
+            "macro_f1": 0.8980428060442256
+        }
+    },
+    "Reranking": {
+        "esci": {
+            "ndcg@10": 0.9357116238638268
+        }
+    },
+    "Retrieval": {
+        "jagovfaqs_22k": {
+            "ndcg@10": 0.7936169854294086
+        },
+        "nlp_journal_abs_intro": {
+            "ndcg@10": 0.9752397041145251
+        },
+        "nlp_journal_title_abs": {
+            "ndcg@10": 0.98790129680604
+        },
+        "nlp_journal_title_intro": {
+            "ndcg@10": 0.935404667723217
+        },
+        "jaqket": {
+            "ndcg@10": 0.6851508044977195
+        },
+        "mrtydi": {
+            "ndcg@10": 0.4192795726565468
+        }
+    },
+    "STS": {
+        "jsick": {
+            "spearman": 0.818348421573557
+        },
+        "jsts": {
+            "spearman": 0.8444160195227215
+        }
+    },
+    "Clustering": {
+        "livedoor_news": {
+            "v_measure_score": 0.5739739304506893
+        },
+        "mewsc16": {
+            "v_measure_score": 0.4954965558606599
+        }
+    },
+    "PairClassification": {
+        "paws_x_ja": {
+            "binary_f1": 0.6236711552090716
+        }
+    }
+}