hpprc committed on 8 days ago

Commit

7886bde

verified ·

1 Parent(s): b7044e8

Upload 17 files

Browse files

Files changed (17) hide show

results/Classification/scores_amazon_counterfactual_classification.json +23 -0
results/Classification/scores_amazon_review_classification.json +23 -0
results/Classification/scores_massive_intent_classification.json +23 -0
results/Classification/scores_massive_scenario_classification.json +23 -0
results/Clustering/scores_livedoor_news.json +36 -0
results/Clustering/scores_mewsc16.json +36 -0
results/PairClassification/scores_paws_x_ja.json +41 -0
results/Reranking/scores_esci.json +31 -0
results/Retrieval/scores_jagovfaqs_22k.json +43 -0
results/Retrieval/scores_jaqket.json +43 -0
results/Retrieval/scores_mrtydi.json +43 -0
results/Retrieval/scores_nlp_journal_abs_intro.json +43 -0
results/Retrieval/scores_nlp_journal_title_abs.json +43 -0
results/Retrieval/scores_nlp_journal_title_intro.json +43 -0
results/STS/scores_jsick.json +31 -0
results/STS/scores_jsts.json +31 -0
results/summary.json +62 -0

results/Classification/scores_amazon_counterfactual_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8076989283604759,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.9098712446351931,
+                "macro_f1": 0.6259174311926605
+            },
+            "logreg": {
+                "accuracy": 0.9120171673819742,
+                "macro_f1": 0.7401074610623682
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.9346895074946466,
+                "macro_f1": 0.8076989283604759
+            }
+        }
+    }
+}

results/Classification/scores_amazon_review_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.6023266627293248,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.4572,
+                "macro_f1": 0.4502483462385972
+            },
+            "logreg": {
+                "accuracy": 0.6076,
+                "macro_f1": 0.6024977722075338
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.6066,
+                "macro_f1": 0.6023266627293248
+            }
+        }
+    }
+}

results/Classification/scores_massive_intent_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.7927230771632646,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.7712739793408756,
+                "macro_f1": 0.7556875588894797
+            },
+            "logreg": {
+                "accuracy": 0.8327594687653713,
+                "macro_f1": 0.8262484502743833
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8298587760591796,
+                "macro_f1": 0.7927230771632646
+            }
+        }
+    }
+}

results/Classification/scores_massive_scenario_classification.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "metric_name": "macro_f1",
+    "metric_value": 0.8836398132177599,
+    "details": {
+        "optimal_classifier_name": "logreg",
+        "val_scores": {
+            "knn_cosine_k_2": {
+                "accuracy": 0.8657156910969012,
+                "macro_f1": 0.859699565451082
+            },
+            "logreg": {
+                "accuracy": 0.882439744220364,
+                "macro_f1": 0.8795176163694028
+            }
+        },
+        "test_scores": {
+            "logreg": {
+                "accuracy": 0.8856758574310692,
+                "macro_f1": 0.8836398132177599
+            }
+        }
+    }
+}

results/Clustering/scores_livedoor_news.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.5939535754736538,
+    "details": {
+        "optimal_clustering_model_name": "MiniBatchKMeans",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.6179348586449083,
+                "homogeneity_score": 0.6097315440677872,
+                "completeness_score": 0.6263619179001741
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.565105497880692,
+                "homogeneity_score": 0.5487443358232884,
+                "completeness_score": 0.5824722800893678
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.5881070284908587,
+                "homogeneity_score": 0.5832820306222675,
+                "completeness_score": 0.5930125184379058
+            },
+            "Birch": {
+                "v_measure_score": 0.5762425233128152,
+                "homogeneity_score": 0.5604724804195008,
+                "completeness_score": 0.5929257042269046
+            }
+        },
+        "test_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5939535754736538,
+                "homogeneity_score": 0.584706903609041,
+                "completeness_score": 0.6034974040690197
+            }
+        }
+    }
+}

results/Clustering/scores_mewsc16.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+    "metric_name": "v_measure_score",
+    "metric_value": 0.5133038855338062,
+    "details": {
+        "optimal_clustering_model_name": "Birch",
+        "val_scores": {
+            "MiniBatchKMeans": {
+                "v_measure_score": 0.5141650401941852,
+                "homogeneity_score": 0.5643883631931368,
+                "completeness_score": 0.472149768651869
+            },
+            "AgglomerativeClustering": {
+                "v_measure_score": 0.5147662040011469,
+                "homogeneity_score": 0.5531310208295678,
+                "completeness_score": 0.48137812268486785
+            },
+            "BisectingKMeans": {
+                "v_measure_score": 0.40154435516793424,
+                "homogeneity_score": 0.4376668007935588,
+                "completeness_score": 0.37092997494412233
+            },
+            "Birch": {
+                "v_measure_score": 0.51740915477174,
+                "homogeneity_score": 0.5588139550196363,
+                "completeness_score": 0.4817167967576891
+            }
+        },
+        "test_scores": {
+            "Birch": {
+                "v_measure_score": 0.5133038855338062,
+                "homogeneity_score": 0.5481383517989595,
+                "completeness_score": 0.4826323579250646
+            }
+        }
+    }
+}

results/PairClassification/scores_paws_x_ja.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "metric_name": "binary_f1",
+    "metric_value": 0.6225736879942487,
+    "details": {
+        "optimal_distance_metric": "dot_similarities",
+        "val_scores": {
+            "cosine_distances": {
+                "accuracy": 0.5725,
+                "accuracy_threshold": 0.7325698733329773,
+                "binary_f1": 0.5979670522257273,
+                "binary_f1_threshold": 1.0
+            },
+            "manhatten_distances": {
+                "accuracy": 0.6075,
+                "accuracy_threshold": 91.31901550292969,
+                "binary_f1": 0.6016949152542372,
+                "binary_f1_threshold": 786.4454956054688
+            },
+            "euclidean_distances": {
+                "accuracy": 0.6075,
+                "accuracy_threshold": 5.207228660583496,
+                "binary_f1": 0.6016949152542372,
+                "binary_f1_threshold": 45.460025787353516
+            },
+            "dot_similarities": {
+                "accuracy": 0.5825,
+                "accuracy_threshold": 6222.314453125,
+                "binary_f1": 0.6046176046176047,
+                "binary_f1_threshold": 5251.693359375
+            }
+        },
+        "test_scores": {
+            "dot_similarities": {
+                "accuracy": 0.5705,
+                "accuracy_threshold": 6222.314453125,
+                "binary_f1": 0.6225736879942487,
+                "binary_f1_threshold": 5251.693359375
+            }
+        }
+    }
+}

results/Reranking/scores_esci.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9331356704615785,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9483311922009414,
+                "ndcg@20": 0.9590595401853188,
+                "ndcg@40": 0.9665080519499122
+            },
+            "dot_score": {
+                "ndcg@10": 0.9318167851589819,
+                "ndcg@20": 0.9466971872170977,
+                "ndcg@40": 0.9552823872527728
+            },
+            "euclidean_distance": {
+                "ndcg@10": 0.9481847893372933,
+                "ndcg@20": 0.9590500530543331,
+                "ndcg@40": 0.9664888451197884
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "ndcg@10": 0.9331356704615785,
+                "ndcg@20": 0.9494976605587823,
+                "ndcg@40": 0.9587744868943343
+            }
+        }
+    }
+}

results/Retrieval/scores_jagovfaqs_22k.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7663671365613799,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6343960222287218,
+                "accuracy@3": 0.8008189529102077,
+                "accuracy@5": 0.8464463293360632,
+                "accuracy@10": 0.8900263234863995,
+                "ndcg@10": 0.7660950250050058,
+                "mrr@10": 0.7259386156724558
+            },
+            "dot_score": {
+                "accuracy@1": 0.3208540508920737,
+                "accuracy@3": 0.5007312079555426,
+                "accuracy@5": 0.5761918689675344,
+                "accuracy@10": 0.6689090377303305,
+                "ndcg@10": 0.48749995541910557,
+                "mrr@10": 0.43037089653059146
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.6320561567709857,
+                "accuracy@3": 0.7990640538169055,
+                "accuracy@5": 0.8455688797894121,
+                "accuracy@10": 0.8873939748464463,
+                "ndcg@10": 0.7638332042813676,
+                "mrr@10": 0.7237720348937082
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.6356725146198831,
+                "accuracy@3": 0.8014619883040935,
+                "accuracy@5": 0.8482456140350877,
+                "accuracy@10": 0.887719298245614,
+                "ndcg@10": 0.7663671365613799,
+                "mrr@10": 0.7268944351619795
+            }
+        }
+    }
+}

results/Retrieval/scores_jaqket.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.7328228714411557,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.5829145728643216,
+                "accuracy@3": 0.7849246231155779,
+                "accuracy@5": 0.8261306532663316,
+                "accuracy@10": 0.8613065326633166,
+                "ndcg@10": 0.731508011835382,
+                "mrr@10": 0.6888549892318742
+            },
+            "dot_score": {
+                "accuracy@1": 0.314572864321608,
+                "accuracy@3": 0.4592964824120603,
+                "accuracy@5": 0.5236180904522613,
+                "accuracy@10": 0.6100502512562814,
+                "ndcg@10": 0.45425172193082125,
+                "mrr@10": 0.4054387014437263
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.5748743718592965,
+                "accuracy@3": 0.7849246231155779,
+                "accuracy@5": 0.8301507537688442,
+                "accuracy@10": 0.8703517587939699,
+                "ndcg@10": 0.7322414692489264,
+                "mrr@10": 0.6869342745473402
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.5707121364092277,
+                "accuracy@3": 0.7823470411233701,
+                "accuracy@5": 0.8445336008024072,
+                "accuracy@10": 0.8786359077231695,
+                "ndcg@10": 0.7328228714411557,
+                "mrr@10": 0.6850639219245043
+            }
+        }
+    }
+}

results/Retrieval/scores_mrtydi.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.4743228662069064,
+    "details": {
+        "optimal_distance_metric": "euclidean_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.3933189655172414,
+                "accuracy@3": 0.5711206896551724,
+                "accuracy@5": 0.6336206896551724,
+                "accuracy@10": 0.7047413793103449,
+                "ndcg@10": 0.5454869459262537,
+                "mrr@10": 0.4949580083470164
+            },
+            "dot_score": {
+                "accuracy@1": 0.15193965517241378,
+                "accuracy@3": 0.26185344827586204,
+                "accuracy@5": 0.3114224137931034,
+                "accuracy@10": 0.3771551724137931,
+                "ndcg@10": 0.2578188772550815,
+                "mrr@10": 0.22044848111658447
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.39655172413793105,
+                "accuracy@3": 0.5754310344827587,
+                "accuracy@5": 0.6325431034482759,
+                "accuracy@10": 0.7036637931034483,
+                "ndcg@10": 0.5474573216533163,
+                "mrr@10": 0.49769387999452624
+            }
+        },
+        "test_scores": {
+            "euclidean_distance": {
+                "accuracy@1": 0.3402777777777778,
+                "accuracy@3": 0.5333333333333333,
+                "accuracy@5": 0.6097222222222223,
+                "accuracy@10": 0.6805555555555556,
+                "ndcg@10": 0.4743228662069064,
+                "mrr@10": 0.4500749559082889
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_abs_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9940302632170871,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 1.0,
+                "accuracy@3": 1.0,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 1.0,
+                "mrr@10": 1.0
+            },
+            "dot_score": {
+                "accuracy@1": 0.819672131147541,
+                "accuracy@3": 0.9426229508196722,
+                "accuracy@5": 0.9672131147540983,
+                "accuracy@10": 0.9836065573770492,
+                "ndcg@10": 0.9085710688150764,
+                "mrr@10": 0.8836976320582878
+            },
+            "euclidean_distance": {
+                "accuracy@1": 1.0,
+                "accuracy@3": 1.0,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 1.0,
+                "mrr@10": 1.0
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9898373983739838,
+                "accuracy@3": 0.9959349593495935,
+                "accuracy@5": 0.9959349593495935,
+                "accuracy@10": 0.9979674796747967,
+                "ndcg@10": 0.9940302632170871,
+                "mrr@10": 0.992773261065944
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_abs.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9812952838148481,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9812321198854286,
+                "mrr@10": 0.975
+            },
+            "dot_score": {
+                "accuracy@1": 0.680327868852459,
+                "accuracy@3": 0.8852459016393442,
+                "accuracy@5": 0.9344262295081968,
+                "accuracy@10": 0.9754098360655737,
+                "ndcg@10": 0.8353833079929985,
+                "mrr@10": 0.7894320843091334
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9590163934426229,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 1.0,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9812321198854286,
+                "mrr@10": 0.975
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.959349593495935,
+                "accuracy@3": 0.9939024390243902,
+                "accuracy@5": 0.9939024390243902,
+                "accuracy@10": 0.9959349593495935,
+                "ndcg@10": 0.9812952838148481,
+                "mrr@10": 0.9762388695315525
+            }
+        }
+    }
+}

results/Retrieval/scores_nlp_journal_title_intro.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "metric_name": "ndcg@10",
+    "metric_value": 0.9645142540656014,
+    "details": {
+        "optimal_distance_metric": "cosine_similarity",
+        "val_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9426229508196722,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 1.0,
+                "ndcg@10": 0.9724502516030613,
+                "mrr@10": 0.9634562841530055
+            },
+            "dot_score": {
+                "accuracy@1": 0.8278688524590164,
+                "accuracy@3": 0.9262295081967213,
+                "accuracy@5": 0.9426229508196722,
+                "accuracy@10": 0.9590163934426229,
+                "ndcg@10": 0.8978665647400809,
+                "mrr@10": 0.8777322404371585
+            },
+            "euclidean_distance": {
+                "accuracy@1": 0.9344262295081968,
+                "accuracy@3": 0.9836065573770492,
+                "accuracy@5": 0.9918032786885246,
+                "accuracy@10": 0.9918032786885246,
+                "ndcg@10": 0.9668393079701575,
+                "mrr@10": 0.9583333333333334
+            }
+        },
+        "test_scores": {
+            "cosine_similarity": {
+                "accuracy@1": 0.9288617886178862,
+                "accuracy@3": 0.9817073170731707,
+                "accuracy@5": 0.9878048780487805,
+                "accuracy@10": 0.991869918699187,
+                "ndcg@10": 0.9645142540656014,
+                "mrr@10": 0.9553184281842817
+            }
+        }
+    }
+}

results/STS/scores_jsick.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.7464955416231873,
+    "details": {
+        "optimal_similarity_metric": "manhatten_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.7836218434851538,
+                "spearman": 0.756430688801211
+            },
+            "manhatten_distance": {
+                "pearson": 0.7923932174397492,
+                "spearman": 0.7607985259507607
+            },
+            "euclidean_distance": {
+                "pearson": 0.7923932174397492,
+                "spearman": 0.7607985259507607
+            },
+            "dot_score": {
+                "pearson": 0.5635997924106966,
+                "spearman": 0.5248804985646915
+            }
+        },
+        "test_scores": {
+            "manhatten_distance": {
+                "pearson": 0.7817941273703908,
+                "spearman": 0.7464955416231873
+            }
+        }
+    }
+}

results/STS/scores_jsts.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "metric_name": "spearman",
+    "metric_value": 0.8385298782229563,
+    "details": {
+        "optimal_similarity_metric": "manhatten_distance",
+        "val_scores": {
+            "cosine_similarity": {
+                "pearson": 0.8486043626272783,
+                "spearman": 0.8120034758535889
+            },
+            "manhatten_distance": {
+                "pearson": 0.855291696445933,
+                "spearman": 0.8162534021929027
+            },
+            "euclidean_distance": {
+                "pearson": 0.855291696445933,
+                "spearman": 0.8162534021929027
+            },
+            "dot_score": {
+                "pearson": 0.6439197584013935,
+                "spearman": 0.5839306171056204
+            }
+        },
+        "test_scores": {
+            "manhatten_distance": {
+                "pearson": 0.8750297182135832,
+                "spearman": 0.8385298782229563
+            }
+        }
+    }
+}

results/summary.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+    "Classification": {
+        "amazon_counterfactual_classification": {
+            "macro_f1": 0.8076989283604759
+        },
+        "amazon_review_classification": {
+            "macro_f1": 0.6023266627293248
+        },
+        "massive_intent_classification": {
+            "macro_f1": 0.7927230771632646
+        },
+        "massive_scenario_classification": {
+            "macro_f1": 0.8836398132177599
+        }
+    },
+    "Reranking": {
+        "esci": {
+            "ndcg@10": 0.9331356704615785
+        }
+    },
+    "Retrieval": {
+        "jagovfaqs_22k": {
+            "ndcg@10": 0.7663671365613799
+        },
+        "jaqket": {
+            "ndcg@10": 0.7328228714411557
+        },
+        "mrtydi": {
+            "ndcg@10": 0.4743228662069064
+        },
+        "nlp_journal_abs_intro": {
+            "ndcg@10": 0.9940302632170871
+        },
+        "nlp_journal_title_abs": {
+            "ndcg@10": 0.9812952838148481
+        },
+        "nlp_journal_title_intro": {
+            "ndcg@10": 0.9645142540656014
+        }
+    },
+    "STS": {
+        "jsick": {
+            "spearman": 0.7464955416231873
+        },
+        "jsts": {
+            "spearman": 0.8385298782229563
+        }
+    },
+    "Clustering": {
+        "livedoor_news": {
+            "v_measure_score": 0.5939535754736538
+        },
+        "mewsc16": {
+            "v_measure_score": 0.5133038855338062
+        }
+    },
+    "PairClassification": {
+        "paws_x_ja": {
+            "binary_f1": 0.6225736879942487
+        }
+    }
+}