from unittest import TestCase

from multi_label_precision_recall_accuracy_fscore import (
    MultiLabelPrecisionRecallAccuracyFscore,
)


class MultiLabelPrecisionRecallAccuracyFscoreTest(TestCase):
    """
    Tests of the metric in its default configuration.

    The multiset test case below inherits from this class, so every test here
    is executed for the multiset configuration as well. Keep each test valid
    for both configurations: never repeat a label within a single sample.
    """

    def setUp(self):
        # A fresh metric per test, so attribute tweaks (e.g. ``beta``) do not leak.
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore()

    @staticmethod
    def _scores(precision, recall, accuracy, fscore):
        """Build the result dictionary that ``compute`` is expected to return."""
        return {
            "precision": precision,
            "recall": recall,
            "accuracy": accuracy,
            "fscore": fscore,
        }

    def _compute(self, predictions, references, **options):
        """Run the metric under test on the given samples and return its result."""
        return self.multi_label_precision_recall_accuracy_fscore.compute(
            predictions=predictions, references=references, **options
        )

    def _assert_scores(self, expected, predictions, references, **options):
        """Assert that the metric result equals ``expected`` (exact comparison)."""
        self.assertDictEqual(
            expected, self._compute(predictions, references, **options)
        )

    def test_eok(self):
        self._assert_scores(
            self._scores(1.0, 1.0, 1.0, 1.0),
            predictions=[[0, 1], [1, 2], [0, 1, 2]],
            references=[[0, 1], [1, 2], [0, 1, 2]],
        )

    def test_eok_string(self):
        self._assert_scores(
            self._scores(1.0, 1.0, 1.0, 1.0),
            predictions=[["0", "1"], ["1", "2"], ["0", "1", "2"]],
            references=[["0", "1"], ["1", "2"], ["0", "1", "2"]],
        )

    def test_empty(self):
        self._assert_scores(
            self._scores(1.0, 1.0, 1.0, 1.0),
            predictions=[[], [], []],
            references=[[], [], []],
        )

    def test_empty_reference(self):
        self._assert_scores(
            self._scores(0.0, 0.0, 0.0, 0.0),
            predictions=[[0, 1], [1, 2], [0, 1, 2]],
            references=[[], [], []],
        )

    def test_empty_prediction(self):
        self._assert_scores(
            self._scores(0.0, 0.0, 0.0, 0.0),
            predictions=[[], [], []],
            references=[[0, 1], [1, 2], [0, 1, 2]],
        )

    def test_completely_different(self):
        self._assert_scores(
            self._scores(0.0, 0.0, 0.0, 0.0),
            predictions=[[0, 1], [1, 2], [0, 1, 2]],
            references=[[3, 4], [5, 6], [7, 8, 9]],
        )

    def test_max_precision(self):
        self._assert_scores(
            self._scores(1.0, 0.5, 0.5, 2 / 3),
            predictions=[[0, 1]],
            references=[[0, 1, 2, 3]],
        )

    def test_max_recall(self):
        self._assert_scores(
            self._scores(0.5, 1.0, 0.5, 2 / 3),
            predictions=[[0, 1, 2, 3]],
            references=[[0, 1]],
        )

    def test_partial_match(self):
        self._assert_scores(
            self._scores(0.5, 0.5, 1 / 3, 0.5),
            predictions=[[0, 1]],
            references=[[0, 2]],
        )

    def test_partial_match_multi_sample(self):
        # The arithmetic is spelled out literally on purpose: the comparison
        # is exact, so the operation order of the expected value matters.
        self._assert_scores(
            self._scores(
                2.5 / 3,
                2 / 3,
                0.5,
                2 * (2.5 / 3 * 2 / 3) / (2.5 / 3 + 2 / 3),
            ),
            predictions=[[0, 1], [0, 1], [2, 3]],
            references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
        )

    def test_beta(self):
        # First set beta through the attribute on the metric object ...
        self.multi_label_precision_recall_accuracy_fscore.beta = 2
        self._assert_scores(
            self._scores(
                2.5 / 3,
                2 / 3,
                0.5,
                5 * (2.5 / 3 * 2 / 3) / (4 * 2.5 / 3 + 2 / 3),
            ),
            predictions=[[0, 1], [0, 1], [2, 3]],
            references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
        )

        # ... then pass it as a keyword argument of ``compute``.
        self._assert_scores(
            self._scores(
                2.5 / 3,
                2 / 3,
                0.5,
                10 * (2.5 / 3 * 2 / 3) / (9 * 2.5 / 3 + 2 / 3),
            ),
            predictions=[[0, 1], [0, 1], [2, 3]],
            references=[[0, 1, 2, 3], [0, 1, 2, 3], [2]],
            beta=3,
        )


class MultiLabelPrecisionRecallAccuracyFscoreTestMultiset(MultiLabelPrecisionRecallAccuracyFscoreTest):
    """
    Tests of the metric created with ``config_name="multiset"``.

    Runs every inherited test plus the cases below, which repeat labels
    within a sample.
    """

    def setUp(self):
        self.multi_label_precision_recall_accuracy_fscore = MultiLabelPrecisionRecallAccuracyFscore(
            config_name="multiset"
        )

    def test_multiset_eok(self):
        self._assert_scores(
            self._scores(1.0, 1.0, 1.0, 1.0),
            predictions=[[0, 1, 1], [1, 2, 2], [0, 1, 2, 1]],
            references=[[1, 0, 1], [1, 2, 2], [0, 1, 1, 2]],
        )

    def test_multiset_partial_match(self):
        self._assert_scores(
            self._scores(1.0, 0.5, 0.5, 2 / 3),
            predictions=[[0, 1, 1]],
            references=[[1, 0, 1, 1, 0, 0]],
        )

    def test_multiset_partial_match_multi_sample(self):
        precision = (1 + 2 / 3) / 2
        recall = (3 / 4 + 1) / 2
        self._assert_scores(
            self._scores(
                precision,
                recall,
                (3 / 4 + 2 / 3) / 2,
                2 * precision * recall / (precision + recall),
            ),
            predictions=[[0, 1, 1], [1, 2, 2]],
            references=[[1, 0, 1, 1], [1, 2]],
        )

    def test_zero_cardinality_precision(self):
        self.multi_label_precision_recall_accuracy_fscore.zero_cardinality_precision = 0.5

        # Empty prediction against a non-empty reference: configured value expected.
        self.assertEqual(
            0.5,
            self._compute(predictions=[[]], references=[[0, 1, 1]])["precision"],
        )
        # Both sides empty: 1.0 is still expected.
        self.assertEqual(
            1.0,
            self._compute(predictions=[[]], references=[[]])["precision"],
        )
        # Non-empty prediction: the regular precision value is expected.
        self.assertEqual(
            2 / 3,
            self._compute(predictions=[[1, 2, 3]], references=[[1, 2]])["precision"],
        )

    def test_zero_cardinality_recall(self):
        self.multi_label_precision_recall_accuracy_fscore.zero_cardinality_recall = 0.5

        # Empty reference against a non-empty prediction: configured value expected.
        self.assertEqual(
            0.5,
            self._compute(predictions=[[0, 1, 1]], references=[[]])["recall"],
        )
        # Both sides empty: 1.0 is still expected.
        self.assertEqual(
            1.0,
            self._compute(predictions=[[]], references=[[]])["recall"],
        )
        # Non-empty reference: the regular recall value is expected.
        self.assertEqual(
            2 / 3,
            self._compute(predictions=[[1, 2]], references=[[1, 2, 3]])["recall"],
        )