Spaces:

aletrn
/

mgw

Sleeping

App Files Files Community

alessandro trinca tornidor committed on Aug 4

Commit

d6e9ab3

1 Parent(s): ff39414

test: add more test cases for get_wordnet_synonyms()

Browse files

Files changed (5) hide show

my_ghost_writer/text_parsers2.py +6 -0
tests/events/get_wordnet_synonyms_day_ok1.json +1 -0
tests/events/get_wordnet_synonyms_tense_ok1.json +1 -0
tests/my_ghost_writer/helpers_tests.py +72 -0
tests/my_ghost_writer/test_text_parsers2.py +9 -13

my_ghost_writer/text_parsers2.py CHANGED Viewed

@@ -6,6 +6,7 @@ import nltk
 import pyinflect
 import spacy
 from fastapi import HTTPException
 from my_ghost_writer.constants import ELIGIBLE_POS, NLTK_DATA, SPACY_MODEL_NAME, app_logger
 from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
@@ -341,8 +342,13 @@ def get_wordnet_synonyms(word: str, pos_tag: Optional[str] = None) -> list[dict[
                     # TermRelationships.PERTAINYM,
                     TermRelationships.SIMILAR_TO
                 ]:
                 result = _get_related_words(synset, rel_type, word_lower)
                 if result:
                     related_word_groups_raw.append(result)
     except Exception as ex1:

 import pyinflect
 import spacy
 from fastapi import HTTPException
+from nltk.corpus.reader import Synset
 from my_ghost_writer.constants import ELIGIBLE_POS, NLTK_DATA, SPACY_MODEL_NAME, app_logger
 from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
                     # TermRelationships.PERTAINYM,
                     TermRelationships.SIMILAR_TO
                 ]:
+                app_logger.info(f"synset: {type(synset)}, '{synset}'")
+                if not isinstance(synset, Synset):
+                    pass
                 result = _get_related_words(synset, rel_type, word_lower)
                 if result:
+                    if result["relation_type"] == TermRelationships.CAUSE:
+                        app_logger.info(f"Adding result for relation type '{rel_type}': {result}")
                     related_word_groups_raw.append(result)
     except Exception as ex1:

tests/events/get_wordnet_synonyms_day_ok1.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["time for Earth to make a complete rotation on its axis"], "sample_count": 1}, "$.examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 5}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["twenty-four hours", "twenty-four hour period", "24-hour interval"], "sample_count": 3}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false, "samples": ["TermRelationships.SYNONYM"], "sample_count": 1}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["wordnet"], "sample_count": 1}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["n"], "sample_count": 1}}

tests/events/get_wordnet_synonyms_tense_ok1.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"$.definition": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["a grammatical category of verbs used to express distinctions of time"], "sample_count": 1}, "$.examples": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.examples[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 0}, "$.related_words": {"types": ["list"], "primary_type": "list", "is_array": false, "samples": [], "sample_count": 0}, "$.related_words[*]": {"types": ["array"], "primary_type": "array", "is_array": true, "samples": [], "sample_count": 0, "array_length": 2}, "$.related_words[*].base_form": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["grammatical category", "syntactic category"], "sample_count": 2}, "$.relation_type": {"types": ["TermRelationships"], "primary_type": "TermRelationships", "is_array": false, "samples": ["TermRelationships.HYPERNYM"], "sample_count": 1}, "$.source": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["wordnet"], "sample_count": 1}, "$.wordnet_pos": {"types": ["str"], "primary_type": "str", "is_array": false, "samples": ["n"], "sample_count": 1}}

tests/my_ghost_writer/helpers_tests.py CHANGED Viewed

@@ -1,3 +1,10 @@
 def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_report: dict):
@@ -8,3 +15,68 @@ def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_
         del row_v["sample_count"]
         del expected_row_v["sample_count"]
         cls.assertDictEqual(row_v, expected_row_v)

+import json
+from nltk.corpus import wordnet as wn
+from tests import EVENTS_FOLDER
+from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
+from my_ghost_writer.text_parsers2 import get_wordnet_synonyms
 def analyze_detailed_report_lists(cls, detailed_report: dict, expected_detailed_report: dict):
         del row_v["sample_count"]
         del expected_row_v["sample_count"]
         cls.assertDictEqual(row_v, expected_row_v)
+def assert__json_structure__get_wordnet_synonyms(cls, word):  # Shared test helper: checks the structure report of get_wordnet_synonyms(word)[0] against the stored fixture; `cls` is the TestCase instance the callers pass as `self`.
+    with open(EVENTS_FOLDER / f"get_wordnet_synonyms_{word}_ok1.json", "r") as src:  # one expected-report fixture per word
+        expected_detailed_report = json.load(src)
+    related_words = get_wordnet_synonyms(word)
+    first_related_words = related_words[0]  # only the first returned entry is analyzed; an empty result would raise IndexError here
+    analyzer = JSONPathStructureAnalyzer()
+    analyzer.extract_all_paths(first_related_words)
+    detailed_report = analyzer.get_detailed_type_report()
+    analyze_detailed_report_lists(cls, detailed_report, expected_detailed_report)  # the actual assertions happen inside this helper
+    # with open(EVENTS_FOLDER / f"get_wordnet_synonyms_{word}_ok1.json", "w") as src:
+    #     json.dump(detailed_report, src)
+def get_relationships(synset):  # Return a dict mapping relation name -> count for one synset (presumably an NLTK WordNet Synset, as the caller feeds it items from wn.all_synsets - confirm).
+    relationships = {
+        'synonyms': len(synset.lemma_names()),  # number of lemma names carried by the synset itself
+        'antonyms': sum(len(lemma.antonyms()) for lemma in synset.lemmas()),  # antonyms are read per lemma, then summed
+        'hypernyms': len(synset.hypernyms()),
+        'hyponyms': len(synset.hyponyms()),
+        'holonyms': len(synset.member_holonyms()) + len(synset.part_holonyms()) + len(synset.substance_holonyms()),  # member + part + substance holonyms
+        'meronyms': len(synset.member_meronyms()) + len(synset.part_meronyms()) + len(synset.substance_meronyms()),  # member + part + substance meronyms
+        'similar_tos': len(synset.similar_tos()),
+        'also_sees': len(synset.also_sees()),
+        'causes': len(synset.causes())
+    }
+    return relationships
+def extract_word_relationships():  # Walk every synset returned by wn.all_synsets for 'n' and 'v' and print three top-5 rankings; returns nothing.
+    results = []
+    holonym_results = []
+    cause_results = []
+    for pos in ['n', 'v']:  # presumably the WordNet noun and verb POS tags - confirm against the NLTK docs
+        for synset in wn.all_synsets(pos):
+            rels = get_relationships(synset)
+            total = sum(1 for v in rels.values() if v > 0)  # number of relation kinds with a non-zero count, not the sum of the counts
+            results.append((synset, total, rels))
+            if rels['holonyms'] > 0:  # keep only synsets that have at least one holonym
+                holonym_results.append((synset, rels['holonyms'], rels))
+            if rels['causes'] > 0:  # keep only synsets that have at least one cause relation
+                cause_results.append((synset, rels['causes'], rels))
+    # Sort each list in descending order of its score (tuple item 1); the top 5 are sliced when printing
+    results.sort(key=lambda x: x[1], reverse=True)
+    holonym_results.sort(key=lambda x: x[1], reverse=True)
+    cause_results.sort(key=lambda x: x[1], reverse=True)
+    print("Top 5 synsets with most relationships:")
+    for synset, total, rels in results[:5]:  # `total` is unpacked but not printed; the full counts dict is shown instead
+        print(f"{synset.name()} ({synset.definition()}): {rels}")
+    print("\nTop 5 synsets with holonym relationships:")
+    for synset, count, rels in holonym_results[:5]:  # `count` is unpacked but not printed
+        print(f"{synset.name()} ({synset.definition()}): {rels}")
+    print("\nTop 5 synsets with cause relationships:")
+    for synset, count, rels in cause_results[:5]:  # `count` is unpacked but not printed
+        print(f"{synset.name()} ({synset.definition()}): {rels}")
+if __name__ == "__main__":  # running this helpers module directly prints the WordNet relationship rankings
+    extract_word_relationships()

tests/my_ghost_writer/test_text_parsers2.py CHANGED Viewed

@@ -10,7 +10,8 @@ from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, g
 from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
 from my_ghost_writer.type_hints import TermRelationships, RelatedEntry
 from tests import EVENTS_FOLDER
-from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists
 class TestTextParsers2(unittest.TestCase):
@@ -83,18 +84,13 @@ class TestTextParsers2(unittest.TestCase):
     def test_get_wordnet_synonyms(self):
         # Test with a word that has known synonyms
-        with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "r") as src:
-            expected_detailed_report = json.load(src)
-        word = "piano"
-        related_words = get_wordnet_synonyms(word)
-        first_related_words = related_words[0]
-        analyzer = JSONPathStructureAnalyzer()
-        analyzer.extract_all_paths(first_related_words)
-        detailed_report = analyzer.get_detailed_type_report()
-        analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)
-        # with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "w") as src:
-        #     json.dump(detailed_report, src)
     def test_get_wordnet_synonyms_custom_entry(self):
         word = "happy"

 from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
 from my_ghost_writer.type_hints import TermRelationships, RelatedEntry
 from tests import EVENTS_FOLDER
+from tests.my_ghost_writer.helpers_tests import (analyze_detailed_report_lists,
+    assert__json_structure__get_wordnet_synonyms)
 class TestTextParsers2(unittest.TestCase):
     def test_get_wordnet_synonyms(self):
         # Test with a word that has known synonyms
+        assert__json_structure__get_wordnet_synonyms(self, "piano")  # the helper loads get_wordnet_synonyms_piano_ok1.json as the expected report
+    def test_get_wordnet_synonyms_day(self):  # structure check for "day" against the fixture added in this commit
+        assert__json_structure__get_wordnet_synonyms(self, "day")  # the helper loads get_wordnet_synonyms_day_ok1.json as the expected report
+    def test_get_wordnet_synonyms_tense(self):  # structure check for "tense" against the fixture added in this commit
+        assert__json_structure__get_wordnet_synonyms(self, "tense")  # the helper loads get_wordnet_synonyms_tense_ok1.json as the expected report
     def test_get_wordnet_synonyms_custom_entry(self):
         word = "happy"