import json
import unittest
from unittest.mock import patch, MagicMock

from fastapi import HTTPException

from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
from my_ghost_writer.jsonpath_extractor import JSONPathStructureAnalyzer
from my_ghost_writer.text_parsers2 import (extract_contextual_info_by_indices, get_wordnet_synonyms, inflect_synonym,
                                           is_nlp_available, process_synonym_groups)
from my_ghost_writer.type_hints import TermRelationships, RelatedEntry
from tests import EVENTS_FOLDER
from tests.my_ghost_writer.helpers_tests import analyze_detailed_report_lists


class TestTextParsers2(unittest.TestCase):
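    """Tests for my_ghost_writer.text_parsers2: spaCy availability checks, contextual info
    extraction, WordNet synonym lookups, inflection, and synonym-group processing."""
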
    @patch("my_ghost_writer.text_parsers2.nlp", new=MagicMock())
    def test_is_nlp_available_ok(self):
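        """Tests that is_nlp_available() returns True when the spaCy model object is present."""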
        check = is_nlp_available()
        self.assertTrue(check)

    @patch("my_ghost_writer.text_parsers2.nlp", new=None)
    def test_is_nlp_available_fail(self):
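        """Tests that is_nlp_available() returns False when the spaCy model is None."""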
        check = is_nlp_available()
        self.assertFalse(check)

    def test_extract_contextual_info_by_indices_valid(self):
        """Tests valid context extraction using the real spaCy model."""
        text = "The quick brown fox jumps over the lazy dog"
        start_idx, end_idx, target_word = 4, 9, "quick"

        result = extract_contextual_info_by_indices(text, start_idx, end_idx, target_word)

        self.assertEqual(result['word'], target_word)
        self.assertEqual(result['lemma'], "quick")
        self.assertEqual(result['pos'], "ADJ")
        self.assertEqual(result['tag'], "JJ")
        self.assertEqual(result['dependency'], "amod")
        self.assertEqual(result['context_sentence'], text)
        self.assertIn("context_words", result)
        self.assertEqual(result['char_start'], start_idx)
        self.assertEqual(result['char_end'], start_idx + len(target_word))

    def test_extract_contextual_info_by_indices_invalid_indices(self):
        """Tests that invalid indices raise a 400 HTTPException."""
        with self.assertRaises(HTTPException) as context:
            extract_contextual_info_by_indices("Test text that raises a 400 ", 100, 200, "test")

        self.assertEqual(context.exception.status_code, 400)
        self.assertEqual(context.exception.detail, "Invalid start/end indices")

    @patch("my_ghost_writer.text_parsers2.nlp", new=None)
    def test_extract_contextual_info_nlp_unavailable(self):
        """Tests that a 500 HTTPException is raised if the spaCy model is not available."""
        with self.assertRaises(HTTPException) as context:
            extract_contextual_info_by_indices("text", 0, 4, "text")

        self.assertEqual(context.exception.status_code, 500)
        self.assertEqual(context.exception.detail, "spaCy model not available")

    def test_extract_contextual_info_word_mismatch(self):
        """Tests that a word/index mismatch raises a 400 error that is re-wrapped as a 500 HTTPException."""
        text = "The quick brown fox"
        start_idx, end_idx, target_word = 4, 9, "brown"

        with self.assertRaises(HTTPException) as context:
            extract_contextual_info_by_indices(text, start_idx, end_idx, target_word)

        self.assertEqual(context.exception.status_code, 500)
        self.assertIn(f"Error analyzing context: 400: Could not find token for word '{target_word}' at indices {start_idx}-{end_idx}", context.exception.detail)

    @patch("my_ghost_writer.text_parsers2.nlp")
    def test_extract_contextual_info_word_none(self, nlp_mock):
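        """Tests that an empty spaCy doc (no matching token) is reported as a 500 HTTPException."""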
        # The mocked nlp callable returns an empty doc, so no token can match the target word
        nlp_mock.return_value = []
        text = "The quick brown fox jumps over the lazy dog"
        start_idx, end_idx, target_word = 4, 9, "quick"
        with self.assertRaises(HTTPException) as context:
            extract_contextual_info_by_indices(text, start_idx, end_idx, target_word)

        self.assertEqual(context.exception.status_code, 500)
        self.assertIn("Error analyzing context: 400: Could not find token for word 'quick' at indices 4-9", context.exception.detail)

    def test_get_wordnet_synonyms(self):
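        """Tests the structure of the first related-words group for 'piano' against a stored JSON report."""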
        with open(EVENTS_FOLDER / "get_wordnet_synonyms_piano_ok1.json", "r") as src:
            expected_detailed_report = json.load(src)
        word = "piano"
        related_words = get_wordnet_synonyms(word)
        first_related_words = related_words[0]
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(first_related_words)
        detailed_report = analyzer.get_detailed_type_report()
        analyze_detailed_report_lists(self, detailed_report, expected_detailed_report)

    def test_get_wordnet_synonyms_custom_entry(self):
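        """Tests ADJ-filtered lookup for 'happy' against a stored report, ignoring the variable 'examples' field."""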
        word = "happy"
        pos = "ADJ"
        with open(EVENTS_FOLDER / "get_wordnet_synonyms_custom_entry_happy.json", "r") as src:
            expected_report_dict_list = json.load(src)

        related_word_groups = get_wordnet_synonyms(word, pos)
        self.assertIsInstance(related_word_groups[0]["examples"], list)
        for related_word_nth, expected_detailed_report_nth in zip(related_word_groups, expected_report_dict_list):
            del related_word_nth['examples']
            analyzer = JSONPathStructureAnalyzer()
            analyzer.extract_all_paths(related_word_nth)
            detailed_report = analyzer.get_detailed_type_report(get_samples=False)
            self.assertDictEqual(detailed_report, expected_detailed_report_nth)

    def test_get_wordnet_synonyms_pos_filter(self):
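        """Tests that filtering by pos_tag='VERB' returns only verb senses."""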
        word = "hunt"
        related_words_verbs = get_wordnet_synonyms(word, pos_tag="VERB")

        self.assertGreater(len(related_words_verbs), 0)
        for sense in related_words_verbs:
            self.assertEqual(sense['wordnet_pos'], 'v')

    @patch("my_ghost_writer.text_parsers2.wn.synsets")
    def test_get_wordnet_synonyms_generic_exception(self, mock_synsets):
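        """Tests that an unexpected WordNet error is wrapped in a 500 HTTPException."""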
        mock_synsets.side_effect = Exception("test exception")
        with self.assertRaises(HTTPException) as context:
            get_wordnet_synonyms("test", 'NOUN')

        self.assertEqual(context.exception.status_code, 500)
        self.assertIn("Error retrieving related words: 'test exception'", context.exception.detail)

    def test_inflect_synonym_noun_plural(self):
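        """Tests noun plural inflection (NNS), including the irregular 'child' -> 'children'."""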
        original_token_info = {'pos': 'NOUN', 'tag': 'NNS', 'is_lower': True, 'is_title': False, 'is_upper': False}

        self.assertEqual(inflect_synonym("kid", original_token_info), "kids")
        self.assertEqual(inflect_synonym("child", original_token_info), "children")

    def test_inflect_synonym_verb_past1(self):
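        """Tests verb past tense inflection (VBD) with a full contextual-info payload."""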
        original_token_info = {
            'word': 'looked', 'lemma': 'look', 'pos': 'VERB', 'tag': 'VBD', 'is_title': False, 'is_upper': False,
            'is_lower': True, 'dependency': 'ROOT',
            'context_sentence': 'He looked back at the whisperers as if he wanted to say something to them, but thought better of it.',
            'context_words': ['He', 'looked', 'back', 'at', 'the', 'whisperers', 'as'], 'sentence_position': 1,
            'char_start': 3, 'char_end': 9, 'original_indices': {'start': 3, 'end': 9}
        }
        result = inflect_synonym("write", original_token_info)
        self.assertEqual(result, "wrote")

    def test_inflect_synonym_verb_past2(self):
        """Tests verb past tense inflection (VBD)."""
        original_token_info = {'pos': 'VERB', 'tag': 'VBD', 'is_lower': True, 'is_title': False, 'is_upper': False}
        self.assertEqual(inflect_synonym("write", original_token_info), "wrote")
        self.assertEqual(inflect_synonym("look", original_token_info), "looked")

    def test_inflect_synonym_verb_present_participle(self):
        """Tests verb present participle inflection (VBG, e.g., 'writing')."""
        original_token_info = {'pos': 'VERB', 'tag': 'VBG', 'is_lower': True, 'is_title': False, 'is_upper': False}

        self.assertEqual(inflect_synonym("write", original_token_info), "writing")
        self.assertEqual(inflect_synonym("look", original_token_info), "looking")

    def test_inflect_synonym_verb_third_person_singular(self):
        """Tests verb third-person singular inflection (VBZ, e.g., 'writes')."""
        original_token_info = {'pos': 'VERB', 'tag': 'VBZ', 'is_lower': True, 'is_title': False, 'is_upper': False}

        self.assertEqual(inflect_synonym("write", original_token_info), "writes")
        self.assertEqual(inflect_synonym("look", original_token_info), "looks")

    def test_inflect_synonym_adjective_comparative(self):
        """Tests adjective comparative inflection (e.g., large -> larger) without mocks."""
        original_token_info = {
            'word': 'bigger',
            'lemma': 'big',
            'pos': 'ADJ',
            'tag': 'JJR',
            'is_title': False,
            'is_upper': False,
            'is_lower': True,
            'dependency': 'acomp',
            'context_sentence': 'My house is bigger than yours.',
            'context_words': ['house', 'is', 'bigger', 'than', 'yours', '.'],
            'sentence_position': 3,
            'char_start': 12,
            'char_end': 18,
            'original_indices': {'start': 12, 'end': 18}
        }
        synonym_to_inflect = "large"

        result = inflect_synonym(synonym_to_inflect, original_token_info)

        self.assertEqual(result, "larger")

    def test_inflect_synonym_adjective_superlative(self):
        """Tests adjective superlative inflection (e.g., large -> largest) without mocks."""
        original_token_info = {
            'word': 'greatest',
            'lemma': 'great',
            'pos': 'ADJ',
            'tag': 'JJS',
            'is_title': False,
            'is_upper': False,
            'is_lower': True,
            'dependency': 'amod',
            'context_sentence': 'He is the greatest of all time.',
            'context_words': ['is', 'the', 'greatest', 'of', 'all', 'time', '.'],
            'sentence_position': 3,
            'char_start': 10,
            'char_end': 18,
            'original_indices': {'start': 10, 'end': 18}
        }
        synonym_to_inflect = "large"

        result = inflect_synonym(synonym_to_inflect, original_token_info)

        self.assertEqual(result, "largest")

    def test_inflect_synonym_is_title(self):
        """Tests verb past tense inflection (VBD) with is_title=True; is_lower is set to False for good measure."""
        original_token_info = {'pos': 'VERB', 'tag': 'VBD', 'is_lower': False, 'is_title': True, 'is_upper': False}
        self.assertEqual(inflect_synonym("write", original_token_info), "Wrote")
        self.assertEqual(inflect_synonym("look", original_token_info), "Looked")

    def test_inflect_synonym_is_upper(self):
        """Tests verb past tense inflection (VBD) with is_upper=True; is_lower is set to False for good measure."""
        original_token_info = {'pos': 'VERB', 'tag': 'VBD', 'is_lower': False, 'is_title': False, 'is_upper': True}
        self.assertEqual(inflect_synonym("write", original_token_info), "WROTE")
        self.assertEqual(inflect_synonym("look", original_token_info), "LOOKED")

    @patch("my_ghost_writer.text_parsers2.nlp", new=None)
    def test_inflect_synonym_nlp_none(self):
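        """Tests that the synonym is returned unchanged when the spaCy model is unavailable."""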
        result = inflect_synonym("test", {})
        self.assertEqual(result, "test")

    @patch("my_ghost_writer.text_parsers2.nlp")
    def test_inflect_synonym_nlp_exception(self, nlp_mock):
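        """Tests that the synonym is returned unchanged when the spaCy pipeline raises an exception."""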
        nlp_mock.side_effect = Exception("test exception")
        original_token_info = {'pos': 'VERB', 'tag': 'VBG', 'is_lower': True, 'is_title': False, 'is_upper': False}
        self.assertEqual(inflect_synonym("test", original_token_info), "test")

        self.assertEqual(inflect_synonym("write", original_token_info), "write")
        self.assertEqual(inflect_synonym("look", original_token_info), "look")

    def test_inflect_synonym_verbs(self):
        """Tests various verb inflections using subtests."""
        test_cases = [
            ("VBD", "write", "wrote"),
            ("VBD", "look", "looked"),
            ("VBG", "write", "writing"),
            ("VBG", "look", "looking"),
            ("VBZ", "write", "writes"),
            ("VBZ", "look", "looks"),
        ]

        for tag, synonym, expected in test_cases:
            with self.subTest(tag=tag, synonym=synonym):
                original_token_info = {
                    'pos': 'VERB', 'tag': tag, 'is_lower': True, 'is_title': False, 'is_upper': False
                }
                self.assertEqual(inflect_synonym(synonym, original_token_info), expected)

    def test_inflect_synonym_casing(self):
        """Tests that casing is correctly applied during inflection."""
        test_cases = [
            (True, False, "write", "Wrote"),
            (False, True, "write", "WROTE"),
            (False, False, "look", "looked"),
        ]
        for is_title, is_upper, synonym, expected in test_cases:
            with self.subTest(is_title=is_title, is_upper=is_upper):
                original_token_info = {
                    'pos': 'VERB', 'tag': 'VBD', 'is_lower': not (is_title or is_upper),
                    'is_title': is_title, 'is_upper': is_upper
                }
                self.assertEqual(inflect_synonym(synonym, original_token_info), expected)

    def test_process_synonym_groups(self):
        """Tests the full processing pipeline for a verb."""
        word = "look"
        context_info = {'char_end': 9, 'char_start': 3,
                        'context_sentence': 'He looked back at the whisperers as if he wanted to say something to them, but thought better of it.',
                        'context_words': ['He', 'looked', 'back', 'at', 'the', 'whisperers', 'as'],
                        'dependency': 'ROOT', 'is_lower': True, 'is_title': False, 'is_upper': False, 'lemma': 'look',
                        'original_indices': {'end': 9, 'start': 3}, 'pos': 'VERB', 'sentence_position': 1, 'tag': 'VBD',
                        'word': 'looked'}

        result = process_synonym_groups(word, context_info)

        self.assertIsInstance(result, list)
        self.assertGreater(len(result), 0)

        first_sense = dict(result[0])
        self.assertIn('definition', first_sense)
        self.assertIn('related_words', first_sense)

        first_synonym_info = dict(first_sense['related_words'][0])
        self.assertIn('base_form', first_synonym_info)
        self.assertIn('inflected_form', first_synonym_info)

        self.assertNotEqual(first_synonym_info['base_form'], first_synonym_info['inflected_form'])

    def test_process_synonym_groups_custom_entry(self):
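        """Tests the shape of every related-words group returned for an adjective in context."""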
        word = "happy"
        context_info = {
            'char_end': 60, 'char_start': 55,
            'context_sentence': 'Even Muggles like yourself should be celebrating, this happy, happy day!"',
            'context_words': ['should', 'be', 'celebrating', ',', 'this', 'happy', ',', 'happy', 'day', '!', '"'],
            'dependency': 'amod', 'is_lower': True, 'is_title': False, 'is_upper': False, 'lemma': 'happy',
            'original_indices': {'end': 60, 'start': 55}, 'pos': 'ADJ', 'sentence_position': 9,
            'tag': 'JJ', 'word': 'happy'
        }

        result_related_word_groups_list = process_synonym_groups(word, context_info)
        self.assertIsInstance(result_related_word_groups_list, list)
        for related_words_group in result_related_word_groups_list:
            related_word_group_dict = dict(related_words_group)
            self.assertIsInstance(related_word_group_dict, dict)
            self.assertIsInstance(related_word_group_dict["definition"], str)
            self.assertIn("relation_type", related_word_group_dict)
            self.assertIn(related_word_group_dict["relation_type"], TermRelationships)
            self.assertIsInstance(related_word_group_dict["examples"], list)
            related_words = related_word_group_dict["related_words"]
            for _word_dict in related_words:
                word_dict = dict(_word_dict)
                self.assertIsInstance(word_dict, dict)
                self.assertIsInstance(word_dict["base_form"], str)
                self.assertIsInstance(word_dict["inflected_form"], str)
                self.assertIsInstance(word_dict["matches_context"], bool)
                self.assertIn("is_custom", word_dict)

    @patch("my_ghost_writer.text_parsers2.wn.synsets")
    def test_process_synonym_groups_not_synonyms_by_sense(self, mock_synsets):
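        """Tests that an empty synset list yields an empty result."""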
        mock_synsets.return_value = []
        context_info = {'pos': 'VERB', 'lemma': 'look'}
        result = process_synonym_groups("look", context_info)
        self.assertListEqual(result, [])

    @patch("my_ghost_writer.text_parsers2.custom_synonym_handler", new_callable=CustomSynonymHandler)
    def test_process_synonym_groups_includes_custom_entries(self, mock_handler):
        """Tests that custom synonyms are correctly processed and included in the results."""
        # new_callable gives a fresh CustomSynonymHandler instance, so the entry added
        # here is isolated from custom synonyms registered elsewhere
        custom_entry = RelatedEntry(
            type=TermRelationships.SYNONYM,
            words=["gleeful", "elated"],
            definition="A custom definition for happy"
        )
        mock_handler.add_entry("happy", [custom_entry])

        word = "happy"
        context_info = {
            'lemma': 'happy', 'pos': 'ADJ', 'tag': 'JJ', 'is_lower': True,
            'is_title': False, 'is_upper': False
        }

        result_groups = process_synonym_groups(word, context_info)

        custom_group = next((g for g in result_groups if g.related_words and g.related_words[0].is_custom), None)

        self.assertIsNotNone(custom_group, "A custom synonym group should have been found in the results.")
        self.assertEqual(custom_group.definition, "A custom definition for happy")
        self.assertEqual(custom_group.relation_type, TermRelationships.SYNONYM)

        custom_base_forms = {related_word.base_form for related_word in custom_group.related_words}
        self.assertEqual(custom_base_forms, {"gleeful", "elated"})


if __name__ == '__main__':
    unittest.main()