# mgw / tests / my_ghost_writer / test_extract_jsonpaths.py
# alessandro trinca tornidor
# feat: make /thesaurus-inflated-phrase agnostic, not bounded to synonyms - /thesaurus-custom fixed
# a707261
# raw
# history blame
# 56.8 kB
import unittest
import json
from my_ghost_writer.constants import app_logger
from my_ghost_writer.jsonpath_comparator import (compare_json_with_jsonpath, compare_json_with_jsonpath_and_types,
JSONPathComparator, extract_structure_paths, extract_structure_paths_with_types)
from my_ghost_writer.jsonpath_extractor import (JSONPathStructureAnalyzer, analyze_with_jsonpath_detailed,
analyze_with_jsonpath_types, analyze_with_jsonpath, compare_json_with_jsonpath_structures, analyze_dict_list_simple)
# Baseline fixture: two scalars plus a nested object whose array mixes
# plain strings with one nested object (which itself carries an array).
old_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_nested_4": [
            "row 5",
            "row 6",
            "row 7 nested",
            {
                "key_nested4": "row 8",
                "array_nested_5": ["row 9", "row 10"],
            },
        ],
    },
}
# Modified fixture: same shape as ``old_json`` except that the array key is
# renamed to ``array_changed_4``, its second element is changed, and the
# nested ``key_nested4`` key is renamed to ``last_change``.
new_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_changed_4": [
            "row 5",
            "row changed 6",
            "row 7 nested",
            {
                "last_change": "row 8",
                "array_nested_5": ["row 9", "row 10"],
            },
        ],
    },
}
class TestJSONPathStructureAnalyzer(unittest.TestCase):
    """Tests for ``JSONPathStructureAnalyzer`` and its convenience wrappers.

    Many cases run against the module-level ``old_json`` / ``new_json``
    fixtures, which differ by one renamed array key, one changed array
    element and one renamed nested key.
    """

    def test_get_paths_with_types_basic(self):
        """Scalar fields are reported under their Python type names."""
        test_data = {
            "string_field": "hello",
            "int_field": 42,
            "float_field": 3.14,
            "bool_field": True,
            "null_field": None,
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        paths_with_types = analyzer.get_paths_with_types()

        # Verify basic types
        self.assertEqual(paths_with_types["$.string_field"], "str")
        self.assertEqual(paths_with_types["$.int_field"], "int")
        self.assertEqual(paths_with_types["$.float_field"], "float")
        self.assertEqual(paths_with_types["$.bool_field"], "bool")
        self.assertEqual(paths_with_types["$.null_field"], "NoneType")

    def test_get_paths_with_types_arrays(self):
        """Lists (simple, empty, mixed, nested) are typed as ``array``."""
        test_data = {
            "simple_array": [1, 2, 3],
            "empty_array": [],
            "mixed_array": ["string", 42, True],
            "nested_array": [[1, 2], [3, 4]],
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        paths_with_types = analyzer.get_paths_with_types()

        self.assertEqual(paths_with_types["$.simple_array[*]"], "array")
        self.assertEqual(paths_with_types["$.empty_array[*]"], "array")
        self.assertEqual(paths_with_types["$.mixed_array[*]"], "array")
        self.assertEqual(paths_with_types["$.nested_array[*]"], "array")

    def test_get_paths_with_types_with_old_json(self):
        """``get_paths_with_types`` yields the expected entries for ``old_json``."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        paths_with_types = analyzer.get_paths_with_types()

        # Test specific paths from old_json
        self.assertEqual(paths_with_types["$.key1"], "str")
        self.assertEqual(paths_with_types["$.key2"], "int")
        self.assertEqual(paths_with_types["$.key_nested1"], "dict")
        self.assertEqual(paths_with_types["$.key_nested1.array_nested_4[*]"], "array")
        self.assertEqual(paths_with_types["$.key_nested1.key_nested2"], "str")

        # Verify all expected paths are present
        expected_paths = [
            "$.key1", "$.key2", "$.key_nested1",
            "$.key_nested1.key_nested2", "$.key_nested1.key_nested3",
            "$.key_nested1.array_nested_4[*]",
        ]
        for path in expected_paths:
            # subTest: report every missing path, not only the first one
            with self.subTest(path=path):
                self.assertIn(path, paths_with_types, f"Path {path} should be in paths_with_types")

    def test_get_detailed_type_report_basic(self):
        """The detailed report exposes the expected keys for a scalar and an array."""
        test_data = {
            "test_field": "sample_value",
            "array_field": [1, 2, 3],
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        detailed_report = analyzer.get_detailed_type_report()

        # Test structure of the detailed report
        self.assertIn("$.test_field", detailed_report)
        self.assertIn("$.array_field[*]", detailed_report)

        # Test field details
        field_info = detailed_report["$.test_field"]
        self.assertIn("types", field_info)
        self.assertIn("primary_type", field_info)
        self.assertIn("is_array", field_info)
        self.assertIn("samples", field_info)
        self.assertIn("sample_count", field_info)

        # Verify field values
        self.assertEqual(field_info["primary_type"], "str")
        self.assertFalse(field_info["is_array"])
        self.assertIn("sample_value", field_info["samples"])
        self.assertGreater(field_info["sample_count"], 0)

        # Test array field details
        array_info = detailed_report["$.array_field[*]"]
        self.assertTrue(array_info["is_array"])
        self.assertEqual(array_info["primary_type"], "array")
        self.assertEqual(array_info['array_length'], 3)

    def test_get_detailed_type_report_with_old_json(self):
        """The detailed report for ``old_json`` carries types, samples and array length."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        detailed_report = analyzer.get_detailed_type_report()

        # Test specific fields from old_json
        key1_info = detailed_report["$.key1"]
        self.assertEqual(key1_info["primary_type"], "str")
        self.assertFalse(key1_info["is_array"])
        self.assertIn("row 1", key1_info["samples"])

        key2_info = detailed_report["$.key2"]
        self.assertEqual(key2_info["primary_type"], "int")
        self.assertFalse(key2_info["is_array"])
        # The int sample is expected in its string form
        self.assertIn("22", key2_info["samples"])

        # Test array field
        array_info = detailed_report["$.key_nested1.array_nested_4[*]"]
        self.assertTrue(array_info["is_array"])
        self.assertEqual(array_info["primary_type"], "array")
        self.assertEqual(array_info["array_length"], 4)

    def test_get_detailed_type_report_mixed_types(self):
        """A path holding two types gets a ``mixed(...)`` primary type."""
        analyzer = JSONPathStructureAnalyzer()
        # Populate the analyzer's state by hand to build a path with two types
        analyzer.paths.add("$.mixed_field")
        analyzer.types["$.mixed_field"].add("str")
        analyzer.types["$.mixed_field"].add("int")
        analyzer.samples["$.mixed_field"] = ["hello", "42"]

        detailed_report = analyzer.get_detailed_type_report()
        mixed_info = detailed_report["$.mixed_field"]
        self.assertIn("mixed(", mixed_info["primary_type"])
        self.assertFalse(mixed_info["is_array"])
        self.assertEqual(len(mixed_info["types"]), 2)

    def test_analyze_with_jsonpath_types_function(self):
        """``analyze_with_jsonpath_types`` returns a path -> type dict."""
        test_data = {
            "name": "test",
            "count": 5,
            "items": ["a", "b", "c"],
        }
        paths_with_types = analyze_with_jsonpath_types(test_data)

        # Verify function returns expected structure
        self.assertIsInstance(paths_with_types, dict)
        self.assertIn("$.name", paths_with_types)
        self.assertIn("$.count", paths_with_types)
        self.assertIn("$.items[*]", paths_with_types)

        # Verify types
        self.assertEqual(paths_with_types["$.name"], "str")
        self.assertEqual(paths_with_types["$.count"], "int")
        self.assertEqual(paths_with_types["$.items[*]"], "array")

    def test_analyze_with_jsonpath_detailed_function(self):
        """``analyze_with_jsonpath_detailed`` returns a per-path detail dict."""
        test_data = {
            "description": "test description",
            "tags": ["tag1", "tag2"],
        }
        detailed_info = analyze_with_jsonpath_detailed(test_data)

        # Verify function returns expected structure
        self.assertIsInstance(detailed_info, dict)
        self.assertIn("$.description", detailed_info)
        self.assertIn("$.tags[*]", detailed_info)

        # Verify detailed structure
        desc_info = detailed_info["$.description"]
        self.assertIn("types", desc_info)
        self.assertIn("primary_type", desc_info)
        self.assertIn("samples", desc_info)
        self.assertEqual(desc_info["primary_type"], "str")

        tags_info = detailed_info["$.tags[*]"]
        self.assertTrue(tags_info["is_array"])
        self.assertEqual(tags_info["primary_type"], "array")
        self.assertEqual(tags_info["array_length"], 2)

    def test_get_paths_with_types_empty_data(self):
        """An empty dict yields an empty path/type mapping."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({})
        paths_with_types = analyzer.get_paths_with_types()
        # Should return empty dict for empty input
        self.assertEqual(len(paths_with_types), 0)

    def test_get_detailed_type_report_empty_data(self):
        """An empty dict yields an empty detailed report."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths({})
        detailed_report = analyzer.get_detailed_type_report()
        # Should return empty dict for empty input
        self.assertEqual(len(detailed_report), 0)

    def test_paths_with_types_comparison_old_vs_new(self):
        """Path/type maps of ``old_json`` and ``new_json`` differ only on renamed paths."""
        analyzer_old = JSONPathStructureAnalyzer()
        analyzer_old.extract_all_paths(old_json)
        old_paths_with_types = analyzer_old.get_paths_with_types()

        analyzer_new = JSONPathStructureAnalyzer()
        analyzer_new.extract_all_paths(new_json)
        new_paths_with_types = analyzer_new.get_paths_with_types()

        # Find differences
        old_only = set(old_paths_with_types) - set(new_paths_with_types)
        new_only = set(new_paths_with_types) - set(old_paths_with_types)
        common = set(old_paths_with_types) & set(new_paths_with_types)

        # Verify expected differences
        self.assertIn("$.key_nested1.array_nested_4[*]", old_only)
        self.assertIn("$.key_nested1.array_changed_4[*]", new_only)

        # Verify common paths have same types
        for path in common:
            with self.subTest(path=path):
                self.assertEqual(old_paths_with_types[path], new_paths_with_types[path],
                                 f"Type mismatch for common path {path}")

    def test_detailed_report_comparison_old_vs_new(self):
        """Shared paths have the same primary type and array flag in both reports."""
        old_detailed = analyze_with_jsonpath_detailed(old_json)
        new_detailed = analyze_with_jsonpath_detailed(new_json)

        # Check that common fields have consistent detailed info
        common_paths = set(old_detailed) & set(new_detailed)
        for path in common_paths:
            with self.subTest(path=path):
                old_info = old_detailed[path]
                new_info = new_detailed[path]
                # Primary types should match for common paths
                self.assertEqual(old_info["primary_type"], new_info["primary_type"],
                                 f"Primary type mismatch for {path}")
                # Array status should match
                self.assertEqual(old_info["is_array"], new_info["is_array"],
                                 f"Array status mismatch for {path}")

    def test_integration_all_new_methods(self):
        """The report, types and detailed helpers all agree on the set of paths."""
        test_data = {
            "user": {
                "name": "John Doe",
                "age": 30,
                "hobbies": ["reading", "coding", "gaming"],
                "profile": {
                    "active": True,
                    "settings": {
                        "theme": "dark",
                        "notifications": False,
                    },
                },
            },
        }
        # Test all three approaches
        structure_report = analyze_with_jsonpath(test_data)
        paths_with_types = analyze_with_jsonpath_types(test_data)
        detailed_info = analyze_with_jsonpath_detailed(test_data)

        # Report lines look like "<path> -- <value>"; keep only the path part
        report_paths = {
            line.split(' -- ')[0]
            for line in structure_report.split('\n')
            if ' -- ' in line
        }
        types_paths = set(paths_with_types)
        detailed_paths = set(detailed_info)

        # All methods should find the same paths
        self.assertEqual(report_paths, types_paths)
        self.assertEqual(types_paths, detailed_paths)

        # Verify specific expected paths exist
        expected_paths = [
            "$.user",
            "$.user.name",
            "$.user.age",
            "$.user.hobbies[*]",
            "$.user.profile",
            "$.user.profile.active",
            "$.user.profile.settings",
            "$.user.profile.settings.theme",
            "$.user.profile.settings.notifications",
        ]
        for path in expected_paths:
            with self.subTest(path=path):
                self.assertIn(path, types_paths, f"Path {path} should be found by all methods")
                self.assertIn(path, detailed_paths, f"Path {path} should be in detailed info")

    def test_type_consistency_across_methods(self):
        """Simple and detailed type views agree for every non-mixed path."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)

        # Get data using different methods
        paths_with_types = analyzer.get_paths_with_types()
        detailed_report = analyzer.get_detailed_type_report()

        # For each path, verify consistency
        for path, simple_type in paths_with_types.items():
            # Paths absent from the detailed report are deliberately not checked
            if path not in detailed_report:
                continue
            detailed_type = detailed_report[path]["primary_type"]
            # They should match (detailed might have more info for mixed types)
            if detailed_type.startswith("mixed("):
                continue
            with self.subTest(path=path):
                self.assertEqual(simple_type, detailed_type,
                                 f"Type inconsistency for {path}: {simple_type} vs {detailed_type}")

    def test_extract_all_paths_from_old_structure(self):
        """``extract_all_paths`` finds every path of ``old_json``, including nested ones."""
        analyzer = JSONPathStructureAnalyzer()
        paths = analyzer.extract_all_paths(old_json)

        # Verify the top-level paths
        self.assertIn("$.key1", paths)
        self.assertIn("$.key2", paths)
        self.assertIn("$.key_nested1", paths)

        # Verify the nested object paths
        self.assertIn("$.key_nested1.key_nested2", paths)
        self.assertIn("$.key_nested1.key_nested3", paths)
        self.assertIn("$.key_nested1.array_nested_4[*]", paths)

        # Verify the deeply nested paths (3-4 levels deep)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", paths)
        self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*]", paths)

    def test_extract_all_paths_from_new_structure(self):
        """``extract_all_paths`` reflects the renamed keys of ``new_json``."""
        analyzer = JSONPathStructureAnalyzer()
        paths = analyzer.extract_all_paths(new_json)

        # Verify the renamed array path
        self.assertIn("$.key_nested1.array_changed_4[*]", paths)
        # Verify the renamed nested key
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", paths)
        # Verify the unchanged nested array
        self.assertIn("$.key_nested1.array_changed_4[*].array_nested_5[*]", paths)

    def test_structure_report_format_old_json(self):
        """The structure report for ``old_json`` uses ``<path> -- <value>`` lines."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        report = analyzer.get_structure_report()

        # Check specific format elements
        self.assertIn("$.key1 -- row 1", report)
        self.assertIn("$.key2 -- 22", report)
        self.assertIn("$.key_nested1.array_nested_4[*] -- array[4]", report)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4 -- row 8", report)
        self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]", report)

    def test_structure_report_format_new_json(self):
        """The structure report for ``new_json`` shows the renamed elements."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(new_json)
        report = analyzer.get_structure_report()

        # Check renamed elements appear correctly
        self.assertIn("$.key_nested1.array_changed_4[*] -- array[4]", report)
        self.assertIn("$.key_nested1.array_changed_4[*].last_change -- row 8", report)

        # Check unchanged elements
        self.assertIn("$.key1 -- row 1", report)
        self.assertIn("$.key2 -- 22", report)

    def test_analyze_with_jsonpath_function(self):
        """``analyze_with_jsonpath`` returns a non-empty string report per fixture."""
        old_report = analyze_with_jsonpath(old_json)
        new_report = analyze_with_jsonpath(new_json)

        # Verify both reports are valid strings
        self.assertIsInstance(old_report, str)
        self.assertGreater(len(old_report), 0)
        self.assertIsInstance(new_report, str)
        self.assertGreater(len(new_report), 0)

        # Verify key differences
        self.assertIn("array_nested_4", old_report)
        self.assertIn("array_changed_4", new_report)
        self.assertIn("key_nested4", old_report)
        self.assertIn("last_change", new_report)

    def test_compare_json_structures_method(self):
        """``compare_json_structures`` returns all result sections and array lengths."""
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        comparison = analyzer.compare_json_structures(new_json)

        # Test all expected keys within the comparison result
        expected_keys = [
            "added_paths", "removed_paths", "common_paths",
            "type_changes", "value_differences", "array_size_changes",
            "array_lengths_old", "array_lengths_new", "summary",
        ]
        for key in expected_keys:
            with self.subTest(key=key):
                self.assertIn(key, comparison, f"Key {key} should be in comparison result")

        # Test summary statistics
        summary = comparison["summary"]
        self.assertGreater(summary["total_paths_old"], 0)
        self.assertGreater(summary["total_paths_new"], 0)
        self.assertGreater(summary["paths_removed"], 0)
        self.assertGreater(summary["paths_added"], 0)

        # Test array length tracking
        self.assertIn("$.key_nested1.array_nested_4[*]", comparison["array_lengths_old"])
        self.assertIn("$.key_nested1.array_changed_4[*]", comparison["array_lengths_new"])
        self.assertEqual(comparison["array_lengths_old"]["$.key_nested1.array_nested_4[*]"], 4)
        self.assertEqual(comparison["array_lengths_new"]["$.key_nested1.array_changed_4[*]"], 4)

    def test_get_array_lengths_method(self):
        """``get_array_lengths`` maps each ``[*]`` path to the list length."""
        test_data = {
            "empty_array": [],
            "small_array": [1, 2],
            "large_array": list(range(100)),
            "nested": {
                "inner_array": ["a", "b", "c", "d", "e"],
            },
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        array_lengths = analyzer.get_array_lengths()

        # Test all array lengths are captured
        self.assertEqual(array_lengths["$.empty_array[*]"], 0)
        self.assertEqual(array_lengths["$.small_array[*]"], 2)
        self.assertEqual(array_lengths["$.large_array[*]"], 100)
        self.assertEqual(array_lengths["$.nested.inner_array[*]"], 5)

        # Test that non-array paths are not in array_lengths
        for path in array_lengths:
            with self.subTest(path=path):
                self.assertTrue(path.endswith("[*]"), f"Array length path {path} should end with [*]")

    def test_value_differences_detection(self):
        """``compare_json_structures`` reports changed scalar values only."""
        old_data = {
            "name": "John",
            "age": 25,
            "city": "New York",
        }
        new_data = {
            "name": "John",  # unchanged
            "age": 26,  # changed
            "city": "Boston",  # changed
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect value changes
        self.assertIn("$.age", comparison["value_differences"])
        self.assertIn("$.city", comparison["value_differences"])
        self.assertNotIn("$.name", comparison["value_differences"])

        # Test change details (int values are expected in their string form)
        age_change = comparison["value_differences"]["$.age"]
        self.assertEqual(age_change["old_value"], "25")
        self.assertEqual(age_change["new_value"], "26")

        city_change = comparison["value_differences"]["$.city"]
        self.assertEqual(city_change["old_value"], "New York")
        self.assertEqual(city_change["new_value"], "Boston")

    def test_array_size_changes_detection(self):
        """``compare_json_structures`` reports grown and shrunk arrays with deltas."""
        old_data = {
            "items": [1, 2, 3],
            "tags": ["a", "b"],
        }
        new_data = {
            "items": [1, 2, 3, 4, 5],  # size increased
            "tags": ["a"],  # size decreased
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect array size changes
        self.assertIn("$.items[*]", comparison["array_size_changes"])
        self.assertIn("$.tags[*]", comparison["array_size_changes"])

        # Test size change details
        items_change = comparison["array_size_changes"]["$.items[*]"]
        self.assertEqual(items_change["old_size"], 3)
        self.assertEqual(items_change["new_size"], 5)
        self.assertEqual(items_change["size_change"], 2)

        tags_change = comparison["array_size_changes"]["$.tags[*]"]
        self.assertEqual(tags_change["old_size"], 2)
        self.assertEqual(tags_change["new_size"], 1)
        self.assertEqual(tags_change["size_change"], -1)

    def test_compare_json_with_jsonpath_structures_function(self):
        """``compare_json_with_jsonpath_structures`` works with ``print_report=False``."""
        comparison = compare_json_with_jsonpath_structures(old_json, new_json, print_report=False)

        # Should return the same structure as the method
        self.assertIn("summary", comparison)
        self.assertIn("added_paths", comparison)
        self.assertIn("removed_paths", comparison)

        # Test that it works without printing (no exception thrown)
        self.assertIsInstance(comparison, dict)

        # Test specific changes
        self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])

    def test_nested_arrays_length_tracking(self):
        """Array lengths are tracked for both an outer array and arrays inside its items."""
        test_data = {
            "level1": [
                {"level2": [1, 2, 3]},
                {"level2": [4, 5]},
                {"level2": [6, 7, 8, 9]},
            ],
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)
        array_lengths = analyzer.get_array_lengths()

        # Should track both levels of arrays
        self.assertIn("$.level1[*]", array_lengths)
        self.assertIn("$.level1[*].level2[*]", array_lengths)

        # Check lengths
        self.assertEqual(array_lengths["$.level1[*]"], 3)
        # Note: The nested array length will be from the last item processed (current implementation)
        self.assertEqual(array_lengths["$.level1[*].level2[*]"], 4)

    def test_type_changes_detection_in_comparison(self):
        """``compare_json_structures`` reports fields whose type changed."""
        old_data = {
            "field1": "string_value",
            "field2": 42,
            "field3": [1, 2, 3],
        }
        new_data = {
            "field1": 123,
            "field2": 42,
            "field3": "not_array",
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_data)
        comparison = analyzer.compare_json_structures(new_data)

        # Should detect type changes
        self.assertIn("$.field1", comparison["type_changes"])
        self.assertIn("$.field3", comparison["type_changes"])
        self.assertNotIn("$.field2", comparison["type_changes"])

        # Test change details
        field1_change = comparison["type_changes"]["$.field1"]
        self.assertEqual(field1_change["old_type"], "str")
        self.assertEqual(field1_change["new_type"], "int")

        field3_change = comparison["type_changes"]["$.field3"]
        self.assertEqual(field3_change["new_type"], "str")
        # The old value was a list, reported here as "list"
        self.assertEqual(field3_change["old_type"], "list")

    def test_analyze_dict_list_simple(self):
        """``analyze_dict_list_simple`` analyzes each dict of a list independently."""
        dict_list = [
            {
                "user": "john",
                "age": 25,
                "tags": ["admin", "user"],
            },
            {
                "user": "jane",
                "age": 30,
                "tags": ["user"],
                "active": True,
            },
            {
                "user": "bob",
                "score": 95.5,
                "tags": ["guest", "temp", "new"],
            },
        ]
        # Test the function
        results = analyze_dict_list_simple(dict_list)

        # Basic structure tests
        self.assertEqual(len(results), 3)
        self.assertIsInstance(results, list)

        # Test each result has expected keys
        for i, result in enumerate(results):
            with self.subTest(index=i):
                self.assertEqual(result["index"], i)
                self.assertIn("paths_with_types", result)
                self.assertIn("detailed_report", result)
                self.assertIn("array_lengths", result)
                self.assertIn("structure_report", result)

        # Test first dict analysis
        first_result = results[0]
        self.assertIn("$.user", first_result["paths_with_types"])
        self.assertIn("$.age", first_result["paths_with_types"])
        self.assertIn("$.tags[*]", first_result["paths_with_types"])
        self.assertEqual(first_result["paths_with_types"]["$.user"], "str")
        self.assertEqual(first_result["paths_with_types"]["$.age"], "int")
        self.assertEqual(first_result["paths_with_types"]["$.tags[*]"], "array")
        self.assertEqual(first_result["array_lengths"]["$.tags[*]"], 2)

        # Test second dict has additional field
        second_result = results[1]
        self.assertIn("$.active", second_result["paths_with_types"])
        self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")
        self.assertEqual(second_result["array_lengths"]["$.tags[*]"], 1)

        # Test third dict differences
        third_result = results[2]
        self.assertIn("$.score", third_result["paths_with_types"])
        self.assertNotIn("$.age", third_result["paths_with_types"])  # age missing in third dict
        self.assertEqual(third_result["paths_with_types"]["$.score"], "float")
        self.assertEqual(third_result["array_lengths"]["$.tags[*]"], 3)

        # Test structure reports are strings
        for i, result in enumerate(results):
            with self.subTest(index=i, check="structure_report"):
                self.assertIsInstance(result["structure_report"], str)
                self.assertGreater(len(result["structure_report"]), 0)

        # Test detailed reports have proper structure
        for i, result in enumerate(results):
            # Only the per-path info dicts are inspected, so iterate the values
            for info in result["detailed_report"].values():
                with self.subTest(index=i, check="detailed_report"):
                    self.assertIn("types", info)
                    self.assertIn("primary_type", info)
                    self.assertIn("is_array", info)
                    self.assertIn("samples", info)
                    self.assertIn("sample_count", info)

    def test_filter_paths_excluding_keys(self):
        """``filter_paths_excluding_keys`` drops paths of the excluded keys only."""
        test_data = {
            'definition': 'enjoying or showing or marked by joy or pleasure',
            'examples': ['a happy smile', 'spent many happy days on the beach'],
            'related_words': [{'base_form': 'euphoric'}, {'base_form': 'elated'}],
            'relation_type': 'also_see',
            'source': 'wordnet',
            'wordnet_pos': 'a',
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test without exclusion
        all_paths = analyzer.paths
        self.assertIn("$.examples[*]", all_paths)
        self.assertIn("$.definition", all_paths)

        # Test with exclusion
        filtered_paths = analyzer.filter_paths_excluding_keys({'examples'})
        self.assertNotIn("$.examples[*]", filtered_paths)
        self.assertIn("$.definition", filtered_paths)
        self.assertIn("$.related_words[*]", filtered_paths)
        self.assertIn("$.related_words[*].base_form", filtered_paths)

        # Test excluding multiple keys
        filtered_paths_multi = analyzer.filter_paths_excluding_keys({'examples', 'source'})
        self.assertNotIn("$.examples[*]", filtered_paths_multi)
        self.assertNotIn("$.source", filtered_paths_multi)
        self.assertIn("$.definition", filtered_paths_multi)

    def test_get_filtered_structure_report(self):
        """``get_filtered_structure_report`` omits excluded keys from the report text."""
        test_data = {
            'definition': 'test definition',
            'examples': ['example1', 'example2'],
            'metadata': {'source': 'test', 'version': 1},
            'tags': ['tag1', 'tag2', 'tag3'],
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered report
        filtered_report = analyzer.get_filtered_structure_report({'examples'})

        # Should not contain examples
        self.assertNotIn("examples", filtered_report)

        # Should contain other fields
        self.assertIn("$.definition", filtered_report)
        self.assertIn("$.metadata", filtered_report)
        self.assertIn("$.tags[*]", filtered_report)

        # Test structure: str.split never returns an empty list, so count only
        # non-blank lines to make this check meaningful
        lines = [line for line in filtered_report.split('\n') if line.strip()]
        self.assertGreater(len(lines), 0)

        # Verify specific content
        self.assertIn("$.definition -- test definition", filtered_report)
        self.assertIn("$.tags[*] -- array[3]", filtered_report)

    def test_get_filtered_paths_with_types(self):
        """``get_filtered_paths_with_types`` omits an excluded key and its children."""
        test_data = {
            'name': 'test',
            'count': 42,
            'items': [1, 2, 3],
            'exclude_me': {'nested': 'value'},
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered paths with types
        filtered_paths_types = analyzer.get_filtered_paths_with_types({'exclude_me'})

        # Should not contain excluded paths
        self.assertNotIn("$.exclude_me", filtered_paths_types)
        self.assertNotIn("$.exclude_me.nested", filtered_paths_types)

        # Should contain other paths
        self.assertIn("$.name", filtered_paths_types)
        self.assertIn("$.count", filtered_paths_types)
        self.assertIn("$.items[*]", filtered_paths_types)

        # Test types
        self.assertEqual(filtered_paths_types["$.name"], "str")
        self.assertEqual(filtered_paths_types["$.count"], "int")
        self.assertEqual(filtered_paths_types["$.items[*]"], "array")

    def test_get_filtered_detailed_type_report(self):
        """``get_filtered_detailed_type_report`` omits an excluded key and its children."""
        test_data = {
            'title': 'Sample Title',
            'description': 'Sample Description',
            'private_data': {'secret': 'hidden'},
            'public_list': ['item1', 'item2'],
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test filtered detailed report
        filtered_detailed = analyzer.get_filtered_detailed_type_report({'private_data'})

        # Should not contain excluded paths
        self.assertNotIn("$.private_data", filtered_detailed)
        self.assertNotIn("$.private_data.secret", filtered_detailed)

        # Should contain other paths
        self.assertIn("$.title", filtered_detailed)
        self.assertIn("$.public_list[*]", filtered_detailed)

        # Test structure of remaining items
        title_info = filtered_detailed["$.title"]
        self.assertEqual(title_info["primary_type"], "str")
        self.assertFalse(title_info["is_array"])
        self.assertIn("Sample Title", title_info["samples"])

        list_info = filtered_detailed["$.public_list[*]"]
        self.assertEqual(list_info["primary_type"], "array")
        self.assertTrue(list_info["is_array"])
        self.assertEqual(list_info["array_length"], 2)

    def test_analyze_dict_list_simple_with_exclusion(self):
        """``analyze_dict_list_simple`` honours ``exclude_keys`` in every output section."""
        dict_list = [
            {
                "name": "John",
                "age": 25,
                "private_info": {"ssn": "123-45-6789"},
                "tags": ["user", "admin"],
            },
            {
                "name": "Jane",
                "age": 30,
                "private_info": {"ssn": "987-65-4321"},
                "tags": ["user"],
                "active": True,
            },
        ]
        # Test with exclusion
        results = analyze_dict_list_simple(dict_list, exclude_keys={'private_info'})

        # Basic structure tests
        self.assertEqual(len(results), 2)

        # Test that private_info is excluded from all results
        for i, result in enumerate(results):
            with self.subTest(index=i):
                paths_with_types = result["paths_with_types"]
                detailed_report = result["detailed_report"]

                # Should not contain private_info paths; comparing with [] makes
                # a failure show which paths leaked through
                private_paths = [path for path in paths_with_types if 'private_info' in path]
                self.assertEqual(private_paths, [], "private_info paths should be excluded")
                private_detailed = [path for path in detailed_report if 'private_info' in path]
                self.assertEqual(private_detailed, [], "private_info should be excluded from detailed report")

                # Should contain other paths
                self.assertIn("$.name", paths_with_types)
                self.assertIn("$.age", paths_with_types)
                self.assertIn("$.tags[*]", paths_with_types)

        # Test second dict has additional field (but not private_info)
        second_result = results[1]
        self.assertIn("$.active", second_result["paths_with_types"])
        self.assertEqual(second_result["paths_with_types"]["$.active"], "bool")

        # Test structure reports don't contain excluded keys
        for i, result in enumerate(results):
            with self.subTest(index=i, check="structure_report"):
                structure_report = result["structure_report"]
                self.assertNotIn("private_info", structure_report)
                self.assertIn("$.name", structure_report)

    def test_exclusion_with_nested_arrays(self):
        """Excluding a key removes all of its nested array/object paths."""
        test_data = {
            "valid_data": {
                "items": [
                    {"id": 1, "name": "item1"},
                    {"id": 2, "name": "item2"},
                ],
            },
            "sensitive_data": {
                "secrets": [
                    {"key": "secret1", "value": "hidden1"},
                    {"key": "secret2", "value": "hidden2"},
                ],
            },
        }
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(test_data)

        # Test exclusion of nested structure
        filtered_paths = analyzer.filter_paths_excluding_keys({'sensitive_data'})

        # Should exclude all sensitive_data paths
        sensitive_paths = [path for path in analyzer.paths if 'sensitive_data' in path]
        self.assertGreater(len(sensitive_paths), 0, "Should have sensitive_data paths in original")
        for sensitive_path in sensitive_paths:
            with self.subTest(path=sensitive_path):
                self.assertNotIn(sensitive_path, filtered_paths, f"Should exclude {sensitive_path}")

        # Should keep valid_data paths
        self.assertIn("$.valid_data", filtered_paths)
        self.assertIn("$.valid_data.items[*]", filtered_paths)
        self.assertIn("$.valid_data.items[*].id", filtered_paths)
        self.assertIn("$.valid_data.items[*].name", filtered_paths)
class TestJSONPathComparator(unittest.TestCase):
"""
Test JSONPath structure comparison functionality
"""
def test_extract_structure_paths_comparison(self):
    """Check that ``extract_structure_paths`` reflects the old/new key renames."""
    paths_before = extract_structure_paths(old_json)
    paths_after = extract_structure_paths(new_json)

    # Each fixture is expected to produce at least seven paths
    for extracted in (paths_before, paths_after):
        self.assertGreaterEqual(len(extracted), 7)

    # The array key and the nested key differ between the two fixtures
    array_checks = (
        ("$.key_nested1.array_nested_4[*]", paths_before),
        ("$.key_nested1.array_changed_4[*]", paths_after),
        ("$.key_nested1.array_nested_4[*].key_nested4", paths_before),
        ("$.key_nested1.array_changed_4[*].last_change", paths_after),
    )
    for expected_path, extracted in array_checks:
        self.assertIn(expected_path, extracted)
def test_extract_structure_paths_with_types(self):
    """Check the type labels returned by ``extract_structure_paths_with_types``."""
    typed_before = extract_structure_paths_with_types(old_json)
    typed_after = extract_structure_paths_with_types(new_json)

    # Scalar and object fields carry these type names
    expected_labels = (
        ("$.key1", "string"),
        ("$.key2", "integer"),
        ("$.key_nested1", "object"),
    )
    for json_path, label in expected_labels:
        self.assertEqual(typed_before[json_path], label)

    # The array field itself (without [*]) has a type mentioning "array"
    array_field = "$.key_nested1.array_nested_4"
    self.assertIn("array", typed_before[array_field])
    # The [*] path represents the type of array elements (first element)
    self.assertEqual(typed_before[array_field + "[*]"], "string")

    # The renamed array key exists only on its own side
    self.assertIn(array_field, typed_before)
    self.assertIn("$.key_nested1.array_changed_4", typed_after)
    self.assertNotIn(array_field, typed_after)
def test_compare_structures_array_rename(self):
"""
Test comparison detects array field rename
"""
comparator = JSONPathComparator()
comparison = comparator.compare_structures(old_json, new_json)
# Should detect removed paths (old structure)
self.assertIn("$.key_nested1.array_nested_4[*]", comparison["removed_paths"])
self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", comparison["removed_paths"])
# Should detect added paths (new structure)
self.assertIn("$.key_nested1.array_changed_4[*]", comparison["added_paths"])
self.assertIn("$.key_nested1.array_changed_4[*].last_change", comparison["added_paths"])
def test_compare_structures_with_types(self):
"""
Test comparison with type information
"""
comparator = JSONPathComparator()
comparison = comparator.compare_structures_with_types(old_json, new_json)
# Should detect added paths with types
self.assertIn("$.key_nested1.array_changed_4", comparison["added_paths"])
# Check for the actual existing paths in the comparison
# The deeply nested paths might not be included in the type comparison
if "$.key_nested1.array_changed_4[*].last_change" in comparison["added_paths"]:
self.assertIn("string", comparison["added_paths"]["$.key_nested1.array_changed_4[*].last_change"])
# Should detect removed paths with types
self.assertIn("$.key_nested1.array_nested_4", comparison["removed_paths"])
# Should detect common paths with types
self.assertIn("$.key1", comparison["common_paths"])
self.assertEqual(comparison["common_paths"]["$.key1"], "string")
# Should not detect type changes for this example (same types, different paths)
self.assertEqual(len(comparison["type_changes"]), 0)
def test_type_changes_detection(self):
"""
Test detection of type changes in paths
"""
# Create test data with type changes
json_with_string = {"test_field": "hello"}
json_with_number = {"test_field": 42}
comparator = JSONPathComparator()
comparison = comparator.compare_structures_with_types(json_with_string, json_with_number)
# Should detect type change
self.assertIn("$.test_field", comparison["type_changes"])
self.assertEqual(comparison["type_changes"]["$.test_field"]["old_type"], "string")
self.assertEqual(comparison["type_changes"]["$.test_field"]["new_type"], "integer")
def test_compare_structures_unchanged_paths(self):
"""
Test that unchanged paths are correctly identified
"""
comparator = JSONPathComparator()
comparison = comparator.compare_structures(old_json, new_json)
# These paths should remain unchanged
unchanged_paths = [
"$.key1",
"$.key2",
"$.key_nested1",
"$.key_nested1.key_nested2",
"$.key_nested1.key_nested3"
]
for path in unchanged_paths:
self.assertIn(path, comparison["common_paths"], f"Path {path} should be in common paths")
self.assertNotIn(path, comparison["added_paths"], f"Path {path} should not be added")
self.assertNotIn(path, comparison["removed_paths"], f"Path {path} should not be removed")
def test_compare_structures_nested_array_preserved(self):
"""
Test the deeply nested array structure is preserved despite parent changes
"""
comparator = JSONPathComparator()
comparison = comparator.compare_structures(old_json, new_json)
# The nested array should exist in both (though path changed due to parent rename)
old_nested_array = "$.key_nested1.array_nested_4[*].array_nested_5[*]"
new_nested_array = "$.key_nested1.array_changed_4[*].array_nested_5[*]"
self.assertIn(old_nested_array, comparison["removed_paths"])
self.assertIn(new_nested_array, comparison["added_paths"])
def test_path_validations_with_specific_paths(self):
"""
Test validation of specific paths between old and new structures
"""
common_paths = [
"$.key1", # Should exist in both
"$.key2", # Should exist in both
"$.key_nested1.array_nested_4[*]", # Exists only in old
"$.key_nested1.array_changed_4[*]", # Exists only in new
"$.key_nested1.key_nested2" # Should exist in both
]
comparator = JSONPathComparator(common_paths)
comparison = comparator.compare_structures(old_json, new_json)
validations = comparison["path_validations"]
# Test paths that exist in both
self.assertEqual(validations["$.key1"]["status"], "✅")
self.assertTrue(validations["$.key1"]["old_found"])
self.assertTrue(validations["$.key1"]["new_found"])
# Test paths that exist only in old
self.assertEqual(validations["$.key_nested1.array_nested_4[*]"]["status"], "❌")
self.assertTrue(validations["$.key_nested1.array_nested_4[*]"]["old_found"])
self.assertFalse(validations["$.key_nested1.array_nested_4[*]"]["new_found"])
# Test paths that exist only in new
self.assertEqual(validations["$.key_nested1.array_changed_4[*]"]["status"], "❌")
self.assertFalse(validations["$.key_nested1.array_changed_4[*]"]["old_found"])
self.assertTrue(validations["$.key_nested1.array_changed_4[*]"]["new_found"])
class TestJSONPathIntegration(unittest.TestCase):
    """
    End-to-end checks chaining analysis and comparison on the old_json / new_json fixtures
    """

    def test_complete_diff_workflow(self):
        """
        Analyze both fixtures, then compare them with a list of critical paths
        """
        # Step 1: the old report mentions the old nested key
        report_before = analyze_with_jsonpath(old_json)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", report_before)

        # Step 2: the new report mentions the renamed nested key
        report_after = analyze_with_jsonpath(new_json)
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", report_after)

        # Step 3: compare the two structures
        critical_paths = [
            "$.key1",
            "$.key2",
            "$.key_nested1.key_nested2",
            "$.key_nested1.key_nested3"
        ]
        diff = compare_json_with_jsonpath(old_json, new_json, critical_paths)

        # Every category of the comparison must be populated
        self.assertIsInstance(diff, dict)
        for category in ("added_paths", "removed_paths", "common_paths"):
            self.assertGreater(len(diff[category]), 0)

    def test_complete_diff_workflow_with_types(self):
        """
        Typed comparison returns the expected sections and non-empty string type labels
        """
        critical_paths = [
            "$.key1",
            "$.key2",
            "$.key_nested1.key_nested2"
        ]
        diff = compare_json_with_jsonpath_and_types(old_json, new_json, critical_paths)

        self.assertIsInstance(diff, dict)
        for section in ("added_paths", "removed_paths", "type_changes"):
            self.assertIn(section, diff)

        # Each added path, if any, maps to a non-empty string type label
        added = diff["added_paths"]
        if added:
            for type_label in added.values():
                self.assertIsInstance(type_label, str)
                self.assertGreater(len(type_label), 0)

    def test_detect_specific_changes(self):
        """
        The two renames between the fixtures are reported as removed/added paths
        """
        diff = compare_json_with_jsonpath(old_json, new_json)

        # Renames under test:
        # 1. array_nested_4 -> array_changed_4
        # 2. key_nested4 -> last_change
        removed_expected = (
            "$.key_nested1.array_nested_4[*]",
            "$.key_nested1.array_nested_4[*].key_nested4",
        )
        added_expected = (
            "$.key_nested1.array_changed_4[*]",
            "$.key_nested1.array_changed_4[*].last_change",
        )

        for removed_path in removed_expected:
            self.assertIn(removed_path, diff["removed_paths"], f"Expected removed path {removed_path} not found")
        for added_path in added_expected:
            self.assertIn(added_path, diff["added_paths"], f"Expected added path {added_path} not found")

    def test_structure_variations_old(self):
        """
        The old fixture exposes the array under its original name
        """
        wanted = "$.key_nested1.array_nested_4[*]"
        extracted = JSONPathStructureAnalyzer().extract_all_paths(old_json)
        self.assertIn(wanted, extracted, f"Expected path {wanted} not found")

    def test_structure_variations_new(self):
        """
        The new fixture exposes the array under its renamed name
        """
        wanted = "$.key_nested1.array_changed_4[*]"
        extracted = JSONPathStructureAnalyzer().extract_all_paths(new_json)
        self.assertIn(wanted, extracted, f"Expected path {wanted} not found")

    def test_json_string_compatibility(self):
        """
        Fixtures round-tripped through json.dumps / json.loads compare like the original dicts
        """
        # Serialize and parse back each fixture
        roundtrip_old = json.loads(json.dumps(old_json))
        roundtrip_new = json.loads(json.dumps(new_json))

        diff = compare_json_with_jsonpath(roundtrip_old, roundtrip_new)
        self.assertIn("$.key_nested1.array_nested_4[*]", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", diff["added_paths"])
class TestEdgeCases(unittest.TestCase):
    """
    Boundary scenarios (empty input, identical input, deep nesting) on the shared fixtures
    """

    def test_empty_json_comparison(self):
        """
        Comparing the old fixture with an empty dict reports only removals
        """
        diff = compare_json_with_jsonpath(old_json, {})

        self.assertGreater(len(diff["removed_paths"]), 0)
        for untouched in ("added_paths", "common_paths"):
            self.assertEqual(len(diff[untouched]), 0)

    def test_empty_json_comparison_with_types(self):
        """
        Typed comparison against an empty dict reports typed removals and no additions
        """
        diff = compare_json_with_jsonpath_and_types(old_json, {})

        self.assertGreater(len(diff["removed_paths"]), 0)
        self.assertEqual(len(diff["added_paths"]), 0)
        # The root "$" path may still be shared by both structures,
        # hence at most one common entry is tolerated
        self.assertLessEqual(len(diff["common_paths"]), 1)

        # Every removed path keeps a string type label
        for type_label in diff["removed_paths"].values():
            self.assertIsInstance(type_label, str)

    def test_identical_json_comparison(self):
        """
        Comparing a structure with itself yields common paths only
        """
        diff = compare_json_with_jsonpath(old_json, old_json)

        for changed in ("added_paths", "removed_paths"):
            self.assertEqual(len(diff[changed]), 0)
        self.assertGreater(len(diff["common_paths"]), 0)

    def test_identical_json_comparison_with_types(self):
        """
        Typed comparison of a structure with itself yields no additions, removals or type changes
        """
        diff = compare_json_with_jsonpath_and_types(old_json, old_json)

        for changed in ("added_paths", "removed_paths", "type_changes"):
            self.assertEqual(len(diff[changed]), 0)
        self.assertGreater(len(diff["common_paths"]), 0)

    def test_deep_nested_array_analysis(self):
        """
        The structure report describes the innermost array (array_nested_5)
        """
        analyzer = JSONPathStructureAnalyzer()
        analyzer.extract_all_paths(old_json)
        structure_report = analyzer.get_structure_report()

        expected_line = "$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]"
        self.assertIn(expected_line, structure_report)

    def test_array_type_detection(self):
        """
        extract_structure_paths_with_types labels the array field and its [*] path
        """
        typed_paths = extract_structure_paths_with_types(old_json)

        # The array field itself (not the [*] path) has "array" in its type label
        self.assertIn("array", typed_paths["$.key_nested1.array_nested_4"])
        # The [*] path is expected to be labelled "string" (the first element of the fixture array is a string)
        self.assertEqual(typed_paths["$.key_nested1.array_nested_4[*]"], "string")
class TestSimpleUsageExamples(unittest.TestCase):
    """
    Minimal usage samples for the JSONPath diff helpers
    """

    def test_basic_structure_analysis_old(self):
        """
        Build and log the structure report of the old JSON, then spot-check its content
        """
        old_report = analyze_with_jsonpath(old_json)
        app_logger.info("\nOLD JSON STRUCTURE:")
        app_logger.info(old_report)

        # Spot checks on the report text
        for fragment in ("$.key1 -- row 1", "$.key2 -- 22", "array_nested_4", "key_nested4"):
            self.assertIn(fragment, old_report)

    def test_basic_structure_analysis_new(self):
        """
        Build and log the structure report of the new JSON, then spot-check its content
        """
        new_report = analyze_with_jsonpath(new_json)
        app_logger.info("\nNEW JSON STRUCTURE:")
        app_logger.info(new_report)

        # Spot checks on the report text
        for fragment in ("$.key1 -- row 1", "$.key2 -- 22", "array_changed_4", "last_change"):
            self.assertIn(fragment, new_report)

    def test_basic_comparison(self):
        """
        Compare the old and new JSON structures and check the headline differences
        """
        app_logger.info("\nCOMPARISON RESULTS:")
        diff = compare_json_with_jsonpath(old_json, new_json)

        # The renamed array is removed under its old name and added under its new one
        self.assertIn("$.key_nested1.array_nested_4[*]", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", diff["added_paths"])

        # Top-level keys are untouched
        for stable_path in ("$.key1", "$.key2"):
            self.assertIn(stable_path, diff["common_paths"])

    def test_basic_comparison_with_types(self):
        """
        Compare the old and new JSON structures with type labels attached
        """
        app_logger.info("\nCOMPARISON RESULTS WITH TYPES:")
        diff = compare_json_with_jsonpath_and_types(old_json, new_json)

        # The renamed array field is removed under its old name and added under its new one
        self.assertIn("$.key_nested1.array_nested_4", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4", diff["added_paths"])

        # Common paths carry their type label
        for stable_path, type_label in (("$.key1", "string"), ("$.key2", "integer")):
            self.assertEqual(diff["common_paths"][stable_path], type_label)
# Run the unittest runner when this module is executed directly as a script
if __name__ == "__main__":
    unittest.main()