alessandro trinca tornidor
feat: make /thesaurus-inflated-phrase agnostic, not bounded to synonyms - /thesaurus-custom fixed
a707261
| import unittest | |
| import json | |
| from my_ghost_writer.constants import app_logger | |
| from my_ghost_writer.jsonpath_comparator import (compare_json_with_jsonpath, compare_json_with_jsonpath_and_types, | |
| JSONPathComparator, extract_structure_paths, extract_structure_paths_with_types) | |
| from my_ghost_writer.jsonpath_extractor import (JSONPathStructureAnalyzer, analyze_with_jsonpath_detailed, | |
| analyze_with_jsonpath_types, analyze_with_jsonpath, compare_json_with_jsonpath_structures, analyze_dict_list_simple) | |
# Baseline fixture: two root scalars plus a nested object whose array mixes
# plain strings with one object that carries its own array.
old_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_nested_4": [
            "row 5",
            "row 6",
            "row 7 nested",
            {
                "key_nested4": "row 8",
                "array_nested_5": ["row 9", "row 10"],
            },
        ],
    },
}
# Modified fixture: same layout as the baseline, but the nested array key is
# "array_changed_4", its second element is "row changed 6", and the object
# inside the array uses the key "last_change".
new_json = {
    "key1": "row 1",
    "key2": 22,
    "key_nested1": {
        "key_nested2": "row 3",
        "key_nested3": "row 4",
        "array_changed_4": [
            "row 5",
            "row changed 6",
            "row 7 nested",
            {
                "last_change": "row 8",
                "array_nested_5": ["row 9", "row 10"],
            },
        ],
    },
}
| class TestJSONPathStructureAnalyzer(unittest.TestCase): | |
| """ | |
| Test JSONPath structure analysis with the provided nested JSON data | |
| """ | |
def test_get_paths_with_types_basic(self):
    """
    Check the type label reported for one scalar of each basic kind
    """
    payload = {
        "string_field": "hello",
        "int_field": 42,
        "float_field": 3.14,
        "bool_field": True,
        "null_field": None,
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    type_by_path = inspector.get_paths_with_types()
    # One (path, expected label) pair per scalar in the payload
    expectations = (
        ("$.string_field", "str"),
        ("$.int_field", "int"),
        ("$.float_field", "float"),
        ("$.bool_field", "bool"),
        ("$.null_field", "NoneType"),
    )
    for json_path, label in expectations:
        self.assertEqual(type_by_path[json_path], label)
def test_get_paths_with_types_arrays(self):
    """
    Check that list values (flat, empty, mixed, nested) are labelled "array"
    """
    payload = {
        "simple_array": [1, 2, 3],
        "empty_array": [],
        "mixed_array": ["string", 42, True],
        "nested_array": [[1, 2], [3, 4]],
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    type_by_path = inspector.get_paths_with_types()
    # The wildcard path of every list is expected to carry the same label
    array_paths = (
        "$.simple_array[*]",
        "$.empty_array[*]",
        "$.mixed_array[*]",
        "$.nested_array[*]",
    )
    for json_path in array_paths:
        self.assertEqual(type_by_path[json_path], "array")
def test_get_paths_with_types_with_old_json(self):
    """
    Check type labels and path presence for the old_json fixture
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(old_json)
    type_by_path = inspector.get_paths_with_types()
    # Spot-check the label attached to a handful of paths
    labelled = {
        "$.key1": "str",
        "$.key2": "int",
        "$.key_nested1": "dict",
        "$.key_nested1.array_nested_4[*]": "array",
        "$.key_nested1.key_nested2": "str",
    }
    for json_path, label in labelled.items():
        self.assertEqual(type_by_path[json_path], label)
    # Every one of these paths must have been discovered
    required_paths = (
        "$.key1",
        "$.key2",
        "$.key_nested1",
        "$.key_nested1.key_nested2",
        "$.key_nested1.key_nested3",
        "$.key_nested1.array_nested_4[*]",
    )
    for path in required_paths:
        self.assertIn(path, type_by_path, f"Path {path} should be in paths_with_types")
def test_get_detailed_type_report_basic(self):
    """
    Check the detailed report entries for one scalar field and one array field
    """
    payload = {"test_field": "sample_value", "array_field": [1, 2, 3]}
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    report = inspector.get_detailed_type_report()
    # Both paths need an entry in the report
    for json_path in ("$.test_field", "$.array_field[*]"):
        self.assertIn(json_path, report)
    # The scalar entry exposes every detail key
    scalar_entry = report["$.test_field"]
    for detail_key in ("types", "primary_type", "is_array", "samples", "sample_count"):
        self.assertIn(detail_key, scalar_entry)
    # The scalar entry reflects the payload value
    self.assertEqual(scalar_entry["primary_type"], "str")
    self.assertFalse(scalar_entry["is_array"])
    self.assertIn("sample_value", scalar_entry["samples"])
    self.assertGreater(scalar_entry["sample_count"], 0)
    # The array entry is flagged as an array and records its length
    array_entry = report["$.array_field[*]"]
    self.assertTrue(array_entry["is_array"])
    self.assertEqual(array_entry["primary_type"], "array")
    self.assertEqual(array_entry['array_length'], 3)
def test_get_detailed_type_report_with_old_json(self):
    """
    Check detailed report entries for two scalars and one array of old_json
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(old_json)
    report = inspector.get_detailed_type_report()
    # Scalars: (path, expected primary type, expected sample string)
    scalar_cases = (
        ("$.key1", "str", "row 1"),
        ("$.key2", "int", "22"),
    )
    for json_path, primary, sample in scalar_cases:
        entry = report[json_path]
        self.assertEqual(entry["primary_type"], primary)
        self.assertFalse(entry["is_array"])
        self.assertIn(sample, entry["samples"])
    # The nested array holds four elements in the fixture
    nested_array = report["$.key_nested1.array_nested_4[*]"]
    self.assertTrue(nested_array["is_array"])
    self.assertEqual(nested_array["primary_type"], "array")
    self.assertEqual(nested_array["array_length"], 4)
def test_get_detailed_type_report_mixed_types(self):
    """
    Check the report entry of a path that has two recorded types
    """
    inspector = JSONPathStructureAnalyzer()
    # Fill the analyzer state by hand rather than extracting it from a
    # document, so that a single path ends up with two type names
    mixed_path = "$.mixed_field"
    inspector.paths.add(mixed_path)
    for type_name in ("str", "int"):
        inspector.types[mixed_path].add(type_name)
    inspector.samples[mixed_path] = ["hello", "42"]
    entry = inspector.get_detailed_type_report()[mixed_path]
    self.assertIn("mixed(", entry["primary_type"])
    self.assertFalse(entry["is_array"])
    self.assertEqual(len(entry["types"]), 2)
def test_analyze_with_jsonpath_types_function(self):
    """
    Check the dict returned by the analyze_with_jsonpath_types helper
    """
    payload = {"name": "test", "count": 5, "items": ["a", "b", "c"]}
    type_by_path = analyze_with_jsonpath_types(payload)
    expected = {
        "$.name": "str",
        "$.count": "int",
        "$.items[*]": "array",
    }
    # The helper returns a dict that contains every expected path
    self.assertIsInstance(type_by_path, dict)
    for json_path in expected:
        self.assertIn(json_path, type_by_path)
    # Each path carries the expected label
    for json_path, label in expected.items():
        self.assertEqual(type_by_path[json_path], label)
def test_analyze_with_jsonpath_detailed_function(self):
    """
    Check the dict returned by the analyze_with_jsonpath_detailed helper
    """
    payload = {
        "description": "test description",
        "tags": ["tag1", "tag2"],
    }
    details = analyze_with_jsonpath_detailed(payload)
    # The helper returns a dict keyed by path
    self.assertIsInstance(details, dict)
    for json_path in ("$.description", "$.tags[*]"):
        self.assertIn(json_path, details)
    # Scalar entry: detail keys present, primary type is "str"
    description_entry = details["$.description"]
    for detail_key in ("types", "primary_type", "samples"):
        self.assertIn(detail_key, description_entry)
    self.assertEqual(description_entry["primary_type"], "str")
    # Array entry: flagged as an array with two elements
    tags_entry = details["$.tags[*]"]
    self.assertTrue(tags_entry["is_array"])
    self.assertEqual(tags_entry["primary_type"], "array")
    self.assertEqual(tags_entry["array_length"], 2)
def test_get_paths_with_types_empty_data(self):
    """
    Check that an empty document yields no typed paths
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths({})
    type_by_path = inspector.get_paths_with_types()
    # Nothing was extracted, so nothing should be reported
    entry_count = len(type_by_path)
    self.assertEqual(entry_count, 0)
def test_get_detailed_type_report_empty_data(self):
    """
    Check that an empty document yields an empty detailed report
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths({})
    report = inspector.get_detailed_type_report()
    # Nothing was extracted, so the report should have no entries
    entry_count = len(report)
    self.assertEqual(entry_count, 0)
def test_paths_with_types_comparison_old_vs_new(self):
    """
    Diff the typed paths of old_json and new_json and check the renamed array
    """
    def typed_paths(document):
        # Fresh analyzer per document so the two results stay independent
        inspector = JSONPathStructureAnalyzer()
        inspector.extract_all_paths(document)
        return inspector.get_paths_with_types()

    before = typed_paths(old_json)
    after = typed_paths(new_json)
    # Partition the path names into removed / added / shared
    before_keys, after_keys = set(before), set(after)
    removed = before_keys - after_keys
    added = after_keys - before_keys
    shared = before_keys & after_keys
    # The array key was renamed between the two fixtures
    self.assertIn("$.key_nested1.array_nested_4[*]", removed)
    self.assertIn("$.key_nested1.array_changed_4[*]", added)
    # Paths present on both sides must keep their type label
    for path in shared:
        self.assertEqual(before[path], after[path],
                         f"Type mismatch for common path {path}")
def test_detailed_report_comparison_old_vs_new(self):
    """
    Check that paths shared by old_json and new_json agree on type and array flag
    """
    before = analyze_with_jsonpath_detailed(old_json)
    after = analyze_with_jsonpath_detailed(new_json)
    # Only paths present in both reports can be compared
    shared = before.keys() & after.keys()
    for path in shared:
        before_entry, after_entry = before[path], after[path]
        # Same primary type on both sides
        self.assertEqual(before_entry["primary_type"], after_entry["primary_type"],
                         f"Primary type mismatch for {path}")
        # Same array flag on both sides
        self.assertEqual(before_entry["is_array"], after_entry["is_array"],
                         f"Array status mismatch for {path}")
def test_integration_all_new_methods(self):
    """
    Run the three analysis helpers on one document and check they agree on paths
    """
    payload = {
        "user": {
            "name": "John Doe",
            "age": 30,
            "hobbies": ["reading", "coding", "gaming"],
            "profile": {
                "active": True,
                "settings": {
                    "theme": "dark",
                    "notifications": False,
                },
            },
        }
    }
    # Same input through each helper
    text_report = analyze_with_jsonpath(payload)
    type_by_path = analyze_with_jsonpath_types(payload)
    details = analyze_with_jsonpath_detailed(payload)
    # The text report is parsed as "<path> -- <rest>" lines; keep the path part
    report_paths = {
        line.split(' -- ')[0]
        for line in text_report.split('\n')
        if ' -- ' in line
    }
    types_paths = set(type_by_path.keys())
    detailed_paths = set(details.keys())
    # All three views must list the same paths
    self.assertEqual(report_paths, types_paths)
    self.assertEqual(types_paths, detailed_paths)
    # And each of these concrete paths must be among them
    required_paths = (
        "$.user",
        "$.user.name",
        "$.user.age",
        "$.user.hobbies[*]",
        "$.user.profile",
        "$.user.profile.active",
        "$.user.profile.settings",
        "$.user.profile.settings.theme",
        "$.user.profile.settings.notifications",
    )
    for path in required_paths:
        self.assertIn(path, types_paths, f"Path {path} should be found by all methods")
        self.assertIn(path, detailed_paths, f"Path {path} should be in detailed info")
def test_type_consistency_across_methods(self):
    """
    Check that the simple and detailed views of old_json report the same type
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(old_json)
    type_by_path = inspector.get_paths_with_types()
    report = inspector.get_detailed_type_report()
    for path, simple_type in type_by_path.items():
        # Paths missing from the detailed report are not compared
        if path not in report:
            continue
        detailed_type = report[path]["primary_type"]
        # "mixed(...)" primary types are excluded from the comparison
        if detailed_type.startswith("mixed("):
            continue
        self.assertEqual(simple_type, detailed_type,
                         f"Type inconsistency for {path}: {simple_type} vs {detailed_type}")
def test_extract_all_paths_from_old_structure(self):
    """
    Check that extract_all_paths returns every expected path of old_json
    """
    inspector = JSONPathStructureAnalyzer()
    found = inspector.extract_all_paths(old_json)
    expected_paths = (
        # root level
        "$.key1",
        "$.key2",
        "$.key_nested1",
        # inside the nested object
        "$.key_nested1.key_nested2",
        "$.key_nested1.key_nested3",
        "$.key_nested1.array_nested_4[*]",
        # inside the object stored in the nested array
        "$.key_nested1.array_nested_4[*].key_nested4",
        "$.key_nested1.array_nested_4[*].array_nested_5[*]",
    )
    for json_path in expected_paths:
        self.assertIn(json_path, found)
def test_extract_all_paths_from_new_structure(self):
    """
    Check that extract_all_paths returns the renamed paths of new_json
    """
    inspector = JSONPathStructureAnalyzer()
    found = inspector.extract_all_paths(new_json)
    expected_paths = (
        # array stored under its new key
        "$.key_nested1.array_changed_4[*]",
        # renamed key inside the array's object
        "$.key_nested1.array_changed_4[*].last_change",
        # inner array whose own key did not change
        "$.key_nested1.array_changed_4[*].array_nested_5[*]",
    )
    for json_path in expected_paths:
        self.assertIn(json_path, found)
def test_structure_report_format_old_json(self):
    """
    Check that the text report for old_json contains the expected fragments
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(old_json)
    text_report = inspector.get_structure_report()
    # Scalars show their value, arrays show "array[<length>]"
    expected_fragments = (
        "$.key1 -- row 1",
        "$.key2 -- 22",
        "$.key_nested1.array_nested_4[*] -- array[4]",
        "$.key_nested1.array_nested_4[*].key_nested4 -- row 8",
        "$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, text_report)
def test_structure_report_format_new_json(self):
    """
    Check that the text report for new_json contains the expected fragments
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(new_json)
    text_report = inspector.get_structure_report()
    expected_fragments = (
        # renamed elements
        "$.key_nested1.array_changed_4[*] -- array[4]",
        "$.key_nested1.array_changed_4[*].last_change -- row 8",
        # elements that are the same as in old_json
        "$.key1 -- row 1",
        "$.key2 -- 22",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, text_report)
def test_analyze_with_jsonpath_function(self):
    """
    Check the text reports produced by analyze_with_jsonpath for both fixtures
    """
    before_report = analyze_with_jsonpath(old_json)
    after_report = analyze_with_jsonpath(new_json)
    # Each report must be a non-empty string
    for text_report in (before_report, after_report):
        self.assertIsInstance(text_report, str)
        self.assertGreater(len(text_report), 0)
    # Each report mentions the key names of its own fixture
    mentions = (
        ("array_nested_4", before_report),
        ("array_changed_4", after_report),
        ("key_nested4", before_report),
        ("last_change", after_report),
    )
    for key_name, text_report in mentions:
        self.assertIn(key_name, text_report)
def test_compare_json_structures_method(self):
    """
    Check the result of compare_json_structures for old_json against new_json
    """
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(old_json)
    diff = inspector.compare_json_structures(new_json)
    # The result must expose every section
    required_sections = (
        "added_paths", "removed_paths", "common_paths",
        "type_changes", "value_differences", "array_size_changes",
        "array_lengths_old", "array_lengths_new", "summary",
    )
    for key in required_sections:
        self.assertIn(key, diff, f"Key {key} should be in comparison result")
    # All four summary counters must be positive
    totals = diff["summary"]
    for counter in ("total_paths_old", "total_paths_new", "paths_removed", "paths_added"):
        self.assertGreater(totals[counter], 0)
    # Array lengths are recorded per side under the respective key name
    old_array_path = "$.key_nested1.array_nested_4[*]"
    new_array_path = "$.key_nested1.array_changed_4[*]"
    lengths_before = diff["array_lengths_old"]
    lengths_after = diff["array_lengths_new"]
    self.assertIn(old_array_path, lengths_before)
    self.assertIn(new_array_path, lengths_after)
    self.assertEqual(lengths_before[old_array_path], 4)
    self.assertEqual(lengths_after[new_array_path], 4)
def test_get_array_lengths_method(self):
    """
    Check the lengths reported by get_array_lengths for arrays of several sizes
    """
    payload = {
        "empty_array": [],
        "small_array": [1, 2],
        "large_array": list(range(100)),
        "nested": {
            "inner_array": ["a", "b", "c", "d", "e"],
        },
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    lengths = inspector.get_array_lengths()
    # Every array path maps to the number of elements in the payload
    expected_lengths = (
        ("$.empty_array[*]", 0),
        ("$.small_array[*]", 2),
        ("$.large_array[*]", 100),
        ("$.nested.inner_array[*]", 5),
    )
    for json_path, size in expected_lengths:
        self.assertEqual(lengths[json_path], size)
    # Only wildcard paths may appear as keys
    for path in lengths:
        self.assertTrue(path.endswith("[*]"), f"Array length path {path} should end with [*]")
def test_value_differences_detection(self):
    """
    Check which paths compare_json_structures lists under "value_differences"
    """
    before = {"name": "John", "age": 25, "city": "New York"}
    # "name" keeps its value, "age" and "city" get new ones
    after = {"name": "John", "age": 26, "city": "Boston"}
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(before)
    changes = inspector.compare_json_structures(after)["value_differences"]
    # Only the two modified fields are reported
    self.assertIn("$.age", changes)
    self.assertIn("$.city", changes)
    self.assertNotIn("$.name", changes)
    # Old and new values are asserted as strings, including the integer "age"
    expected_details = (
        ("$.age", "25", "26"),
        ("$.city", "New York", "Boston"),
    )
    for json_path, old_value, new_value in expected_details:
        detail = changes[json_path]
        self.assertEqual(detail["old_value"], old_value)
        self.assertEqual(detail["new_value"], new_value)
def test_array_size_changes_detection(self):
    """
    Check the "array_size_changes" section for one grown and one shrunk array
    """
    before = {"items": [1, 2, 3], "tags": ["a", "b"]}
    # "items" grows from 3 to 5 elements, "tags" shrinks from 2 to 1
    after = {"items": [1, 2, 3, 4, 5], "tags": ["a"]}
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(before)
    resized = inspector.compare_json_structures(after)["array_size_changes"]
    # Both arrays must be reported
    self.assertIn("$.items[*]", resized)
    self.assertIn("$.tags[*]", resized)
    # (path, old size, new size, signed difference)
    expected_details = (
        ("$.items[*]", 3, 5, 2),
        ("$.tags[*]", 2, 1, -1),
    )
    for json_path, old_size, new_size, delta in expected_details:
        detail = resized[json_path]
        self.assertEqual(detail["old_size"], old_size)
        self.assertEqual(detail["new_size"], new_size)
        self.assertEqual(detail["size_change"], delta)
def test_compare_json_with_jsonpath_structures_function(self):
    """
    Check the result of compare_json_with_jsonpath_structures with print_report=False
    """
    diff = compare_json_with_jsonpath_structures(old_json, new_json, print_report=False)
    # The main sections must be present
    for section in ("summary", "added_paths", "removed_paths"):
        self.assertIn(section, diff)
    # The call must return a dict
    self.assertIsInstance(diff, dict)
    # The renamed array shows up as one removal and one addition
    self.assertIn("$.key_nested1.array_nested_4[*]", diff["removed_paths"])
    self.assertIn("$.key_nested1.array_changed_4[*]", diff["added_paths"])
def test_nested_arrays_length_tracking(self):
    """
    Check the lengths recorded for an array of objects that each hold an array
    """
    payload = {
        "level1": [
            {"level2": [1, 2, 3]},
            {"level2": [4, 5]},
            {"level2": [6, 7, 8, 9]},
        ]
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    lengths = inspector.get_array_lengths()
    outer_path = "$.level1[*]"
    inner_path = "$.level1[*].level2[*]"
    # Both nesting levels must be tracked
    self.assertIn(outer_path, lengths)
    self.assertIn(inner_path, lengths)
    self.assertEqual(lengths[outer_path], 3)
    # 4 is the length of the last inner list (and also the longest one).
    # NOTE(review): the original author's note says the last processed item
    # determines this value; confirm against the analyzer implementation.
    self.assertEqual(lengths[inner_path], 4)
def test_type_changes_detection_in_comparison(self):
    """
    Check the "type_changes" section when two of three fields change type
    """
    before = {
        "field1": "string_value",
        "field2": 42,
        "field3": [1, 2, 3],
    }
    # field1: str -> int, field2: same value, field3: list -> str
    after = {
        "field1": 123,
        "field2": 42,
        "field3": "not_array",
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(before)
    retyped = inspector.compare_json_structures(after)["type_changes"]
    # Only the two retyped fields are reported
    self.assertIn("$.field1", retyped)
    self.assertIn("$.field3", retyped)
    self.assertNotIn("$.field2", retyped)
    # Scalar to scalar
    first = retyped["$.field1"]
    self.assertEqual(first["old_type"], "str")
    self.assertEqual(first["new_type"], "int")
    # List to scalar: the old type is asserted as "list" here
    third = retyped["$.field3"]
    self.assertEqual(third["new_type"], "str")
    self.assertEqual(third["old_type"], "list")
def test_analyze_dict_list_simple(self):
    """
    Check the per-item results of analyze_dict_list_simple for three dicts
    """
    records = [
        {"user": "john", "age": 25, "tags": ["admin", "user"]},
        {"user": "jane", "age": 30, "tags": ["user"], "active": True},
        {"user": "bob", "score": 95.5, "tags": ["guest", "temp", "new"]},
    ]
    outcomes = analyze_dict_list_simple(records)
    # One result per input dict, returned as a list
    self.assertEqual(len(outcomes), 3)
    self.assertIsInstance(outcomes, list)
    # Every result carries its position and the four analysis sections
    sections = ("paths_with_types", "detailed_report", "array_lengths", "structure_report")
    for position, outcome in enumerate(outcomes):
        self.assertEqual(outcome["index"], position)
        for section in sections:
            self.assertIn(section, outcome)
    # First dict: user / age / tags
    first = outcomes[0]
    first_types = first["paths_with_types"]
    for json_path in ("$.user", "$.age", "$.tags[*]"):
        self.assertIn(json_path, first_types)
    self.assertEqual(first_types["$.user"], "str")
    self.assertEqual(first_types["$.age"], "int")
    self.assertEqual(first_types["$.tags[*]"], "array")
    self.assertEqual(first["array_lengths"]["$.tags[*]"], 2)
    # Second dict: extra boolean field, single tag
    second = outcomes[1]
    self.assertIn("$.active", second["paths_with_types"])
    self.assertEqual(second["paths_with_types"]["$.active"], "bool")
    self.assertEqual(second["array_lengths"]["$.tags[*]"], 1)
    # Third dict: has "score", has no "age", three tags
    third = outcomes[2]
    self.assertIn("$.score", third["paths_with_types"])
    self.assertNotIn("$.age", third["paths_with_types"])
    self.assertEqual(third["paths_with_types"]["$.score"], "float")
    self.assertEqual(third["array_lengths"]["$.tags[*]"], 3)
    # Structure reports are non-empty strings
    for outcome in outcomes:
        text_report = outcome["structure_report"]
        self.assertIsInstance(text_report, str)
        self.assertGreater(len(text_report), 0)
    # Every detailed entry exposes the full set of detail keys
    detail_keys = ("types", "primary_type", "is_array", "samples", "sample_count")
    for outcome in outcomes:
        for entry in outcome["detailed_report"].values():
            for detail_key in detail_keys:
                self.assertIn(detail_key, entry)
def test_filter_paths_excluding_keys(self):
    """
    Check filter_paths_excluding_keys with one and with two excluded keys
    """
    payload = {
        'definition': 'enjoying or showing or marked by joy or pleasure',
        'examples': ['a happy smile', 'spent many happy days on the beach'],
        'related_words': [{'base_form': 'euphoric'}, {'base_form': 'elated'}],
        'relation_type': 'also_see',
        'source': 'wordnet',
        'wordnet_pos': 'a',
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    # Unfiltered: the full path set still holds both paths
    unfiltered = inspector.paths
    self.assertIn("$.examples[*]", unfiltered)
    self.assertIn("$.definition", unfiltered)
    # One excluded key: its path goes away, the others stay
    without_examples = inspector.filter_paths_excluding_keys({'examples'})
    self.assertNotIn("$.examples[*]", without_examples)
    for kept_path in ("$.definition", "$.related_words[*]", "$.related_words[*].base_form"):
        self.assertIn(kept_path, without_examples)
    # Two excluded keys: both paths go away
    without_two = inspector.filter_paths_excluding_keys({'examples', 'source'})
    for dropped_path in ("$.examples[*]", "$.source"):
        self.assertNotIn(dropped_path, without_two)
    self.assertIn("$.definition", without_two)
def test_get_filtered_structure_report(self):
    """
    Test filtered structure report generation

    Builds a report that excludes the 'examples' key, then checks that the
    excluded key is absent, that the other paths are still rendered, and
    that the report has at least one non-blank line.
    """
    test_data = {
        'definition': 'test definition',
        'examples': ['example1', 'example2'],
        'metadata': {'source': 'test', 'version': 1},
        'tags': ['tag1', 'tag2', 'tag3']
    }
    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(test_data)
    # Test filtered report
    filtered_report = analyzer.get_filtered_structure_report({'examples'})
    # Should not contain examples
    self.assertNotIn("examples", filtered_report)
    # Should contain other fields
    self.assertIn("$.definition", filtered_report)
    self.assertIn("$.metadata", filtered_report)
    self.assertIn("$.tags[*]", filtered_report)
    # Test structure: str.split('\n') returns at least one element even for
    # an empty string, so count only non-blank lines; otherwise this check
    # could never fail
    lines = [line for line in filtered_report.split('\n') if line.strip()]
    self.assertGreater(len(lines), 0)
    # Verify specific content
    self.assertIn("$.definition -- test definition", filtered_report)
    self.assertIn("$.tags[*] -- array[3]", filtered_report)
def test_get_filtered_paths_with_types(self):
    """
    Check get_filtered_paths_with_types when one nested key is excluded
    """
    payload = {
        'name': 'test',
        'count': 42,
        'items': [1, 2, 3],
        'exclude_me': {'nested': 'value'},
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    kept_types = inspector.get_filtered_paths_with_types({'exclude_me'})
    # Neither the excluded key nor its child may appear
    for dropped_path in ("$.exclude_me", "$.exclude_me.nested"):
        self.assertNotIn(dropped_path, kept_types)
    # The remaining paths are present
    expected = {
        "$.name": "str",
        "$.count": "int",
        "$.items[*]": "array",
    }
    for json_path in expected:
        self.assertIn(json_path, kept_types)
    # The remaining paths carry the expected labels
    for json_path, label in expected.items():
        self.assertEqual(kept_types[json_path], label)
def test_get_filtered_detailed_type_report(self):
    """
    Check get_filtered_detailed_type_report when one nested key is excluded
    """
    payload = {
        'title': 'Sample Title',
        'description': 'Sample Description',
        'private_data': {'secret': 'hidden'},
        'public_list': ['item1', 'item2'],
    }
    inspector = JSONPathStructureAnalyzer()
    inspector.extract_all_paths(payload)
    kept_details = inspector.get_filtered_detailed_type_report({'private_data'})
    # Neither the excluded key nor its child may appear
    for dropped_path in ("$.private_data", "$.private_data.secret"):
        self.assertNotIn(dropped_path, kept_details)
    # The remaining paths are still reported
    for kept_path in ("$.title", "$.public_list[*]"):
        self.assertIn(kept_path, kept_details)
    # Scalar entry
    title_entry = kept_details["$.title"]
    self.assertEqual(title_entry["primary_type"], "str")
    self.assertFalse(title_entry["is_array"])
    self.assertIn("Sample Title", title_entry["samples"])
    # Array entry
    list_entry = kept_details["$.public_list[*]"]
    self.assertEqual(list_entry["primary_type"], "array")
    self.assertTrue(list_entry["is_array"])
    self.assertEqual(list_entry["array_length"], 2)
def test_analyze_dict_list_simple_with_exclusion(self):
    """
    analyze_dict_list_simple must drop the excluded key from every report it returns
    """
    john = {
        "name": "John",
        "age": 25,
        "private_info": {"ssn": "123-45-6789"},
        "tags": ["user", "admin"]
    }
    jane = {
        "name": "Jane",
        "age": 30,
        "private_info": {"ssn": "987-65-4321"},
        "tags": ["user"],
        "active": True
    }

    results = analyze_dict_list_simple([john, jane], exclude_keys={'private_info'})

    # One result per input dict
    self.assertEqual(len(results), 2)

    for entry in results:
        types_by_path = entry["paths_with_types"]
        details_by_path = entry["detailed_report"]

        # No path mentioning private_info may survive in the typed paths...
        leaked_paths = [p for p in types_by_path if 'private_info' in p]
        self.assertEqual(len(leaked_paths), 0, "private_info paths should be excluded")
        # ...nor in the detailed report
        leaked_details = [p for p in details_by_path if 'private_info' in p]
        self.assertEqual(len(leaked_details), 0, "private_info should be excluded from detailed report")

        # Keys shared by both input dicts are still present
        for kept_path in ("$.name", "$.age", "$.tags[*]"):
            self.assertIn(kept_path, types_by_path)

    # Only the second dict carries the extra boolean field
    jane_types = results[1]["paths_with_types"]
    self.assertIn("$.active", jane_types)
    self.assertEqual(jane_types["$.active"], "bool")

    # The structure reports must not mention the excluded key either
    for entry in results:
        text_report = entry["structure_report"]
        self.assertNotIn("private_info", text_report)
        self.assertIn("$.name", text_report)
def test_exclusion_with_nested_arrays(self):
    """
    Excluding a top-level key must also remove the array paths nested underneath it
    """
    payload = {
        "valid_data": {
            "items": [
                {"id": 1, "name": "item1"},
                {"id": 2, "name": "item2"}
            ]
        },
        "sensitive_data": {
            "secrets": [
                {"key": "secret1", "value": "hidden1"},
                {"key": "secret2", "value": "hidden2"}
            ]
        }
    }
    analyzer = JSONPathStructureAnalyzer()
    analyzer.extract_all_paths(payload)

    filtered_paths = analyzer.filter_paths_excluding_keys({'sensitive_data'})

    # Collect the unfiltered paths that mention the excluded key
    sensitive_paths = [candidate for candidate in analyzer.paths if 'sensitive_data' in candidate]
    self.assertGreater(len(sensitive_paths), 0, "Should have sensitive_data paths in original")
    # None of them may appear in the filtered result
    for sensitive_path in sensitive_paths:
        self.assertNotIn(sensitive_path, filtered_paths, f"Should exclude {sensitive_path}")

    # The sibling subtree is kept in full, down to the array item fields
    expected_kept = (
        "$.valid_data",
        "$.valid_data.items[*]",
        "$.valid_data.items[*].id",
        "$.valid_data.items[*].name",
    )
    for kept_path in expected_kept:
        self.assertIn(kept_path, filtered_paths)
class TestJSONPathComparator(unittest.TestCase):
    """
    Tests for the JSONPath comparison helpers, driven by the module-level old_json / new_json fixtures
    """

    def test_extract_structure_paths_comparison(self):
        """
        Each fixture yields its own set of paths, including its specific array name
        """
        paths_before = extract_structure_paths(old_json)
        paths_after = extract_structure_paths(new_json)

        # Both extractions must return at least 7 paths
        for extracted in (paths_before, paths_after):
            self.assertGreaterEqual(len(extracted), 7)

        # Each renamed key shows up only through its own fixture
        expectations = (
            ("$.key_nested1.array_nested_4[*]", paths_before),
            ("$.key_nested1.array_changed_4[*]", paths_after),
            ("$.key_nested1.array_nested_4[*].key_nested4", paths_before),
            ("$.key_nested1.array_changed_4[*].last_change", paths_after),
        )
        for expected_path, extracted in expectations:
            self.assertIn(expected_path, extracted)

    def test_extract_structure_paths_with_types(self):
        """
        Path extraction with types reports a type name for every path
        """
        typed_before = extract_structure_paths_with_types(old_json)
        typed_after = extract_structure_paths_with_types(new_json)

        expected_types = {
            "$.key1": "string",
            "$.key2": "integer",
            "$.key_nested1": "object",
        }
        for json_path, type_name in expected_types.items():
            self.assertEqual(typed_before[json_path], type_name)

        # The array field itself (without [*]) is reported as an array
        self.assertIn("array", typed_before["$.key_nested1.array_nested_4"])
        # The [*] path is expected to be typed "string" (the first element of the fixture array is a string)
        self.assertEqual(typed_before["$.key_nested1.array_nested_4[*]"], "string")

        # The renamed array is present under a different key in each fixture
        self.assertIn("$.key_nested1.array_nested_4", typed_before)
        self.assertIn("$.key_nested1.array_changed_4", typed_after)
        self.assertNotIn("$.key_nested1.array_nested_4", typed_after)

    def test_compare_structures_array_rename(self):
        """
        A renamed array field shows up as removed paths plus added paths
        """
        diff = JSONPathComparator().compare_structures(old_json, new_json)

        removed, added = diff["removed_paths"], diff["added_paths"]
        # Paths that only the old structure has
        self.assertIn("$.key_nested1.array_nested_4[*]", removed)
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", removed)
        # Paths that only the new structure has
        self.assertIn("$.key_nested1.array_changed_4[*]", added)
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", added)

    def test_compare_structures_with_types(self):
        """
        The typed comparison reports added, removed and common paths and no type change for the fixtures
        """
        diff = JSONPathComparator().compare_structures_with_types(old_json, new_json)
        added = diff["added_paths"]

        self.assertIn("$.key_nested1.array_changed_4", added)
        # The deeply nested path might be missing from the typed comparison, so it is checked only when present
        nested_added = "$.key_nested1.array_changed_4[*].last_change"
        if nested_added in added:
            self.assertIn("string", added[nested_added])

        self.assertIn("$.key_nested1.array_nested_4", diff["removed_paths"])

        common = diff["common_paths"]
        self.assertIn("$.key1", common)
        self.assertEqual(common["$.key1"], "string")

        # Same types on different paths: nothing counts as a type change
        self.assertEqual(len(diff["type_changes"]), 0)

    def test_type_changes_detection(self):
        """
        The same path holding a string and then an integer is reported as a type change
        """
        as_string = {"test_field": "hello"}
        as_number = {"test_field": 42}

        diff = JSONPathComparator().compare_structures_with_types(as_string, as_number)

        changes = diff["type_changes"]
        self.assertIn("$.test_field", changes)
        self.assertEqual(changes["$.test_field"]["old_type"], "string")
        self.assertEqual(changes["$.test_field"]["new_type"], "integer")

    def test_compare_structures_unchanged_paths(self):
        """
        Paths present in both fixtures are common, and neither added nor removed
        """
        comparison = JSONPathComparator().compare_structures(old_json, new_json)

        # Paths the two fixtures share verbatim
        unchanged_paths = (
            "$.key1",
            "$.key2",
            "$.key_nested1",
            "$.key_nested1.key_nested2",
            "$.key_nested1.key_nested3",
        )
        for path in unchanged_paths:
            self.assertIn(path, comparison["common_paths"], f"Path {path} should be in common paths")
            self.assertNotIn(path, comparison["added_paths"], f"Path {path} should not be added")
            self.assertNotIn(path, comparison["removed_paths"], f"Path {path} should not be removed")

    def test_compare_structures_nested_array_preserved(self):
        """
        The innermost array moves from removed to added paths because its parent was renamed
        """
        diff = JSONPathComparator().compare_structures(old_json, new_json)

        # Same inner array, reached through the old and the new parent name
        self.assertIn("$.key_nested1.array_nested_4[*].array_nested_5[*]", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*].array_nested_5[*]", diff["added_paths"])

    def test_path_validations_with_specific_paths(self):
        """
        Paths handed to the comparator are validated one by one against both structures
        """
        watched_paths = [
            "$.key1",  # present in both fixtures
            "$.key2",  # present in both fixtures
            "$.key_nested1.array_nested_4[*]",  # present only in the old fixture
            "$.key_nested1.array_changed_4[*]",  # present only in the new fixture
            "$.key_nested1.key_nested2"  # present in both fixtures
        ]
        diff = JSONPathComparator(watched_paths).compare_structures(old_json, new_json)
        validations = diff["path_validations"]

        # Found on both sides
        in_both = validations["$.key1"]
        self.assertEqual(in_both["status"], "✅")
        self.assertTrue(in_both["old_found"])
        self.assertTrue(in_both["new_found"])

        # Found only on the old side
        only_old = validations["$.key_nested1.array_nested_4[*]"]
        self.assertEqual(only_old["status"], "❌")
        self.assertTrue(only_old["old_found"])
        self.assertFalse(only_old["new_found"])

        # Found only on the new side
        only_new = validations["$.key_nested1.array_changed_4[*]"]
        self.assertEqual(only_new["status"], "❌")
        self.assertFalse(only_new["old_found"])
        self.assertTrue(only_new["new_found"])
class TestJSONPathIntegration(unittest.TestCase):
    """
    End-to-end checks chaining structure analysis and comparison on the old_json / new_json fixtures
    """

    def test_complete_diff_workflow(self):
        """
        Analyze both fixtures, then compare them while passing a list of critical paths
        """
        # Step 1: the old report mentions the old nested key
        self.assertIn("$.key_nested1.array_nested_4[*].key_nested4", analyze_with_jsonpath(old_json))
        # Step 2: the new report mentions the renamed nested key
        self.assertIn("$.key_nested1.array_changed_4[*].last_change", analyze_with_jsonpath(new_json))

        # Step 3: run the comparison with the critical paths
        critical_paths = [
            "$.key1",
            "$.key2",
            "$.key_nested1.key_nested2",
            "$.key_nested1.key_nested3"
        ]
        diff = compare_json_with_jsonpath(old_json, new_json, critical_paths)

        self.assertIsInstance(diff, dict)
        # Every category is expected to be populated for these fixtures
        for category in ("added_paths", "removed_paths", "common_paths"):
            self.assertGreater(len(diff[category]), 0)

    def test_complete_diff_workflow_with_types(self):
        """
        The typed comparison returns a dict with the expected sections and non-empty type names
        """
        critical_paths = [
            "$.key1",
            "$.key2",
            "$.key_nested1.key_nested2"
        ]
        diff = compare_json_with_jsonpath_and_types(old_json, new_json, critical_paths)

        self.assertIsInstance(diff, dict)
        for section in ("added_paths", "removed_paths", "type_changes"):
            self.assertIn(section, diff)

        # Each added path, if any, maps to a non-empty type name
        added = diff["added_paths"]
        if added:
            for type_info in added.values():
                self.assertIsInstance(type_info, str)
                self.assertGreater(len(type_info), 0)

    def test_detect_specific_changes(self):
        """
        The two renames between the fixtures are reported as removed and added paths:
        array_nested_4 -> array_changed_4 and key_nested4 -> last_change
        """
        comparison = compare_json_with_jsonpath(old_json, new_json)

        expected_removed = (
            "$.key_nested1.array_nested_4[*]",
            "$.key_nested1.array_nested_4[*].key_nested4",
        )
        expected_added = (
            "$.key_nested1.array_changed_4[*]",
            "$.key_nested1.array_changed_4[*].last_change",
        )
        for path in expected_removed:
            self.assertIn(path, comparison["removed_paths"], f"Expected removed path {path} not found")
        for path in expected_added:
            self.assertIn(path, comparison["added_paths"], f"Expected added path {path} not found")

    def test_structure_variations_old(self):
        """
        Paths extracted from the old fixture include the old array name
        """
        expected_path = "$.key_nested1.array_nested_4[*]"
        paths = JSONPathStructureAnalyzer().extract_all_paths(old_json)
        self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")

    def test_structure_variations_new(self):
        """
        Paths extracted from the new fixture include the renamed array
        """
        expected_path = "$.key_nested1.array_changed_4[*]"
        paths = JSONPathStructureAnalyzer().extract_all_paths(new_json)
        self.assertIn(expected_path, paths, f"Expected path {expected_path} not found")

    def test_json_string_compatibility(self):
        """
        Fixtures that went through a json.dumps / json.loads round trip compare like the original dicts
        """
        # Serialize and parse back each fixture
        old_parsed = json.loads(json.dumps(old_json))
        new_parsed = json.loads(json.dumps(new_json))

        diff = compare_json_with_jsonpath(old_parsed, new_parsed)

        self.assertIn("$.key_nested1.array_nested_4[*]", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", diff["added_paths"])
class TestEdgeCases(unittest.TestCase):
    """
    Boundary scenarios (empty input, identical input, deep nesting) built on the old_json fixture
    """

    def test_empty_json_comparison(self):
        """
        Comparing against an empty dict reports only removed paths
        """
        diff = compare_json_with_jsonpath(old_json, {})

        self.assertGreater(len(diff["removed_paths"]), 0)
        # Nothing can be added or shared when the new side is empty
        self.assertEqual(len(diff["added_paths"]), 0)
        self.assertEqual(len(diff["common_paths"]), 0)

    def test_empty_json_comparison_with_types(self):
        """
        Typed comparison against an empty dict reports removed paths together with their type names
        """
        diff = compare_json_with_jsonpath_and_types(old_json, {})

        self.assertGreater(len(diff["removed_paths"]), 0)
        self.assertEqual(len(diff["added_paths"]), 0)
        # The root "$" path may be shared by both structures, so at most one common path is tolerated
        self.assertLessEqual(len(diff["common_paths"]), 1)

        # Every removed path carries its type as a string
        for type_info in diff["removed_paths"].values():
            self.assertIsInstance(type_info, str)

    def test_identical_json_comparison(self):
        """
        A structure compared with itself has common paths only
        """
        diff = compare_json_with_jsonpath(old_json, old_json)

        for changed_section in ("added_paths", "removed_paths"):
            self.assertEqual(len(diff[changed_section]), 0)
        self.assertGreater(len(diff["common_paths"]), 0)

    def test_identical_json_comparison_with_types(self):
        """
        A structure compared with itself, types included, has common paths only
        """
        diff = compare_json_with_jsonpath_and_types(old_json, old_json)

        for changed_section in ("added_paths", "removed_paths", "type_changes"):
            self.assertEqual(len(diff[changed_section]), 0)
        self.assertGreater(len(diff["common_paths"]), 0)

    def test_deep_nested_array_analysis(self):
        """
        The structure report describes the innermost array (array_nested_5) with its length
        """
        structure_analyzer = JSONPathStructureAnalyzer()
        structure_analyzer.extract_all_paths(old_json)

        self.assertIn(
            "$.key_nested1.array_nested_4[*].array_nested_5[*] -- array[2]",
            structure_analyzer.get_structure_report()
        )

    def test_array_type_detection(self):
        """
        extract_structure_paths_with_types distinguishes the array field from its [*] element path
        """
        typed_paths = extract_structure_paths_with_types(old_json)
        array_field = "$.key_nested1.array_nested_4"

        # The field itself is reported as an array
        self.assertIn("array", typed_paths[array_field])
        # The [*] path is expected to be typed "string" (the first element of the fixture array is a string)
        self.assertEqual(typed_paths[f"{array_field}[*]"], "string")
class TestSimpleUsageExamples(unittest.TestCase):
    """
    Short, example-like tests showing the typical calls of the JSONPath diff tool
    """

    def test_basic_structure_analysis_old(self):
        """
        Produce and log the structure report of the old JSON
        """
        old_report = analyze_with_jsonpath(old_json)
        app_logger.info("\nOLD JSON STRUCTURE:")
        app_logger.info(old_report)

        # The report lists scalar values and mentions the old key names
        for fragment in ("$.key1 -- row 1", "$.key2 -- 22", "array_nested_4", "key_nested4"):
            self.assertIn(fragment, old_report)

    def test_basic_structure_analysis_new(self):
        """
        Produce and log the structure report of the new JSON
        """
        new_report = analyze_with_jsonpath(new_json)
        app_logger.info("\nNEW JSON STRUCTURE:")
        app_logger.info(new_report)

        # The report lists scalar values and mentions the renamed keys
        for fragment in ("$.key1 -- row 1", "$.key2 -- 22", "array_changed_4", "last_change"):
            self.assertIn(fragment, new_report)

    def test_basic_comparison(self):
        """
        Compare the old and the new JSON structures
        """
        app_logger.info("\nCOMPARISON RESULTS:")
        diff = compare_json_with_jsonpath(old_json, new_json)

        # The array rename appears as one removed and one added path
        self.assertIn("$.key_nested1.array_nested_4[*]", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4[*]", diff["added_paths"])

        # Top-level keys are shared by both structures
        shared = diff["common_paths"]
        self.assertIn("$.key1", shared)
        self.assertIn("$.key2", shared)

    def test_basic_comparison_with_types(self):
        """
        Compare the old and the new JSON structures, type names included
        """
        app_logger.info("\nCOMPARISON RESULTS WITH TYPES:")
        diff = compare_json_with_jsonpath_and_types(old_json, new_json)

        # The array rename appears as one removed and one added path
        self.assertIn("$.key_nested1.array_nested_4", diff["removed_paths"])
        self.assertIn("$.key_nested1.array_changed_4", diff["added_paths"])

        # Shared paths map to their type names
        shared = diff["common_paths"]
        self.assertEqual(shared["$.key1"], "string")
        self.assertEqual(shared["$.key2"], "integer")
# Run the test suite only when this file is executed as a script
if __name__ == "__main__":
    unittest.main()