diff --git a/README.md b/README.md index 59fcd99..5a82b23 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,10 @@ pip install . #### Basic Usage ```python -from diffgetr.diff_get import diff_get +from diffgetr.diff_get import Diffr # Basic comparison -diff = diff_get(obj1, obj2) +diff = Diffr(obj1, obj2) print(diff) # Prints a summary of differences # Navigate to specific parts @@ -43,7 +43,7 @@ print(sub_diff) ```python # Custom DeepDiff parameters -diff = diff_get( +diff = Diffr( obj1, obj2, deep_diff_kw={'significant_digits': 5, 'ignore_string_case': True}, ignore_added=True # Focus only on changes and removals @@ -60,7 +60,7 @@ raw_diff = diff.diff_obj # Access underlying DeepDiff object ```python # Navigate through nested structures -diff = diff_get(data1, data2) +diff = Diffr(data1, data2) # Use tab completion to see available keys dir(diff) # Shows common keys between both datasets @@ -105,7 +105,7 @@ diffgetr file1.json file2.json path.to.key ### Constructor Parameters ```python -diff_get(s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False) +Diffr(s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False) ``` **Parameters:** @@ -169,13 +169,13 @@ When navigating to non-existent keys, the tool will: ```python import json -from diffgetr.diff_get import diff_get +from diffgetr.diff_get import Diffr with open('config_v1.json') as f1, open('config_v2.json') as f2: config1 = json.load(f1) config2 = json.load(f2) -diff = diff_get(config1, config2, ignore_added=True) +diff = Diffr(config1, config2, ignore_added=True) print(f"Changes found at: {diff.location}") diff.diff_summary(top=20) ``` @@ -184,7 +184,7 @@ diff.diff_summary(top=20) ```python # Compare two API responses with high precision -diff = diff_get( +diff = Diffr( response1, response2, deep_diff_kw={'significant_digits': 6, 'ignore_order': True} ) @@ -199,7 +199,7 @@ if user_diff: ```python # For detailed tabular comparison with percentage changes -diff = diff_get(financial_data_old, financial_data_new) +diff = Diffr(financial_data_old, financial_data_new) diff.diff_sidebyside() # Output example: @@ -234,7 +234,7 @@ The test suite covers: ## Contributing -This tool is part of the SMART_X project ecosystem. When contributing: +This tool is part of the Ottermatics projects ecosystem. When contributing: 1. Maintain backward compatibility with existing APIs 2. Add tests for new pattern recognition features diff --git a/diffgetr/diff_get.py b/diffgetr/diff_get.py index 6b7dd51..8d1b05c 100644 --- a/diffgetr/diff_get.py +++ b/diffgetr/diff_get.py @@ -12,19 +12,63 @@ class Diffr: def __init__( self, s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False ): + if deep_diff_kw is None: + self.deep_diff_kw = dict( + ignore_numeric_type_changes=True, significant_digits=3 + ) + else: + self.deep_diff_kw = deep_diff_kw + + threshold = 1.0 / (10 ** self.deep_diff_kw.get("significant_digits", 3)) - #TODO: fail here for type differences + # TODO: fail here for type differences st0 = type(s0) st1 = type(s1) if st0 != st1: preview_0 = str(s0)[:100] - preview_1 = str(s1)[:100] - raise Exception(f'{loc}.{path}| types different: {st0} vs {st1} |0: {preview_0} | 1:{preview_1}') - elif st0 in (str,int,float): + preview_1 = str(s1)[:100] + raise Exception( + f"{loc}.{path}| types different: {st0} vs {st1} |0: {preview_0} | 1:{preview_1}" + ) + + elif st0 in (str,): preview_0 = str(s0)[:100] - preview_1 = str(s1)[:100] + preview_1 = str(s1)[:100] + pct = None + v0_num = None + v1_num = None + # Try to convert to float if possible + try: + v0_num = float(v0) + except (ValueError, TypeError): + pass + try: + v1_num = float(v1) + except (ValueError, TypeError): + pass + if v0_num is not None and v1_num is not None: + diff = v1_num - v0_num + pct = abs(diff / v0_num) + pct_diff = f"{pct:10.3%}" + if abs(pct) > threshold: + raise Exception(f"{loc}.{path}| values different: {s0} vs {s1}") + elif s0 != s1: + raise Exception( + f"{loc}.{path}| values different: {preview_0} vs {preview_1}" + ) + return + + elif st0 in (int, float): if s0 != s1: - raise Exception(f'{loc}.{path}| values different: {preview_0} vs {preview_1}') + diff = s1 - s0 + pct = abs(diff / max(s1, s0)) + pct_diff = f"{pct:10.3%}" + if abs(pct) > threshold: + raise Exception( + f"{loc}.{path}| values different: {s0} vs {s1}| {pct_diff}" + ) + return + elif isinstance(s0, (tuple, list)) and isinstance(s1, (tuple, list)): print(f"converting lists -> dict") s0 = {i: v for i, v in enumerate(s0)} @@ -32,12 +76,6 @@ def __init__( self.s0 = s0 self.s1 = s1 self.ignore_added = ignore_added - if deep_diff_kw is None: - self.deep_diff_kw = dict( - ignore_numeric_type_changes=True, significant_digits=3 - ) - else: - self.deep_diff_kw = deep_diff_kw if loc is None: self.loc = [] @@ -65,67 +103,67 @@ def __getitem__(self, key): @property def path(self): - return '.'.join(self.loc) + return ".".join(self.loc) def keys(self): s0k = set(self.s0) s1k = set(self.s1) sa = set.intersection(s0k, s1k) return sa - - def dict_keys(self)->set: - return set(( k for k in self.keys() \ - if isinstance(self.s0[k],dict) \ - and isinstance(self.s1[k],dict) - )) - - def path_diffs(self,syskey:str) -> "Diffr": - """take sys key like root.p1.p2.pk[0].*.po[*].val and generate diffs through each matching key. If there is a key prefix'd with path, such as root.p1.p2 be sure to strip that so you can navigate the data. - """ - if '.' not in syskey and '[' not in syskey: - #you're here - #print(f'returning {self.loc}') + + def dict_keys(self) -> set: + return set( + ( + k + for k in self.keys() + if isinstance(self.s0[k], dict) and isinstance(self.s1[k], dict) + ) + ) + + def path_diffs(self, syskey: str) -> "Diffr": + """take sys key like root.p1.p2.pk[0].*.po[*].val and generate diffs through each matching key. If there is a key prefix'd with path, such as root.p1.p2 be sure to strip that so you can navigate the data.""" + if "." not in syskey and "[" not in syskey: + # you're here + # print(f'returning {self.loc}') if syskey in self.dict_keys(): yield self[syskey] else: yield self else: - #recursive - pre_path = '.'.join(self.loc) - + # recursive + pre_path = ".".join(self.loc) + find = syskey if pre_path in syskey: - #print(f'replacing: {pre_path} in {syskey}') - find = syskey.replace(pre_path,"") - - pths = find.split('.') - for i,key_seg in enumerate(pths): - nx = pths[i+1:] - nxt = '.'.join(nx) - if not key_seg or key_seg == '.': + # print(f'replacing: {pre_path} in {syskey}') + find = syskey.replace(pre_path, "") + + pths = find.split(".") + for i, key_seg in enumerate(pths): + nx = pths[i + 1 :] + nxt = ".".join(nx) + if not key_seg or key_seg == ".": continue elif nx: - #print(f'getting {key_seg} -> {nxt}') + # print(f'getting {key_seg} -> {nxt}') - if '*' == key_seg: + if "*" == key_seg: for ky in self.dict_keys(): for val in self[ky].path_diffs(nxt): yield val - elif '[*]' in key_seg: + elif "[*]" in key_seg: array1 = self.s0[key_seg] array2 = self.s1[key_seg] - for j in range(min(len(array1),len(array2))): + for j in range(min(len(array1), len(array2))): v1 = array1[j] v2 = array2[j] - for val in Diffrr(v1,v2).path_diffs(nxt): + for val in Diffrr(v1, v2).path_diffs(nxt): yield val elif key_seg in self.dict_keys(): for val in self[key_seg].path_diffs(nxt): yield val - - def __iter__(self): return self.keys() @@ -179,10 +217,10 @@ def print_here(self): pprint(d1, indent=2) def print_below(self): - print(f'## BASE') + print(f"## BASE") pprint(self.s0, indent=2) - print(f'\n## TEST') - pprint(self.s1, indent=2) + print(f"\n## TEST") + pprint(self.s1, indent=2) @property def diff_obj(self) -> deepdiff.DeepDiff: @@ -201,7 +239,7 @@ def diff_all(self, indent=2, file=None): bytes = False else: # Determine if file expects bytes or text - bytes = hasattr(file, 'mode') and 'b' in file.mode + bytes = hasattr(file, "mode") and "b" in file.mode title = f"{self.location} diffing data\n\n" file.write(title.encode("utf-8") if bytes else title) @@ -449,5 +487,6 @@ def main(): # diff_data already prints to stdout in __getitem__ on KeyError pass + if __name__ == "__main__": main() diff --git a/diffgetr/tests/test_diff_get.py b/diffgetr/tests/test_diff_get.py index a6dc49d..d0319ac 100644 --- a/diffgetr/tests/test_diff_get.py +++ b/diffgetr/tests/test_diff_get.py @@ -5,175 +5,176 @@ class TestDiffGet(unittest.TestCase): - + def test_basic_diff(self): """Test basic diff functionality""" s0 = {"a": 1, "b": 2, "c": {"d": 3}} s1 = {"a": 1, "b": 3, "c": {"d": 4}} - + diff = Diffr(s0, s1) assert diff.location == "root" - + # Test that diff object is created diff_obj = diff.diff_obj - assert 'values_changed' in diff_obj - + assert "values_changed" in diff_obj + def test_navigation(self): """Test navigation through nested structures""" s0 = {"level1": {"level2": {"value": 10}}} s1 = {"level1": {"level2": {"value": 20}}} - + diff = Diffr(s0, s1) - nested_diff = diff['level1']['level2'] - + nested_diff = diff["level1"]["level2"] + assert nested_diff.location == "root.level1.level2" - assert nested_diff.s0['value'] == 10 - assert nested_diff.s1['value'] == 20 - + assert nested_diff.s0["value"] == 10 + assert nested_diff.s1["value"] == 20 + def test_list_conversion(self): """Test automatic list to dict conversion""" s0 = [1, 2, 3] s1 = [1, 2, 4] - + diff = Diffr(s0, s1) assert isinstance(diff.s0, dict) assert isinstance(diff.s1, dict) assert diff.s0[2] == 3 assert diff.s1[2] == 4 - + def test_keys_method(self): """Test keys() method returns intersection""" s0 = {"a": 1, "b": 2, "c": 3} s1 = {"a": 1, "b": 3, "d": 4} - + diff = Diffr(s0, s1) keys = diff.keys() - + assert keys == {"a", "b"} - + def test_ignore_added(self): """Test ignore_added parameter""" s0 = {"a": 1, "b": 2} s1 = {"a": 1, "b": 2, "c": 3} - + diff = Diffr(s0, s1, ignore_added=True) diff_obj = diff.diff_obj - + # Should not contain dictionary_item_added - assert 'dictionary_item_added' not in diff_obj - + assert "dictionary_item_added" not in diff_obj + def test_custom_deep_diff_params(self): """Test custom DeepDiff parameters""" s0 = {"value": 1.123456} s1 = {"value": 1.123457} - + # With default precision (3), should see no difference diff1 = Diffr(s0, s1) assert len(diff1.diff_obj) == 0 - + # With high precision, should see difference - diff2 = Diffr(s0, s1, deep_diff_kw={'significant_digits': 6}) + diff2 = Diffr(s0, s1, deep_diff_kw={"significant_digits": 6}) assert len(diff2.diff_obj) > 0 - + def test_keyerror_handling(self): """Test KeyError handling when navigating to non-existent keys""" s0 = {"a": {"b": 1}} s1 = {"a": {"c": 2}} - + diff = Diffr(s0, s1) - + with self.assertRaises(KeyError) as context: - diff['a']['nonexistent'] - + diff["a"]["nonexistent"] + self.assertIn("key missing: nonexistent", str(context.exception)) - + def test_string_representation(self): """Test string representation of diff object""" s0 = {"a": 1} s1 = {"a": 2} - + diff = Diffr(s0, s1) str_repr = str(diff) - + assert "root diffing summary" in str_repr assert isinstance(str_repr, str) - + def test_repr(self): """Test repr of diff object""" s0 = {"a": 1} s1 = {"a": 2} - + diff = Diffr(s0, s1) repr_str = repr(diff) - + assert repr_str == "diff[root]" - + def test_diff_summary_output(self): """Test diff_summary method""" s0 = {"a": 1, "b": {"c": 2}} s1 = {"a": 2, "b": {"c": 3}} - + diff = Diffr(s0, s1) - + # Test with StringIO output = io.StringIO() diff.diff_summary(file=output, top=10) summary = output.getvalue() - + assert "root diffing summary" in summary assert "VALUES_CHANGED" in summary - + def test_diff_all_output(self): """Test diff_all method""" s0 = {"a": 1} s1 = {"a": 2} - + diff = Diffr(s0, s1) - + # Test with StringIO output = io.StringIO() diff.diff_all(file=output) result = output.getvalue() - + assert "root diffing data" in result - + def test_type_assertion(self): """Test that different types raise assertion error""" with self.assertRaises(Exception): Diffr({"a": 1}, ["a", 1]) - + def test_ipython_key_completions(self): """Test IPython tab completion support""" s0 = {"a": 1, "b": 2, "c": 3} s1 = {"a": 1, "b": 3, "d": 4} - + diff = Diffr(s0, s1) completions = diff._ipython_key_completions_() - + assert set(completions) == {"a", "b"} assert isinstance(completions, list) class TestCLI(unittest.TestCase): - + def test_main_function_exists(self): """Test that main function exists and is callable""" from diffgetr.diff_get import main + self.assertTrue(callable(main)) class TestPatternRecognition(unittest.TestCase): - + def test_uuid_pattern_replacement(self): """Test UUID pattern recognition in diff summary""" s0 = {"id": "550e8400-e29b-41d4-a716-446655440000"} s1 = {"id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8"} - + diff = Diffr(s0, s1) output = io.StringIO() diff.diff_summary(file=output) summary = output.getvalue() - + # UUIDs should be abstracted in the summary assert "UUID" in summary or summary # At minimum should not crash diff --git a/pyproject.toml b/pyproject.toml index 8e4e12d..ad7b066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "diffgetr" -version = "0.2.0" +version = "0.2.1" description = "A Python library for comparing nested data structures with detailed diff reporting and interactive navigation." authors = [ { name = "Your Name", email = "your.email@example.com" }