Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ pip install .
#### Basic Usage

```python
from diffgetr.diff_get import diff_get
from diffgetr.diff_get import Diffr

# Basic comparison
diff = diff_get(obj1, obj2)
diff = Diffr(obj1, obj2)
print(diff) # Prints a summary of differences

# Navigate to specific parts
Expand All @@ -43,7 +43,7 @@ print(sub_diff)

```python
# Custom DeepDiff parameters
diff = diff_get(
diff = Diffr(
obj1, obj2,
deep_diff_kw={'significant_digits': 5, 'ignore_string_case': True},
ignore_added=True # Focus only on changes and removals
Expand All @@ -60,7 +60,7 @@ raw_diff = diff.diff_obj # Access underlying DeepDiff object

```python
# Navigate through nested structures
diff = diff_get(data1, data2)
diff = Diffr(data1, data2)

# Use tab completion to see available keys
dir(diff) # Shows common keys between both datasets
Expand Down Expand Up @@ -105,7 +105,7 @@ diffgetr file1.json file2.json path.to.key
### Constructor Parameters

```python
diff_get(s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False)
Diffr(s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False)
```

**Parameters:**
Expand Down Expand Up @@ -169,13 +169,13 @@ When navigating to non-existent keys, the tool will:

```python
import json
from diffgetr.diff_get import diff_get
from diffgetr.diff_get import Diffr

with open('config_v1.json') as f1, open('config_v2.json') as f2:
config1 = json.load(f1)
config2 = json.load(f2)

diff = diff_get(config1, config2, ignore_added=True)
diff = Diffr(config1, config2, ignore_added=True)
print(f"Changes found at: {diff.location}")
diff.diff_summary(top=20)
```
Expand All @@ -184,7 +184,7 @@ diff.diff_summary(top=20)

```python
# Compare two API responses with high precision
diff = diff_get(
diff = Diffr(
response1, response2,
deep_diff_kw={'significant_digits': 6, 'ignore_order': True}
)
Expand All @@ -199,7 +199,7 @@ if user_diff:

```python
# For detailed tabular comparison with percentage changes
diff = diff_get(financial_data_old, financial_data_new)
diff = Diffr(financial_data_old, financial_data_new)
diff.diff_sidebyside()

# Output example:
Expand Down Expand Up @@ -234,7 +234,7 @@ The test suite covers:

## Contributing

This tool is part of the SMART_X project ecosystem. When contributing:
This tool is part of the Ottermatics project ecosystem. When contributing:

1. Maintain backward compatibility with existing APIs
2. Add tests for new pattern recognition features
Expand Down
135 changes: 87 additions & 48 deletions diffgetr/diff_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,70 @@ class Diffr:
def __init__(
self, s0, s1, loc=None, path=None, deep_diff_kw=None, ignore_added=False
):
if deep_diff_kw is None:
self.deep_diff_kw = dict(
ignore_numeric_type_changes=True, significant_digits=3
)
else:
self.deep_diff_kw = deep_diff_kw

threshold = 1.0 / (10 ** self.deep_diff_kw.get("significant_digits", 3))

#TODO: fail here for type differences
# TODO: fail here for type differences
st0 = type(s0)
st1 = type(s1)
if st0 != st1:
preview_0 = str(s0)[:100]
preview_1 = str(s1)[:100]
raise Exception(f'{loc}.{path}| types different: {st0} vs {st1} |0: {preview_0} | 1:{preview_1}')
elif st0 in (str,int,float):
preview_1 = str(s1)[:100]
raise Exception(
f"{loc}.{path}| types different: {st0} vs {st1} |0: {preview_0} | 1:{preview_1}"
)

elif st0 in (str,):
preview_0 = str(s0)[:100]
preview_1 = str(s1)[:100]
preview_1 = str(s1)[:100]
pct = None
v0_num = None
v1_num = None
# Try to convert to float if possible
try:
v0_num = float(v0)
except (ValueError, TypeError):
pass
try:
v1_num = float(v1)
except (ValueError, TypeError):
pass
if v0_num is not None and v1_num is not None:
diff = v1_num - v0_num
pct = abs(diff / v0_num)
pct_diff = f"{pct:10.3%}"
if abs(pct) > threshold:
raise Exception(f"{loc}.{path}| values different: {s0} vs {s1}")
elif s0 != s1:
raise Exception(
f"{loc}.{path}| values different: {preview_0} vs {preview_1}"
)
return

elif st0 in (int, float):
if s0 != s1:
raise Exception(f'{loc}.{path}| values different: {preview_0} vs {preview_1}')
diff = s1 - s0
pct = abs(diff / max(s1, s0))
pct_diff = f"{pct:10.3%}"
if abs(pct) > threshold:
raise Exception(
f"{loc}.{path}| values different: {s0} vs {s1}| {pct_diff}"
)
return

elif isinstance(s0, (tuple, list)) and isinstance(s1, (tuple, list)):
print(f"converting lists -> dict")
s0 = {i: v for i, v in enumerate(s0)}
s1 = {i: v for i, v in enumerate(s1)}
self.s0 = s0
self.s1 = s1
self.ignore_added = ignore_added
if deep_diff_kw is None:
self.deep_diff_kw = dict(
ignore_numeric_type_changes=True, significant_digits=3
)
else:
self.deep_diff_kw = deep_diff_kw

if loc is None:
self.loc = []
Expand Down Expand Up @@ -65,67 +103,67 @@ def __getitem__(self, key):

@property
def path(self):
    """Return the current location as a dot-separated string.

    Every entry of ``self.loc`` is converted with ``str`` before joining, so
    a non-string entry no longer makes ``str.join`` raise ``TypeError``.

    Returns:
        The entries of ``self.loc`` joined with ``"."``; an empty string
        when ``self.loc`` is empty.
    """
    # NOTE(review): the constructor re-keys list/tuple inputs by integer
    # position; presumably such keys end up in ``loc`` during navigation --
    # confirm against ``__getitem__``.
    return ".".join(map(str, self.loc))

def keys(self):
    """Return the set of keys that appear in both ``self.s0`` and ``self.s1``."""
    return set(self.s0) & set(self.s1)

def dict_keys(self) -> set:
    """Return the shared keys whose values are dicts on both sides.

    Only keys reported by ``self.keys()`` are examined, and a key is kept
    when ``self.s0[key]`` and ``self.s1[key]`` are both ``dict`` instances.
    """
    return {
        key
        for key in self.keys()
        if isinstance(self.s0[key], dict) and isinstance(self.s1[key], dict)
    }

def path_diffs(self, syskey: str) -> "Diffr":
    """Yield a diff object for every location matched by a path pattern.

    This is a generator. ``syskey`` is a dot-separated pattern such as
    ``root.p1.p2.pk[0].*.po[*].val`` whose segments are read as follows:

    * a plain segment descends into that key when it is in ``dict_keys()``;
    * ``*`` descends into every key in ``dict_keys()``;
    * ``name[*]`` pairs the elements of the two lists/tuples stored under
      ``name`` (up to the shorter length) and continues inside each pair;
    * ``name[N]`` does the same for the single position ``N``.

    A non-final segment that matches nothing at the current level is
    skipped and the next segment is tried at the same level. A final plain
    segment yields the child diff when it is a shared dict key, otherwise
    the diff it was looked up on. A pattern whose last segment is indexed
    yields nothing for that segment.

    When a dotted ``syskey`` begins with this object's own path
    (``self.loc`` joined with ``"."``), that leading prefix is removed
    before matching. Only a whole-segment prefix is removed, and only once
    at this entry point: child levels receive patterns that are already
    relative, so text inside later key names is never altered.

    Args:
        syskey: The path pattern to resolve.

    Yields:
        One diff object per matched location. Element pairs reached through
        an indexed segment are wrapped in new instances of this object's
        class, built with this object's ``deep_diff_kw`` and
        ``ignore_added``.
    """
    containers = (dict, list, tuple)

    def _walk(node, pattern):
        # Terminal case: nothing left to split.
        if "." not in pattern and "[" not in pattern:
            if pattern in node.dict_keys():
                yield node[pattern]
            else:
                yield node
            return

        segments = pattern.split(".")
        nested = node.dict_keys()
        for pos, segment in enumerate(segments):
            rest = segments[pos + 1:]
            # Empty segments come from stray dots; the last segment is
            # resolved by the terminal case of the recursive call.
            if not segment or not rest:
                continue
            remainder = ".".join(rest)

            if segment == "*":
                for key in nested:
                    yield from _walk(node[key], remainder)

            elif "[" in segment and segment.endswith("]"):
                # Look the sequence up under the bare name, without the
                # bracket suffix.
                name, _, index = segment[:-1].partition("[")
                if name not in node.keys():
                    continue
                left, right = node.s0[name], node.s1[name]
                if not (
                    isinstance(left, (list, tuple))
                    and isinstance(right, (list, tuple))
                ):
                    continue
                shared = min(len(left), len(right))
                if index == "*":
                    positions = range(shared)
                elif index.isdecimal() and int(index) < shared:
                    positions = (int(index),)
                else:
                    continue
                for j in positions:
                    first, second = left[j], right[j]
                    # A remainder cannot be followed into scalar elements.
                    if not (
                        isinstance(first, containers)
                        and isinstance(second, containers)
                    ):
                        continue
                    child = type(node)(
                        first,
                        second,
                        deep_diff_kw=node.deep_diff_kw,
                        ignore_added=node.ignore_added,
                    )
                    yield from _walk(child, remainder)

            elif segment in nested:
                yield from _walk(node[segment], remainder)

    find = syskey
    if "." in syskey or "[" in syskey:
        pre_path = ".".join(map(str, self.loc))
        # Strip our own location only as a leading, whole-segment prefix.
        if pre_path and (
            syskey == pre_path or syskey.startswith(pre_path + ".")
        ):
            find = syskey[len(pre_path) + 1:]
    yield from _walk(self, find)



def __iter__(self):
    """Return an iterator over the keys shared by both sides.

    ``keys()`` returns a ``set``, which is iterable but is not itself an
    iterator; returning it directly makes ``iter(obj)`` and ``for`` loops
    raise ``TypeError``. The set is therefore wrapped with ``iter``.
    """
    return iter(self.keys())
Expand Down Expand Up @@ -179,10 +217,10 @@ def print_here(self):
pprint(d1, indent=2)

def print_below(self):
    """Pretty-print both full inputs to stdout.

    Prints a ``## BASE`` heading followed by ``self.s0``, then a blank line,
    a ``## TEST`` heading and ``self.s1``. Both values are rendered with
    ``pprint`` using ``indent=2``.
    """
    # Plain literals: the headings contain no placeholders, so the f-prefix
    # was unnecessary.
    print("## BASE")
    pprint(self.s0, indent=2)
    print("\n## TEST")
    pprint(self.s1, indent=2)

@property
def diff_obj(self) -> deepdiff.DeepDiff:
Expand All @@ -201,7 +239,7 @@ def diff_all(self, indent=2, file=None):
bytes = False
else:
# Determine if file expects bytes or text
bytes = hasattr(file, 'mode') and 'b' in file.mode
bytes = hasattr(file, "mode") and "b" in file.mode

title = f"{self.location} diffing data\n\n"
file.write(title.encode("utf-8") if bytes else title)
Expand Down Expand Up @@ -449,5 +487,6 @@ def main():
# diff_data already prints to stdout in __getitem__ on KeyError
pass


if __name__ == "__main__":
main()
Loading