Skip to content

Commit

Permalink
adding use_enum_value and threshold_to_diff_deeper
Browse files Browse the repository at this point in the history
  • Loading branch information
seperman committed May 14, 2024
1 parent b391ae9 commit 5f25cc5
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 47 deletions.
16 changes: 11 additions & 5 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def __init__(self,
ignore_numeric_type_changes=False,
ignore_type_subclasses=False,
ignore_string_case=False,
use_enum_value=False,
exclude_obj_callback=None,
number_to_string_func=None,
ignore_private_variables=True,
Expand All @@ -154,7 +155,7 @@ def __init__(self,
"exclude_paths, include_paths, exclude_regex_paths, hasher, ignore_repetition, "
"number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
"number_to_string_func, ignore_private_variables, parent "
"number_to_string_func, ignore_private_variables, parent, use_enum_value "
"encodings, ignore_encoding_errors") % ', '.join(kwargs.keys()))
if isinstance(hashes, MutableMapping):
self.hashes = hashes
Expand All @@ -170,6 +171,7 @@ def __init__(self,
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
self.hasher = default_hasher if hasher is None else hasher
self.hashes[UNPROCESSED_KEY] = []
self.use_enum_value = use_enum_value

self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
self.truncate_datetime = get_truncate_datetime(truncate_datetime)
Expand Down Expand Up @@ -206,10 +208,10 @@ def __init__(self,
sha1hex = sha1hex

def __getitem__(self, obj, extract_index=0):
return self._getitem(self.hashes, obj, extract_index=extract_index)
return self._getitem(self.hashes, obj, extract_index=extract_index, use_enum_value=self.use_enum_value)

@staticmethod
def _getitem(hashes, obj, extract_index=0):
def _getitem(hashes, obj, extract_index=0, use_enum_value=False):
"""
extract_index is zero for hash and 1 for count and None to get them both.
To keep it backward compatible, we only get the hash by default so it is set to zero by default.
Expand All @@ -220,6 +222,8 @@ def _getitem(hashes, obj, extract_index=0):
key = BoolObj.TRUE
elif obj is False:
key = BoolObj.FALSE
elif use_enum_value and isinstance(obj, Enum):
key = obj.value

result_n_count = (None, 0)

Expand Down Expand Up @@ -256,14 +260,14 @@ def get(self, key, default=None, extract_index=0):
return self.get_key(self.hashes, key, default=default, extract_index=extract_index)

@staticmethod
def get_key(hashes, key, default=None, extract_index=0):
def get_key(hashes, key, default=None, extract_index=0, use_enum_value=False):
"""
get_key method for the hashes dictionary.
It can extract the hash for a given key that is already calculated when extract_index=0
or the count of items that went to building the object when extract_index=1.
"""
try:
result = DeepHash._getitem(hashes, key, extract_index=extract_index)
result = DeepHash._getitem(hashes, key, extract_index=extract_index, use_enum_value=use_enum_value)
except KeyError:
result = default
return result
Expand Down Expand Up @@ -481,6 +485,8 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
if isinstance(obj, bool):
obj = self._prep_bool(obj)
result = None
elif self.use_enum_value and isinstance(obj, Enum):
obj = obj.value
else:
result = not_hashed
try:
Expand Down
56 changes: 25 additions & 31 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def _report_progress(_stats, progress_logger, duration):
'number_format_notation',
'ignore_string_type_changes',
'ignore_numeric_type_changes',
'use_enum_value',
'ignore_type_in_groups',
'ignore_type_subclasses',
'ignore_string_case',
Expand All @@ -116,6 +117,7 @@ class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, Base):
def __init__(self,
t1: Any,
t2: Any,
_original_type=None,
cache_purge_level: int=1,
cache_size: int=0,
cache_tuning_sample_size: int=0,
Expand All @@ -126,9 +128,6 @@ def __init__(self,
exclude_obj_callback: Optional[Callable]=None,
exclude_obj_callback_strict: Optional[Callable]=None,
exclude_paths: Union[str, List[str]]=None,
include_obj_callback: Optional[Callable]=None,
include_obj_callback_strict: Optional[Callable]=None,
include_paths: Union[str, List[str]]=None,
exclude_regex_paths: Union[str, List[str], Pattern[str], List[Pattern[str]], None]=None,
exclude_types: Optional[List[Any]]=None,
get_deep_distance: bool=False,
Expand All @@ -146,8 +145,10 @@ def __init__(self,
ignore_string_type_changes: bool=False,
ignore_type_in_groups: Optional[List[Tuple]]=None,
ignore_type_subclasses: bool=False,
include_obj_callback: Optional[Callable]=None,
include_obj_callback_strict: Optional[Callable]=None,
include_paths: Union[str, List[str]]=None,
iterable_compare_func: Optional[Callable]=None,
zip_ordered_iterables: bool=False,
log_frequency_in_sec: int=0,
math_epsilon: Optional[float]=None,
max_diffs: Optional[int]=None,
Expand All @@ -157,10 +158,12 @@ def __init__(self,
progress_logger: Callable=logger.info,
report_repetition: bool=False,
significant_digits: Optional[int]=None,
threshold_to_diff_deeper: float = 0,
truncate_datetime: Optional[str]=None,
use_enum_value: bool=False,
verbose_level: int=1,
view: str=TEXT_VIEW,
_original_type=None,
zip_ordered_iterables: bool=False,
_parameters=None,
_shared_parameters=None,
**kwargs):
Expand All @@ -175,7 +178,7 @@ def __init__(self,
"view, hasher, hashes, max_passes, max_diffs, zip_ordered_iterables, "
"cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
"cache_tuning_sample_size, get_deep_distance, group_by, group_by_sort_key, cache_purge_level, "
"math_epsilon, iterable_compare_func, _original_type, "
"math_epsilon, iterable_compare_func, use_enum_value, _original_type, threshold_to_diff_deeper, "
"ignore_order_func, custom_operators, encodings, ignore_encoding_errors, "
"_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))

Expand All @@ -193,6 +196,8 @@ def __init__(self,
self.ignore_numeric_type_changes = ignore_numeric_type_changes
if strings == ignore_type_in_groups or strings in ignore_type_in_groups:
ignore_string_type_changes = True
self.use_enum_value = use_enum_value
self.threshold_to_diff_deeper = threshold_to_diff_deeper
self.ignore_string_type_changes = ignore_string_type_changes
self.ignore_type_in_groups = self.get_ignore_types_in_groups(
ignore_type_in_groups=ignore_type_in_groups,
Expand Down Expand Up @@ -513,6 +518,8 @@ def _get_clean_to_keys_mapping(self, keys, level):
for key in keys:
if self.ignore_string_type_changes and isinstance(key, bytes):
clean_key = key.decode('utf-8')
elif self.use_enum_value and isinstance(key, Enum):
clean_key = key.value
elif isinstance(key, numbers):
type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__
clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
Expand Down Expand Up @@ -578,6 +585,12 @@ def _diff_dict(
t_keys_added = t2_keys - t_keys_intersect
t_keys_removed = t1_keys - t_keys_intersect

if self.threshold_to_diff_deeper:
len_keys_changed = (len(t_keys_added) + len(t_keys_removed))
if len_keys_changed and len(t_keys_intersect) / len_keys_changed < self.threshold_to_diff_deeper:
self._report_result('values_changed', level, local_tree=local_tree)
return

for key in t_keys_added:
if self._count_diff() is StopIteration:
return
Expand Down Expand Up @@ -861,31 +874,6 @@ def _diff_by_forming_pairs_and_comparing_one_by_one(
self._report_result('iterable_item_added', change_level, local_tree=local_tree)

else: # check if item value has changed

# if (i != j):
# # Item moved
# change_level = level.branch_deeper(
# x,
# y,
# child_relationship_class=child_relationship_class,
# child_relationship_param=i,
# child_relationship_param2=j
# )
# self._report_result('iterable_item_moved', change_level)

# item_id = id(x)
# if parents_ids and item_id in parents_ids:
# continue
# parents_ids_added = add_to_frozen_set(parents_ids, item_id)

# # Go one level deeper
# next_level = level.branch_deeper(
# x,
# y,
# child_relationship_class=child_relationship_class,
# child_relationship_param=j)
# self._diff(next_level, parents_ids_added)

if (i != j and ((x == y) or self.iterable_compare_func)):
# Item moved
change_level = level.branch_deeper(
Expand Down Expand Up @@ -1604,6 +1592,12 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
if self.type_check_func(level.t1, type_group) and self.type_check_func(level.t2, type_group):
report_type_change = False
break
if self.use_enum_value and isinstance(level.t1, Enum):
level.t1 = level.t1.value
report_type_change = False
if self.use_enum_value and isinstance(level.t2, Enum):
level.t2 = level.t2.value
report_type_change = False
if report_type_change:
self._diff_types(level, local_tree=local_tree)
return
Expand Down
12 changes: 10 additions & 2 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,13 @@ def __reduce__(self):
return (self.__class__, (item, ))

def __eq__(self, other):
both_no_items_attr = (not hasattr(self, 'item')) and (not hasattr(other, 'item'))
return both_no_items_attr or self.item == other.item
if hasattr(self, 'item') and hasattr(other, 'item'):
return self.item == other.item
if not hasattr(self, 'item') and not hasattr(other, 'item'):
return True
return False

def __str__(self):
    """Render as ``<Picklable: ITEM>``, showing 'delete' when no ``item`` attribute is set."""
    shown = getattr(self, 'item', 'delete')
    return f"<Picklable: {shown}>"

__repr__ = __str__
16 changes: 16 additions & 0 deletions tests/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,14 @@ def test_delta_dict_items_added_retain_order(self):
delta2 = Delta(diff=diff, bidirectional=True)
assert t1 == t2 - delta2

delta3 = Delta(diff, always_include_values=True, bidirectional=True, raise_errors=True)
flat_rows_list = delta3.to_flat_rows()
delta4 = Delta(flat_rows_list=flat_rows_list,
always_include_values=True, bidirectional=True, raise_errors=True)
assert t1 == t2 - delta4
assert t1 + delta4 == t2


def test_delta_constr_flat_dict_list_param_preserve(self):
"""
Issue: https://github.com/seperman/deepdiff/issues/457
Expand Down Expand Up @@ -818,6 +826,13 @@ def compare_func(item1, item2, level=None):
}
}
},
'delta_case14b_threshold_to_diff_deeper': {
't1': picklalbe_obj_without_item,
't2': PicklableClass(11),
'deepdiff_kwargs': {'threshold_to_diff_deeper': 0.33},
'to_delta_kwargs': {},
'expected_delta_dict': {'values_changed': {'root': {'new_value': PicklableClass(11)}}}
},
'delta_case15_diffing_simple_numbers': {
't1': 1,
't2': 2,
Expand Down Expand Up @@ -1451,6 +1466,7 @@ def test_delta_view_and_to_delta_dict_are_equal_when_parameteres_passed(self):
'ignore_string_type_changes': False,
'ignore_type_in_groups': [],
'report_repetition': True,
'use_enum_value': False,
'exclude_paths': None,
'include_paths': None,
'exclude_regex_paths': None,
Expand Down
38 changes: 29 additions & 9 deletions tests/test_diff_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@
logging.disable(logging.CRITICAL)


class MyEnum1(Enum):
    """Plain Enum (no str mixin) whose member values are strings."""

    book = "book"
    cake = "cake"

class MyEnum2(str, Enum):
    """Enum that also subclasses str, so each member is itself a str instance."""

    book = "book"
    cake = "cake"



class TestDeepDiffText:
"""DeepDiff Tests."""

Expand Down Expand Up @@ -649,14 +659,6 @@ class MyEnum(Enum):

def test_enum_ignore_type_change(self):

class MyEnum1(Enum):
book = "book"
cake = "cake"

class MyEnum2(str, Enum):
book = "book"
cake = "cake"

diff = DeepDiff("book", MyEnum1.book)
expected = {
'type_changes': {'root': {'old_type': str, 'new_type': MyEnum1, 'old_value': 'book', 'new_value': MyEnum1.book}}}
Expand All @@ -668,6 +670,14 @@ class MyEnum2(str, Enum):
diff3 = DeepDiff("book", MyEnum2.book, ignore_type_in_groups=[(Enum, str)])
assert not diff3

def test_enum_use_enum_value1(self):
    """A plain string and a str-mixin enum member holding the same value produce an empty diff under use_enum_value."""
    assert not DeepDiff("book", MyEnum2.book, use_enum_value=True)

def test_enum_use_enum_value_in_dict_key(self):
    """A string dict key and an enum-member dict key with the same value produce an empty diff under use_enum_value."""
    plain_keyed = {"book": 2}
    enum_keyed = {MyEnum2.book: 2}
    assert not DeepDiff(plain_keyed, enum_keyed, use_enum_value=True)

def test_precompiled_regex(self):

pattern_1 = re.compile('foo')
Expand Down Expand Up @@ -950,6 +960,9 @@ def test_custom_objects_add_and_remove_verbose(self):

def get_custom_object_with_added_removed_methods(self):
class ClassA:
VAL = 1
VAL2 = 2

def method_a(self):
pass

Expand Down Expand Up @@ -1000,14 +1013,21 @@ def test_dictionary_of_custom_objects(self):
result = {}
assert result == ddiff

def test_dictionary_with_string_keys(self):
def test_dictionary_with_string_keys1(self):
t1 = {"veggie": "carrots"}
t2 = {"meat": "carrots"}

diff = DeepDiff(t1, t2)
assert {'dictionary_item_added': ["root['meat']"],
'dictionary_item_removed': ["root['veggie']"]} == diff

def test_dictionary_with_string_keys_threshold_to_diff_deeper(self):
    """With threshold_to_diff_deeper=0.33, two dicts that share no keys are reported as a single root-level value change."""
    before = {"veggie": "carrots"}
    after = {"meat": "carrots"}
    expected = {
        'values_changed': {
            'root': {
                'new_value': {'meat': 'carrots'},
                'old_value': {'veggie': 'carrots'},
            }
        }
    }

    assert expected == DeepDiff(before, after, threshold_to_diff_deeper=0.33)

def test_dictionary_with_numeric_keys(self):
t1 = {Decimal('10.01'): "carrots"}
t2 = {10.01: "carrots"}
Expand Down
2 changes: 2 additions & 0 deletions tests/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ class MyEnum(Enum):
assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.value)
assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.B)

assert DeepHashPrep(MyEnum.A, use_enum_value=True)[MyEnum.A] == 'int:1'

def test_dict_hash(self):
string1 = "a"
string1_prepped = prep_str(string1)
Expand Down

0 comments on commit 5f25cc5

Please sign in to comment.