switched back from OrderlySet to StableSet because OrderlySet was returning unordered sets when any operation other than add or remove was done on the OrderlySet
seperman committed May 17, 2024
1 parent 429b348 commit 872a45a
Showing 21 changed files with 145 additions and 179 deletions.
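For context, the failure mode the commit message describes looks like this in miniature. This is a hypothetical sketch, not the orderly_set source: Python's built-in set operators return plain set instances for subclasses, so an ordered set built this way keeps its order only through add and remove.

    # Hypothetical sketch of the bug class described in the commit message.
    # CPython's set operators build their result from the base set type,
    # so only add/remove keep the order bookkeeping intact.
    class NaiveOrderedSet(set):
        def __init__(self, iterable=()):
            super().__init__()
            self._order = []
            for item in iterable:
                self.add(item)

        def add(self, item):
            if item not in self:
                self._order.append(item)
            super().add(item)

        def __iter__(self):
            return iter(self._order)

    a = NaiveOrderedSet([3, 1, 2])
    print(list(a))                         # [3, 1, 2] -- insertion order kept by add()
    print(type(a | NaiveOrderedSet([4])))  # <class 'set'> -- union drops the order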
5 changes: 2 additions & 3 deletions deepdiff/anyset.py
@@ -1,6 +1,5 @@
-from ordered_set import OrderedSet
 from deepdiff.deephash import DeepHash
-from deepdiff.helper import dict_
+from deepdiff.helper import dict_, SortedSet
 
 
 class AnySet:
@@ -11,7 +10,7 @@ class AnySet:
     However one the AnySet object is deleted, all those traces will be gone too.
     """
     def __init__(self, items=None):
-        self._set = OrderedSet()
+        self._set = SortedSet()
         self._hashes = dict_()
         self._hash_to_objects = dict_()
         if items:
9 changes: 4 additions & 5 deletions deepdiff/base.py
@@ -1,5 +1,4 @@
-from ordered_set import OrderedSet
-from deepdiff.helper import strings, numbers
+from deepdiff.helper import strings, numbers, SortedSet
 
 
 DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12
@@ -31,18 +30,18 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups,
 
         result = []
         for item_group in ignore_type_in_groups:
-            new_item_group = OrderedSet()
+            new_item_group = SortedSet()
             for item in item_group:
                 item = type(item) if item is None or not isinstance(item, type) else item
                 new_item_group.add(item)
             result.append(new_item_group)
         ignore_type_in_groups = result
 
         if ignore_string_type_changes and self.strings not in ignore_type_in_groups:
-            ignore_type_in_groups.append(OrderedSet(self.strings))
+            ignore_type_in_groups.append(SortedSet(self.strings))
 
         if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups:
-            ignore_type_in_groups.append(OrderedSet(self.numbers))
+            ignore_type_in_groups.append(SortedSet(self.numbers))
 
         if not ignore_type_subclasses:
             # is_instance method needs tuples. When we look for subclasses, we need them to be tuples
4 changes: 2 additions & 2 deletions deepdiff/delta.py
@@ -4,7 +4,6 @@
 from functools import partial, cmp_to_key
 from collections.abc import Mapping
 from copy import deepcopy
-from ordered_set import OrderedSet
 from deepdiff import DeepDiff
 from deepdiff.serialization import pickle_load, pickle_dump
 from deepdiff.helper import (
@@ -14,6 +13,7 @@
     Opcode, FlatDeltaRow, UnkownValueCode, FlatDataAction,
     OPCODE_TAG_TO_FLAT_DATA_ACTION,
     FLAT_DATA_ACTION_TO_OPCODE_TAG,
+    SortedSet,
 )
 from deepdiff.path import (
     _path_to_elements, _get_nested_obj, _get_nested_obj_and_force,
@@ -744,7 +744,7 @@ def _do_ignore_order(self):
         """
         fixed_indexes = self.diff.get('iterable_items_added_at_indexes', dict_())
         remove_indexes = self.diff.get('iterable_items_removed_at_indexes', dict_())
-        paths = OrderedSet(fixed_indexes.keys()) | OrderedSet(remove_indexes.keys())
+        paths = SortedSet(fixed_indexes.keys()) | SortedSet(remove_indexes.keys())
         for path in paths:
             # In the case of ignore_order reports, we are pointing to the container object.
             # Thus we add a [0] to the elements so we can get the required objects and discard what we don't need.
62 changes: 30 additions & 32 deletions deepdiff/diff.py
@@ -16,24 +16,23 @@
 from collections import defaultdict
 from inspect import getmembers
 from itertools import zip_longest
-from ordered_set import OrderedSet
 from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent,
                              IndexedHash, unprocessed, add_to_frozen_set, basic_types,
                              convert_item_or_items_into_set_else_none, get_type,
                              convert_item_or_items_into_compiled_regexes_else_none,
                              type_is_subclass_of_type_group, type_in_type_group, get_doc,
                              number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans,
-                             np_ndarray, np_floating, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer,
+                             np_ndarray, np_floating, get_numpy_ndarray_rows, RepeatedTimer,
                              TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths,
                              np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
-                             PydanticBaseModel, Opcode,)
+                             PydanticBaseModel, Opcode, SortedSet)
 from deepdiff.serialization import SerializationMixin
 from deepdiff.distance import DistanceMixin
 from deepdiff.model import (
     RemapDict, ResultDict, TextResult, TreeResult, DiffLevel,
     DictRelationship, AttributeRelationship, REPORT_KEYS,
     SubscriptableIterableRelationship, NonSubscriptableIterableRelationship,
-    SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD, PrettyOrderedSet,
+    SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD,
     FORCE_DEFAULT,
 )
 from deepdiff.deephash import DeepHash, combine_hashes_lists
@@ -567,27 +566,26 @@ def _diff_dict(
         rel_class = DictRelationship
 
         if self.ignore_private_variables:
-            t1_keys = OrderedSet([key for key in t1 if not(isinstance(key, str) and key.startswith('__'))])
-            t2_keys = OrderedSet([key for key in t2 if not(isinstance(key, str) and key.startswith('__'))])
+            t1_keys = SortedSet([key for key in t1 if not(isinstance(key, str) and key.startswith('__'))])
+            t2_keys = SortedSet([key for key in t2 if not(isinstance(key, str) and key.startswith('__'))])
         else:
-            t1_keys = OrderedSet(t1.keys())
-            t2_keys = OrderedSet(t2.keys())
+            t1_keys = SortedSet(t1.keys())
+            t2_keys = SortedSet(t2.keys())
         if self.ignore_string_type_changes or self.ignore_numeric_type_changes or self.ignore_string_case:
             t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level)
             t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level)
-            t1_keys = OrderedSet(t1_clean_to_keys.keys())
-            t2_keys = OrderedSet(t2_clean_to_keys.keys())
+            t1_keys = SortedSet(t1_clean_to_keys.keys())
+            t2_keys = SortedSet(t2_clean_to_keys.keys())
         else:
             t1_clean_to_keys = t2_clean_to_keys = None
 
-        t_keys_intersect = t2_keys.intersection(t1_keys)
-
+        t_keys_intersect = t2_keys & t1_keys
+        t_keys_union = t2_keys | t1_keys
         t_keys_added = t2_keys - t_keys_intersect
         t_keys_removed = t1_keys - t_keys_intersect
 
         if self.threshold_to_diff_deeper:
-            len_keys_changed = (len(t_keys_added) + len(t_keys_removed))
-            if len_keys_changed and len(t_keys_intersect) / len_keys_changed < self.threshold_to_diff_deeper:
+            if len(t_keys_union) and len(t_keys_intersect) / len(t_keys_union) < self.threshold_to_diff_deeper:
                 self._report_result('values_changed', level, local_tree=local_tree)
                 return
@@ -1142,7 +1140,7 @@ def _get_most_in_common_pairs_in_iterables(
         # It also includes a "max" key that is just the value of the biggest current distance in the
         # most_in_common_pairs dictionary.
         def defaultdict_orderedset():
-            return defaultdict(OrderedSetPlus)
+            return defaultdict(SortedSet)
         most_in_common_pairs = defaultdict(defaultdict_orderedset)
         pairs = dict_()
 
@@ -1185,7 +1183,7 @@ def defaultdict_orderedset():
                     pairs_of_item[_distance].add(removed_hash)
         used_to_hashes = set()
 
-        distances_to_from_hashes = defaultdict(OrderedSetPlus)
+        distances_to_from_hashes = defaultdict(SortedSet)
         for from_hash, distances_to_to_hashes in most_in_common_pairs.items():
             # del distances_to_to_hashes['max']
             for dist in distances_to_to_hashes:
@@ -1194,11 +1192,11 @@ def defaultdict_orderedset():
         for dist in sorted(distances_to_from_hashes.keys()):
             from_hashes = distances_to_from_hashes[dist]
             while from_hashes:
-                from_hash = from_hashes.lpop()
+                from_hash = from_hashes.pop()
                 if from_hash not in used_to_hashes:
                     to_hashes = most_in_common_pairs[from_hash][dist]
                     while to_hashes:
-                        to_hash = to_hashes.lpop()
+                        to_hash = to_hashes.pop()
                         if to_hash not in used_to_hashes:
                            used_to_hashes.add(from_hash)
                            used_to_hashes.add(to_hash)
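Note the lpop() to pop() swap in this hunk: OrderedSetPlus.lpop (the class is deleted from helper.py below) always removed the first element, whereas pop() takes whichever end the new set class implements. The pairing loop already iterates distance buckets in sorted order, so which hash is drawn first within a bucket only affects tie-breaking. A toy illustration of the two directions, using plain lists as stand-ins:

    # Toy stand-in (plain lists), not the deepdiff set types.
    items = [10, 20, 30]
    print(items.pop(0))  # 10 -- leftmost, what OrderedSetPlus.lpop() returned
    print(items.pop())   # 30 -- rightmost, the conventional pop() default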
@@ -1217,8 +1215,8 @@ def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None,
 
         full_t1_hashtable = self._create_hashtable(level, 't1')
         full_t2_hashtable = self._create_hashtable(level, 't2')
-        t1_hashes = OrderedSetPlus(full_t1_hashtable.keys())
-        t2_hashes = OrderedSetPlus(full_t2_hashtable.keys())
+        t1_hashes = SortedSet(full_t1_hashtable.keys())
+        t2_hashes = SortedSet(full_t2_hashtable.keys())
         hashes_added = t2_hashes - t1_hashes
         hashes_removed = t1_hashes - t2_hashes
 
@@ -1630,7 +1628,7 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
         elif isinstance(level.t1, tuple):
             self._diff_tuple(level, parents_ids, local_tree=local_tree)
 
-        elif isinstance(level.t1, (set, frozenset, OrderedSet)):
+        elif isinstance(level.t1, (set, frozenset, SortedSet)):
             self._diff_set(level, local_tree=local_tree)
 
         elif isinstance(level.t1, np_ndarray):
@@ -1752,19 +1750,19 @@ def affected_paths(self):
         'iterable_item_added': {'root[3][1]': 4},
         'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
         >>> ddiff.affected_paths
-        OrderedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
+        SortedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
         >>> ddiff.affected_root_keys
-        OrderedSet([3, 4, 5, 6, 2])
+        SortedSet([3, 4, 5, 6, 2])
         """
-        result = OrderedSet()
+        result = SortedSet()
         for key in REPORT_KEYS:
             value = self.get(key)
             if value:
-                if isinstance(value, PrettyOrderedSet):
+                if isinstance(value, SortedSet):
                     result |= value
                 else:
-                    result |= OrderedSet(value.keys())
+                    result |= SortedSet(value.keys())
         return result
 
     @property
@@ -1784,18 +1782,18 @@ def affected_root_keys(self):
         'iterable_item_added': {'root[3][1]': 4},
         'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}}
         >>> ddiff.affected_paths
-        OrderedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
+        SortedSet(['root[3][1]', 'root[4]', 'root[5]', 'root[6]', 'root[2]'])
         >>> ddiff.affected_root_keys
-        OrderedSet([3, 4, 5, 6, 2])
+        SortedSet([3, 4, 5, 6, 2])
         """
-        result = OrderedSet()
+        result = SortedSet()
         for key in REPORT_KEYS:
             value = self.tree.get(key)
             if value:
-                if isinstance(value, PrettyOrderedSet):
-                    result |= OrderedSet([i.get_root_key() for i in value])
+                if isinstance(value, SortedSet):
+                    result |= SortedSet([i.get_root_key() for i in value])
                 else:
-                    result |= OrderedSet([i.get_root_key() for i in value.keys()])
+                    result |= SortedSet([i.get_root_key() for i in value.keys()])
         return result
2 changes: 1 addition & 1 deletion deepdiff/distance.py
@@ -98,7 +98,7 @@ def _precalculate_numpy_arrays_distance(
             self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type):
 
         # We only want to deal with 1D arrays.
-        if isinstance(t2_hashtable[hashes_added[0]].item, (np_ndarray, list)):
+        if isinstance(t2_hashtable[next(iter(hashes_added))].item, (np_ndarray, list)):
            return
 
         pre_calced_distances = dict_()
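The one-line change above is needed because hashes_added is now a set-like container that may not support integer indexing; next(iter(...)) pulls one element from any iterable. A minimal illustration with a plain set:

    hashes_added = {"hash_a", "hash_b"}   # plain set for illustration
    # hashes_added[0]                     # TypeError: 'set' object is not subscriptable
    first = next(iter(hashes_added))      # works for any iterable container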
37 changes: 10 additions & 27 deletions deepdiff/helper.py
@@ -12,7 +12,10 @@
 from ast import literal_eval
 from decimal import Decimal, localcontext, InvalidOperation as InvalidDecimalOperation
 from itertools import repeat
-from ordered_set import OrderedSet
+# from orderly_set import OrderlySet as OrderedSetModule  # median: 0.806 s, some tests are failing
+# from orderly_set import SortedSet as OrderedSetModule  # median 1.011 s, didn't work for tests
+from orderly_set import StableSetEq as OrderedSetModule  # median: 1.0867 s for cache test, 5.63s for all tests
+# from orderly_set import OrderedSet as OrderedSetModule  # median 1.1256 s for cache test, 5.63s for all tests
 from threading import Timer
 
 
@@ -24,6 +27,11 @@ class pydantic_base_model_type:
     pass
 
 
+class SortedSet(OrderedSetModule):
+    def __repr__(self):
+        return str(list(self))
+
+
 try:
     import numpy as np
 except ImportError:  # pragma: no cover. The case without Numpy is tested locally only.
@@ -318,7 +326,7 @@ def add_root_to_paths(paths):
     """
     if paths is None:
         return
-    result = OrderedSet()
+    result = SortedSet()
     for path in paths:
         if path.startswith('root'):
             result.add(path)
@@ -524,31 +532,6 @@ def __repr__(self):
     warnings.simplefilter('once', DeepDiffDeprecationWarning)
 
 
-class OrderedSetPlus(OrderedSet):
-
-    def lpop(self):
-        """
-        Remove and return the first element from the set.
-        Raises KeyError if the set is empty.
-        Example:
-            >>> oset = OrderedSet([1, 2, 3])
-            >>> oset.lpop()
-            1
-        """
-        if not self.items:
-            raise KeyError('lpop from an empty set')
-
-        elem = self.items[0]
-        del self.items[0]
-        del self.map[elem]
-        return elem
-
-    def __repr__(self):
-        return str(list(self))
-
-    __str__ = __repr__
-
-
 class RepeatedTimer:
     """
     Threaded Repeated Timer by MestreLion
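A quick sketch of the new alias in use, assuming orderly_set is installed and that StableSetEq keeps insertion order across operations (the property the commit message says OrderlySet lacked):

    # Assumes orderly_set is installed; StableSetEq is the class the commit settles on.
    from orderly_set import StableSetEq as OrderedSetModule

    class SortedSet(OrderedSetModule):
        def __repr__(self):
            return str(list(self))

    s = SortedSet([3, 1, 2])
    s.add(2)      # duplicate add is a no-op; order stays [3, 1, 2]
    print(s)      # [3, 1, 2] -- the new __repr__ prints the ordered list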
5 changes: 2 additions & 3 deletions deepdiff/lfucache.py
@@ -5,17 +5,16 @@
 Modified by Sep Dehpour
 """
 from collections import defaultdict
-from ordered_set import OrderedSet
 from threading import Lock
 from statistics import mean
-from deepdiff.helper import not_found, dict_
+from deepdiff.helper import not_found, dict_, SortedSet
 
 
 class CacheNode:
     def __init__(self, key, report_type, value, freq_node, pre, nxt):
         self.key = key
         if report_type:
-            self.content = defaultdict(OrderedSet)
+            self.content = defaultdict(SortedSet)
             self.content[report_type].add(value)
         else:
             self.content = value
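With that, CacheNode's per-report-type buckets keep a stable order. A sketch of the pattern, assuming the SortedSet defined in deepdiff/helper.py above:

    from collections import defaultdict
    from deepdiff.helper import SortedSet  # introduced in this commit

    content = defaultdict(SortedSet)
    content["values_changed"].add("root[2]")
    content["values_changed"].add("root[4]")
    content["values_changed"].add("root[2]")  # duplicate, ignored
    print(content["values_changed"])          # ['root[2]', 'root[4]']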