Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Strong t (improve perf by removing weakref from t_eval) #200

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
139 changes: 76 additions & 63 deletions glom/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import sys
import pdb
import copy
import weakref
import operator
from abc import ABCMeta
from pprint import pprint
Expand Down Expand Up @@ -613,7 +612,7 @@ def __init__(self, *path_parts):
if isinstance(part, Path):
part = part.path_t
if isinstance(part, TType):
sub_parts = _T_PATHS[part]
sub_parts = object.__getattribute__(part, '__op_args__')
if sub_parts[0] is not T:
raise ValueError('path segment must be path from T, not %r'
% sub_parts[0])
Expand All @@ -625,6 +624,9 @@ def __init__(self, *path_parts):
path_t = _t_child(path_t, 'P', part)
self.path_t = path_t

_CACHE = {}
_MAX_CACHE = 10000

@classmethod
def from_text(cls, text):
"""Make a Path from .-delimited text:
Expand All @@ -633,20 +635,26 @@ def from_text(cls, text):
Path('a', 'b', 'c')

"""
return cls(*text.split('.'))
if text not in cls._CACHE:
if len(cls._CACHE) > cls._MAX_CACHE:
return cls(*text.split('.'))
cls._CACHE[text] = cls(*text.split('.'))
return cls._CACHE[text]

def glomit(self, target, scope):
# The entrypoint for the Path extension
return _t_eval(target, self.path_t, scope)

def __len__(self):
return (len(_T_PATHS[self.path_t]) - 1) // 2
return (len(object.__getattribute__(self.path_t, '__op_args__')) - 1) // 2

def __eq__(self, other):
op_args = object.__getattribute__(self.path_t, '__op_args__')
if type(other) is Path:
return _T_PATHS[self.path_t] == _T_PATHS[other.path_t]
elif type(other) is TType:
return _T_PATHS[self.path_t] == _T_PATHS[other]
other = other.path_t
if type(other) is TType:
other_op_args = object.__getattribute__(other, '__op_args__')
return op_args == other_op_args
return False

def __ne__(self, other):
Expand All @@ -659,7 +667,7 @@ def values(self):
>>> Path(T.a.b, 'c', T['d']).values()
('a', 'b', 'c', 'd')
"""
cur_t_path = _T_PATHS[self.path_t]
cur_t_path = object.__getattribute__(self.path_t, '__op_args__')
return cur_t_path[2::2]

def items(self):
Expand All @@ -670,7 +678,7 @@ def items(self):
(('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

"""
cur_t_path = _T_PATHS[self.path_t]
cur_t_path = object.__getattribute__(self.path_t, '__op_args__')
return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

def startswith(self, other):
Expand All @@ -680,20 +688,19 @@ def startswith(self, other):
other = other.path_t
if not isinstance(other, TType):
raise TypeError('can only check if Path starts with string, Path or T')
o_path = _T_PATHS[other]
return _T_PATHS[self.path_t][:len(o_path)] == o_path
o_path = object.__getattribute__(other, '__op_args__')
path = object.__getattribute__(other, '__op_args__')
return path[:len(o_path)] == o_path

def from_t(self):
'''return the same path but starting from T'''
t_path = _T_PATHS[self.path_t]
if t_path[0] is S:
new_t = TType()
_T_PATHS[new_t] = (T,) + t_path[1:]
return Path(new_t)
return self
t_path = object.__getattribute__(self.path_t, '__op_args__')
if t_path[0] is T:
return self
return Path(TType((T,) + t_path[1:]))

def __getitem__(self, i):
cur_t_path = _T_PATHS[self.path_t]
cur_t_path = object.__getattribute__(self.path_t, '__op_args__')
try:
step = i.step
start = i.start if i.start is not None else 0
Expand All @@ -709,16 +716,14 @@ def __getitem__(self, i):
raise IndexError('Path index out of range')
stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

new_t = TType()
new_path = cur_t_path[start:stop]
if step is not None and step != 1:
new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
new_path = sum(new_path, ())
_T_PATHS[new_t] = (cur_t_path[0],) + new_path
return Path(new_t)
return Path(TType((cur_t_path[0],) + new_path))

def __repr__(self):
return _format_path(_T_PATHS[self.path_t][1:])
return _format_path(object.__getattribute__(self.path_t, '__op_args__')[1:])


def _format_path(t_path):
Expand Down Expand Up @@ -987,9 +992,13 @@ def _trace(self, target, spec, scope):
scope[glom] = scope[Inspect]
if self.echo:
print('---')
# TODO: switch from scope[Path] to the Target-Spec format trace above
# ... but maybe be smart about only printing deltas instead of the whole
# thing
print('path: ', scope[Path] + [spec])
print('target:', target)
if self.breakpoint:
# TODO: real debugger here?
self.breakpoint()
try:
ret = scope[Inspect](target, spec, scope)
Expand Down Expand Up @@ -1399,9 +1408,17 @@ class TType(object):
equivalent to accessing the ``__class__`` attribute.

"""
__slots__ = ('__weakref__',)
__slots__ = ("__op_args__",)

def __getattr__(self, name):
def __init__(self, op_args=None):
if op_args is None:
op_args = (self,) # for T, etc roots
self.__op_args__ = op_args
assert op_args != ()

def __getattribute__(self, name):
if name in ("__", "__repr__"):
return object.__getattribute__(self, '__')
if name.startswith('__'):
raise AttributeError('T instances reserve dunder attributes.'
' To access the "{name}" attribute, use'
Expand All @@ -1424,29 +1441,18 @@ def __(self, name):
return _t_child(self, '.', '__' + name)

def __repr__(self):
t_path = _T_PATHS[self]
return _format_t(t_path[1:], t_path[0])

def __getstate__(self):
t_path = _T_PATHS[self]
return tuple(({T: 'T', S: 'S', A: 'A'}[t_path[0]],) + t_path[1:])

def __setstate__(self, state):
_T_PATHS[self] = ({'T': T, 'S': S, 'A': A}[state[0]],) + state[1:]


_T_PATHS = weakref.WeakKeyDictionary()
op_args = object.__getattribute__(self, '__op_args__')
return _format_t(op_args[1:], op_args[0])


def _t_child(parent, operation, arg):
t = TType()
base = _T_PATHS[parent]
if base[0] is A and operation not in ('.', '[', 'P'):
op_args = object.__getattribute__(parent, '__op_args__')
if op_args[0] is A and operation not in ('.', '[', 'P'):
# whitelist rather than blacklist assignment friendly operations
# TODO: error type?
raise BadSpec("operation not allowed on A assignment path")
_T_PATHS[t] = base + (operation, arg)
return t

return TType(op_args + (operation, arg))


def _s_first_magic(scope, key, _t):
Expand All @@ -1465,7 +1471,7 @@ def _s_first_magic(scope, key, _t):


def _t_eval(target, _t, scope):
t_path = _T_PATHS[_t]
t_path = object.__getattribute__(_t, '__op_args__')
i = 1
fetch_till = len(t_path)
root = t_path[0]
Expand Down Expand Up @@ -1547,10 +1553,6 @@ def _t_eval(target, _t, scope):
S = TType() # like T, but means grab stuff from Scope, not Target
A = TType() # like S, but shorthand to assign target to scope

_T_PATHS[T] = (T,)
_T_PATHS[S] = (S,)
_T_PATHS[A] = (A,)

UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')

Expand Down Expand Up @@ -1807,6 +1809,7 @@ class TargetRegistry(object):
def __init__(self, register_default_types=True):
self._op_type_map = {}
self._op_type_tree = {} # see _register_fuzzy_type for details
self._type_cache = {}

self._op_auto_map = OrderedDict() # op name to function that returns handler function

Expand All @@ -1825,22 +1828,26 @@ def get_handler(self, op, obj, path=None, raise_exc=True):
"""
ret = False
obj_type = type(obj)
type_map = self.get_type_map(op)
if type_map:
try:
ret = type_map[obj_type]
except KeyError:
type_tree = self._op_type_tree.get(op, {})
closest = self._get_closest_type(obj, type_tree=type_tree)
if closest is None:
ret = False
else:
ret = type_map[closest]
cache_key = (obj_type, op)
if cache_key not in self._type_cache:
type_map = self.get_type_map(op)
if type_map:
try:
ret = type_map[obj_type]
except KeyError:
type_tree = self._op_type_tree.get(op, {})
closest = self._get_closest_type(obj, type_tree=type_tree)
if closest is None:
ret = False
else:
ret = type_map[closest]

if ret is False and raise_exc:
raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)
if ret is False and raise_exc:
raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)

return ret
self._type_cache[cache_key] = ret

return self._type_cache[cache_key]

def get_type_map(self, op):
try:
Expand Down Expand Up @@ -1928,6 +1935,8 @@ def register(self, target_type, **kwargs):
for op_name in new_op_map:
self._register_fuzzy_type(op_name, target_type)

self._type_cache = {} # reset type cache

return

def register_op(self, op_name, auto_func=None, exact=False):
Expand Down Expand Up @@ -2119,21 +2128,23 @@ def _has_callable_glomit(obj):

def _glom(target, spec, scope):
parent = scope
pmap = parent.maps[0]
scope = scope.new_child({
T: target,
Spec: spec,
UP: parent,
CHILD_ERRORS: [],
MODE: pmap[MODE],
})
parent[LAST_CHILD_SCOPE] = scope
pmap[LAST_CHILD_SCOPE] = scope

try:
if isinstance(spec, TType): # must go first, due to callability
if type(spec) is TType: # must go first, due to callability
return _t_eval(target, spec, scope)
elif _has_callable_glomit(spec):
return spec.glomit(target, scope)

return scope[MODE](target, spec, scope)
return scope.maps[0][MODE](target, spec, scope)
except Exception as e:
scope.maps[1][CHILD_ERRORS].append(scope)
scope.maps[0][CUR_ERROR] = e
Expand All @@ -2147,6 +2158,8 @@ def _glom(target, spec, scope):


def AUTO(target, spec, scope):
if type(spec) is str: # shortcut to make deep-get use case faster
return _t_eval(target, Path.from_text(spec).path_t, scope)
if isinstance(spec, dict):
return _handle_dict(target, spec, scope)
elif isinstance(spec, list):
Expand Down
3 changes: 0 additions & 3 deletions glom/matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,9 +1003,6 @@ def glomit(self, target, scope):
type(target).__name__))

if errs:
# TODO: due to the usage of basic path (not a Path
# object), the format can be a bit inconsistent here
# (e.g., 'a.b' and ['a', 'b'])
raise CheckError(errs, self, scope[Path])
return ret

Expand Down
Loading