From 90116a59d55688c68ac96bfb9908e213c8dc2c09 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 9 Jul 2024 19:59:09 -0700 Subject: [PATCH 01/17] cache __TYPE_MAP and init submodules --- .gitignore | 3 ++ src/pynwb/__init__.py | 118 +++++++++++++++++++++++++++++++++++------- 2 files changed, 102 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index c0a2aca3e..95f08686e 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ tests/coverage/htmlcov # Version _version.py + +.core_typemap_version +core_typemap.pkl diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 50db92dcc..ea4a18dfa 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -2,8 +2,11 @@ for reading and writing data in NWB format ''' import os.path +import warnings from pathlib import Path from copy import deepcopy +import subprocess +import pickle import h5py from hdmf.spec import NamespaceCatalog @@ -22,6 +25,16 @@ from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402 from .validate import validate # noqa: F401, E402 +try: + # see https://effigies.gitlab.io/posts/python-packaging-2023/ + from ._version import __version__ +except ImportError: # pragma: no cover + # this is a relatively slower method for getting the version string + from importlib.metadata import version # noqa: E402 + + __version__ = version("pynwb") + del version + @docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'}, {'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None}, @@ -50,7 +63,7 @@ def unload_type_config(**kwargs): type_map = kwargs['type_map'] or get_type_map() hdmf_unload_type_config(type_map=type_map) -def __get_resources(): +def __get_resources() -> dict: try: from importlib.resources import files except ImportError: @@ -60,9 +73,13 @@ def __get_resources(): __location_of_this_file = files(__name__) __core_ns_file_name = 'nwb.namespace.yaml' __schema_dir = 'nwb-schema/core' + cached_core_typemap = __location_of_this_file / 'core_typemap.pkl' + cached_version_indicator = __location_of_this_file / '.core_typemap_version' ret = dict() ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) + ret['cached_typemap_path'] = str(cached_core_typemap) + ret['cached_version_indicator'] = str(cached_version_indicator) return ret @@ -139,18 +156,89 @@ def load_namespaces(**kwargs): return __TYPE_MAP.load_namespaces(namespace_path) -# load the core namespace, i.e. base NWB specification -__resources = __get_resources() -if os.path.exists(__resources['namespace_path']): - load_namespaces(__resources['namespace_path']) -else: - raise RuntimeError( - "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to " - "use the --recurse_submodules flag when cloning. See developer installation instructions here: " - "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository" - ) +def _git_cmd(*args) -> subprocess.CompletedProcess: + """ + Call git with the package as the directory regardless of cwd. + + Since any folder within a git repo works, don't try to ascend to the top, since + if we're *not* actually in a git repo we're only guaranteed to know about + the inner `pynwb` directory. + """ + parent_dir = str(Path(__file__).parent) + result = subprocess.run(["git", "-C", parent_dir, *args], capture_output=True) + return result + + +def _clone_submodules(): + if _git_cmd('rev-parse').returncode == 0: + warnings.warn( + 'NWB core schema not found in cloned installation, initializing submodules...', + stacklevel=1) + res = _git_cmd('submodule', 'update', '--init', '--recursive') + if not res.returncode == 0: + raise RuntimeError( + 'Exception while initializing submodules, got:\n' + 'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr) + else: + raise RuntimeError("'core' is not a registered namespace, and pynwb doesn't seem to be installed" + "from a cloned repository so the submodules can't be initialized. " + "Something has gone terribly wrong. maybe try reinstalling???") +def _load_core_namespace(final:bool=False): + """ + Load the core namespace into __TYPE_MAP, + either by loading a pickled version or creating one anew and pickling it. + + We keep a dotfile next to it that tracks what version of pynwb created it, + so that we invalidate it when the code changes. + + Args: + final (bool): This function tries again if the submodules aren't cloned, + but it shouldn't go into an infinite loop. + If final is ``True``, don't recurse. + """ + global __NS_CATALOG + global __TYPE_MAP + # load the core namespace, i.e. base NWB specification + __resources = __get_resources() + + # if we have a version indicator file and it doesn't match the current version, + # scrap the cached typemap + if os.path.exists(__resources['cached_version_indicator']): + with open(__resources['cached_version_indicator'], 'r') as f: + cached_version = f.read().strip() + if cached_version != __version__: + Path(__resources['cached_typemap_path']).unlink(missing_ok=True) + + # load pickled typemap if we have one + if os.path.exists(__resources['cached_typemap_path']): + with open(__resources['cached_typemap_path'], 'rb') as f: + __TYPE_MAP = pickle.load(f) + + # otherwise make a new one and cache it + elif os.path.exists(__resources['namespace_path']): + load_namespaces(__resources['namespace_path']) + with open(__resources['cached_typemap_path'], 'wb') as f: + pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL) + with open(__resources['cached_version_indicator'], 'w') as f: + f.write(__version__) + + # otherwise, we don't have the schema and try and initialize from submodules, + # afterwards trying to load the namespace again + else: + try: + _clone_submodules() + except (FileNotFoundError, OSError): + raise RuntimeError( + "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to " + "use the --recurse_submodules flag when cloning. See developer installation instructions here: " + "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository" + ) + if not final: + _load_core_namespace(final=True) +_load_core_namespace() + def available_namespaces(): """Returns all namespaces registered in the namespace catalog""" return __NS_CATALOG.namespaces @@ -416,15 +504,7 @@ def export(self, **kwargs): from hdmf.data_utils import DataChunkIterator # noqa: F401,E402 from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402 -try: - # see https://effigies.gitlab.io/posts/python-packaging-2023/ - from ._version import __version__ -except ImportError: # pragma: no cover - # this is a relatively slower method for getting the version string - from importlib.metadata import version # noqa: E402 - __version__ = version("pynwb") - del version from ._due import due, BibTeX # noqa: E402 due.cite( From bdd0ccb99d589f0e4e9bdeaddf29636b7971f140 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 10 Jul 2024 20:03:20 -0700 Subject: [PATCH 02/17] add changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c72e78f58..548283e56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ ### Documentation and tutorial enhancements - Added pre-release pull request instructions to release process documentation @stephprince [#1928](https://github.com/NeurodataWithoutBorders/pynwb/pull/1928) +### Performance +- Cache global type map to speed import 3x. [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931) + ## PyNWB 2.8.1 (July 3, 2024) ### Documentation and tutorial enhancements From 604b3a8ee32823b28cdbc8f336268c1501c26dd6 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 15 Jul 2024 22:14:21 -0700 Subject: [PATCH 03/17] put __resources back in module ns --- src/pynwb/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index ea4a18dfa..7bb409e59 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -98,6 +98,9 @@ def _get_resources(): __TYPE_MAP = TypeMap(__NS_CATALOG) __TYPE_MAP.merge(hdmf_typemap, ns_catalog=True) +# load the core namespace, i.e. base NWB specification +__resources = __get_resources() + @docval({'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps', @@ -200,8 +203,7 @@ def _load_core_namespace(final:bool=False): """ global __NS_CATALOG global __TYPE_MAP - # load the core namespace, i.e. base NWB specification - __resources = __get_resources() + global __resources # if we have a version indicator file and it doesn't match the current version, # scrap the cached typemap From 811651dafa8e5c26662891b97358d7a7d9ce6109 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 13:55:20 -0700 Subject: [PATCH 04/17] add tests for submodule clone and caching --- .gitignore | 1 + pyproject.toml | 2 +- src/pynwb/__init__.py | 13 +++++++-- tests/unit/test_import.py | 60 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 tests/unit/test_import.py diff --git a/.gitignore b/.gitignore index 95f08686e..83bd18a6e 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,4 @@ _version.py .core_typemap_version core_typemap.pkl +core_nscatalog.pkl diff --git a/pyproject.toml b/pyproject.toml index 4873b52e1..5b28e500a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,7 @@ exclude = [ "__pycache__", "build/", "dist/", - "src/nwb-schema", + "src/pynwb/nwb-schema", "docs/source/conf.py", "src/pynwb/_due.py", "test.py" # remove when pytest comes along diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 7bb409e59..df6bce559 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -74,11 +74,13 @@ def __get_resources() -> dict: __core_ns_file_name = 'nwb.namespace.yaml' __schema_dir = 'nwb-schema/core' cached_core_typemap = __location_of_this_file / 'core_typemap.pkl' + cached_core_nscatalog = __location_of_this_file / 'core_nscatalog.pkl' cached_version_indicator = __location_of_this_file / '.core_typemap_version' ret = dict() ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) ret['cached_typemap_path'] = str(cached_core_typemap) + ret['cached_nscatalog_path'] = str(cached_core_nscatalog) ret['cached_version_indicator'] = str(cached_version_indicator) return ret @@ -178,11 +180,11 @@ def _clone_submodules(): 'NWB core schema not found in cloned installation, initializing submodules...', stacklevel=1) res = _git_cmd('submodule', 'update', '--init', '--recursive') - if not res.returncode == 0: + if not res.returncode == 0: # pragma: no cover raise RuntimeError( 'Exception while initializing submodules, got:\n' 'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr) - else: + else: # pragma: no cover raise RuntimeError("'core' is not a registered namespace, and pynwb doesn't seem to be installed" "from a cloned repository so the submodules can't be initialized. " "Something has gone terribly wrong. maybe try reinstalling???") @@ -212,17 +214,22 @@ def _load_core_namespace(final:bool=False): cached_version = f.read().strip() if cached_version != __version__: Path(__resources['cached_typemap_path']).unlink(missing_ok=True) + Path(__resources['cached_nscatalog_path']).unlink(missing_ok=True) # load pickled typemap if we have one if os.path.exists(__resources['cached_typemap_path']): with open(__resources['cached_typemap_path'], 'rb') as f: __TYPE_MAP = pickle.load(f) + with open(__resources['cached_nscatalog_path'], 'rb') as f: + __NS_CATALOG = pickle.load(f) # otherwise make a new one and cache it elif os.path.exists(__resources['namespace_path']): load_namespaces(__resources['namespace_path']) with open(__resources['cached_typemap_path'], 'wb') as f: pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL) + with open(__resources['cached_nscatalog_path'], 'wb') as f: + pickle.dump(__NS_CATALOG, f, protocol=pickle.HIGHEST_PROTOCOL) with open(__resources['cached_version_indicator'], 'w') as f: f.write(__version__) @@ -231,7 +238,7 @@ def _load_core_namespace(final:bool=False): else: try: _clone_submodules() - except (FileNotFoundError, OSError): + except (FileNotFoundError, OSError): # pragma: no cover raise RuntimeError( "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to " "use the --recurse_submodules flag when cloning. See developer installation instructions here: " diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py new file mode 100644 index 000000000..fdc621b0b --- /dev/null +++ b/tests/unit/test_import.py @@ -0,0 +1,60 @@ +""" +Test functionality of import-time functionality like loading the core +namespace and cloning the submodules and whatnot +""" +import importlib +import tempfile +import shutil +from pathlib import Path +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files + +from pynwb.testing import TestCase + +class TestPyNWBSubmoduleClone(TestCase): + def setUp(self): + # move the existing cloned submodules to a temporary directory + self.tmp_dir = tempfile.TemporaryDirectory() + self.repo_dir: Path = files('pynwb') / 'nwb-schema' + if self.repo_dir.exists(): + shutil.move(self.repo_dir, self.tmp_dir.name) + + # remove cache files if they exist + self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') + self.nscatalog_cache = Path(files('pynwb') / 'core_nscatalog.pkl') + self.typemap_cache.unlink(missing_ok=True) + self.nscatalog_cache.unlink(missing_ok=True) + + + def tearDown(self) -> None: + # move the old repository back + if self.repo_dir.exists(): + shutil.rmtree(self.repo_dir) + shutil.move(Path(self.tmp_dir.name) / 'nwb-schema', self.repo_dir) + + def test_clone_on_import(self): + """ + On import, if the schema directory is not found, + we should try and import it + """ + assert not self.repo_dir.exists() + assert not self.nscatalog_cache.exists() + assert not self.typemap_cache.exists() + + with self.assertWarns(Warning) as clone_warning: + import pynwb + importlib.reload(pynwb) + + # there can be other warnings, but we should get at least one + assert len(clone_warning.warnings) > 0 + messages = [str(warn.message) for warn in clone_warning.warnings] + assert any(['initializing submodules' in msg for msg in messages]) + + assert self.repo_dir.exists() + assert self.nscatalog_cache.exists() + assert self.typemap_cache.exists() + + # we should also get the namespaces correctly too + assert 'core' in pynwb.available_namespaces() From a9b787857fbbe04de1d9a7ff67167fc57dd06aaa Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 14:07:28 -0700 Subject: [PATCH 05/17] rm cached ns catalog, use the one cached in typemap globally --- src/pynwb/__init__.py | 12 ++++-------- tests/unit/test_import.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index df6bce559..c7f327918 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -74,13 +74,11 @@ def __get_resources() -> dict: __core_ns_file_name = 'nwb.namespace.yaml' __schema_dir = 'nwb-schema/core' cached_core_typemap = __location_of_this_file / 'core_typemap.pkl' - cached_core_nscatalog = __location_of_this_file / 'core_nscatalog.pkl' cached_version_indicator = __location_of_this_file / '.core_typemap_version' ret = dict() ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) ret['cached_typemap_path'] = str(cached_core_typemap) - ret['cached_nscatalog_path'] = str(cached_core_nscatalog) ret['cached_version_indicator'] = str(cached_version_indicator) return ret @@ -214,22 +212,20 @@ def _load_core_namespace(final:bool=False): cached_version = f.read().strip() if cached_version != __version__: Path(__resources['cached_typemap_path']).unlink(missing_ok=True) - Path(__resources['cached_nscatalog_path']).unlink(missing_ok=True) # load pickled typemap if we have one if os.path.exists(__resources['cached_typemap_path']): with open(__resources['cached_typemap_path'], 'rb') as f: - __TYPE_MAP = pickle.load(f) - with open(__resources['cached_nscatalog_path'], 'rb') as f: - __NS_CATALOG = pickle.load(f) + __TYPE_MAP = pickle.load(f) # type: TypeMap + # __NS_CATALOG is contained by __TYPE_MAP and they should be linked, + # so we restore the global one from the cached __TYPE_MAP + __NS_CATALOG = __TYPE_MAP.namespace_catalog # otherwise make a new one and cache it elif os.path.exists(__resources['namespace_path']): load_namespaces(__resources['namespace_path']) with open(__resources['cached_typemap_path'], 'wb') as f: pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL) - with open(__resources['cached_nscatalog_path'], 'wb') as f: - pickle.dump(__NS_CATALOG, f, protocol=pickle.HIGHEST_PROTOCOL) with open(__resources['cached_version_indicator'], 'w') as f: f.write(__version__) diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py index fdc621b0b..39dd728f6 100644 --- a/tests/unit/test_import.py +++ b/tests/unit/test_import.py @@ -58,3 +58,19 @@ def test_clone_on_import(self): # we should also get the namespaces correctly too assert 'core' in pynwb.available_namespaces() + + +class TestPyNWBImportCache(TestCase): + def setUp(self) -> None: + self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') + + def test_cache_exists(self): + assert self.typemap_cache.exists() + + def test_typemap_ns_match(self): + """ + The global __NS_CATALOG and the one contained within the global __TYPE_MAP + should be the same object after importing + """ + import pynwb + assert id(pynwb.__TYPE_MAP.namespace_catalog) == id(pynwb.__NS_CATALOG) \ No newline at end of file From 7d1d5f1e613a25108310545ff8c043b5565d74e4 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 14:26:51 -0700 Subject: [PATCH 06/17] make error a warning if can't clone when installed as package, simplify error message, fix tests which are improperly accessing dunder methods. --- src/pynwb/__init__.py | 27 +++++++++++++++------------ tests/unit/test_import.py | 9 ++------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index c7f327918..3c4ab0bfa 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -158,6 +158,10 @@ def load_namespaces(**kwargs): namespace_path = getargs('namespace_path', kwargs) return __TYPE_MAP.load_namespaces(namespace_path) +def available_namespaces(): + """Returns all namespaces registered in the namespace catalog""" + return __NS_CATALOG.namespaces + def _git_cmd(*args) -> subprocess.CompletedProcess: """ @@ -183,9 +187,7 @@ def _clone_submodules(): 'Exception while initializing submodules, got:\n' 'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr) else: # pragma: no cover - raise RuntimeError("'core' is not a registered namespace, and pynwb doesn't seem to be installed" - "from a cloned repository so the submodules can't be initialized. " - "Something has gone terribly wrong. maybe try reinstalling???") + raise RuntimeError("Package is not installed from a git repository, can't clone submodules") def _load_core_namespace(final:bool=False): @@ -234,19 +236,20 @@ def _load_core_namespace(final:bool=False): else: try: _clone_submodules() - except (FileNotFoundError, OSError): # pragma: no cover - raise RuntimeError( - "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to " - "use the --recurse_submodules flag when cloning. See developer installation instructions here: " - "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository" - ) + except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover + if 'core' not in available_namespaces(): + warnings.warn( + "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, " + "you need to use the --recurse_submodules flag when cloning. " + "See developer installation instructions here: " + "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n" + f"Got exception: \n{e}" + ) if not final: _load_core_namespace(final=True) _load_core_namespace() -def available_namespaces(): - """Returns all namespaces registered in the namespace catalog""" - return __NS_CATALOG.namespaces + # a function to register a container classes with the global map diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py index 39dd728f6..5e82a9201 100644 --- a/tests/unit/test_import.py +++ b/tests/unit/test_import.py @@ -11,6 +11,7 @@ except ImportError: from importlib_resources import files +import pynwb from pynwb.testing import TestCase class TestPyNWBSubmoduleClone(TestCase): @@ -23,9 +24,7 @@ def setUp(self): # remove cache files if they exist self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') - self.nscatalog_cache = Path(files('pynwb') / 'core_nscatalog.pkl') self.typemap_cache.unlink(missing_ok=True) - self.nscatalog_cache.unlink(missing_ok=True) def tearDown(self) -> None: @@ -40,11 +39,9 @@ def test_clone_on_import(self): we should try and import it """ assert not self.repo_dir.exists() - assert not self.nscatalog_cache.exists() assert not self.typemap_cache.exists() with self.assertWarns(Warning) as clone_warning: - import pynwb importlib.reload(pynwb) # there can be other warnings, but we should get at least one @@ -53,7 +50,6 @@ def test_clone_on_import(self): assert any(['initializing submodules' in msg for msg in messages]) assert self.repo_dir.exists() - assert self.nscatalog_cache.exists() assert self.typemap_cache.exists() # we should also get the namespaces correctly too @@ -72,5 +68,4 @@ def test_typemap_ns_match(self): The global __NS_CATALOG and the one contained within the global __TYPE_MAP should be the same object after importing """ - import pynwb - assert id(pynwb.__TYPE_MAP.namespace_catalog) == id(pynwb.__NS_CATALOG) \ No newline at end of file + assert id(getattr(pynwb, '__TYPE_MAP').namespace_catalog) == id(getattr(pynwb, '__NS_CATALOG')) \ No newline at end of file From 62c7c6f9efea32270ecdde7fb1901e8b5c561f25 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 19:44:44 -0700 Subject: [PATCH 07/17] shutil is bugged on old versions of python 3.8 https://github.com/python/cpython/pull/5393 --- tests/unit/test_import.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py index 5e82a9201..a213e7d3e 100644 --- a/tests/unit/test_import.py +++ b/tests/unit/test_import.py @@ -20,7 +20,7 @@ def setUp(self): self.tmp_dir = tempfile.TemporaryDirectory() self.repo_dir: Path = files('pynwb') / 'nwb-schema' if self.repo_dir.exists(): - shutil.move(self.repo_dir, self.tmp_dir.name) + self.repo_dir.rename(self.tmp_dir.name) # remove cache files if they exist self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') @@ -31,7 +31,7 @@ def tearDown(self) -> None: # move the old repository back if self.repo_dir.exists(): shutil.rmtree(self.repo_dir) - shutil.move(Path(self.tmp_dir.name) / 'nwb-schema', self.repo_dir) + (Path(self.tmp_dir.name) / 'nwb-schema').rename(self.repo_dir) def test_clone_on_import(self): """ From 611af2b205ea8db89e5c349629ab352c6060709d Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 20:25:20 -0700 Subject: [PATCH 08/17] fix crappy tempfile module deleting tmp directory even when file object still exists --- tests/unit/test_import.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py index a213e7d3e..72d2e896c 100644 --- a/tests/unit/test_import.py +++ b/tests/unit/test_import.py @@ -17,10 +17,10 @@ class TestPyNWBSubmoduleClone(TestCase): def setUp(self): # move the existing cloned submodules to a temporary directory - self.tmp_dir = tempfile.TemporaryDirectory() + self.tmp_dir = Path(tempfile.mkdtemp()) / 'nwb-schema' self.repo_dir: Path = files('pynwb') / 'nwb-schema' if self.repo_dir.exists(): - self.repo_dir.rename(self.tmp_dir.name) + self.repo_dir.rename(self.tmp_dir) # remove cache files if they exist self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') @@ -29,9 +29,11 @@ def setUp(self): def tearDown(self) -> None: # move the old repository back - if self.repo_dir.exists(): + + if self.repo_dir.exists() and self.tmp_dir.exists(): shutil.rmtree(self.repo_dir) - (Path(self.tmp_dir.name) / 'nwb-schema').rename(self.repo_dir) + self.tmp_dir.rename(self.repo_dir) + shutil.rmtree(self.tmp_dir.parent) def test_clone_on_import(self): """ From 070cb66cc57839f296f3bf3f4bece0b4f2978db3 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Tue, 16 Jul 2024 21:13:03 -0700 Subject: [PATCH 09/17] fuck it, import side effects make testing hard and i gave it a shot lol --- tests/unit/test_import.py | 73 --------------------------------------- 1 file changed, 73 deletions(-) delete mode 100644 tests/unit/test_import.py diff --git a/tests/unit/test_import.py b/tests/unit/test_import.py deleted file mode 100644 index 72d2e896c..000000000 --- a/tests/unit/test_import.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Test functionality of import-time functionality like loading the core -namespace and cloning the submodules and whatnot -""" -import importlib -import tempfile -import shutil -from pathlib import Path -try: - from importlib.resources import files -except ImportError: - from importlib_resources import files - -import pynwb -from pynwb.testing import TestCase - -class TestPyNWBSubmoduleClone(TestCase): - def setUp(self): - # move the existing cloned submodules to a temporary directory - self.tmp_dir = Path(tempfile.mkdtemp()) / 'nwb-schema' - self.repo_dir: Path = files('pynwb') / 'nwb-schema' - if self.repo_dir.exists(): - self.repo_dir.rename(self.tmp_dir) - - # remove cache files if they exist - self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') - self.typemap_cache.unlink(missing_ok=True) - - - def tearDown(self) -> None: - # move the old repository back - - if self.repo_dir.exists() and self.tmp_dir.exists(): - shutil.rmtree(self.repo_dir) - self.tmp_dir.rename(self.repo_dir) - shutil.rmtree(self.tmp_dir.parent) - - def test_clone_on_import(self): - """ - On import, if the schema directory is not found, - we should try and import it - """ - assert not self.repo_dir.exists() - assert not self.typemap_cache.exists() - - with self.assertWarns(Warning) as clone_warning: - importlib.reload(pynwb) - - # there can be other warnings, but we should get at least one - assert len(clone_warning.warnings) > 0 - messages = [str(warn.message) for warn in clone_warning.warnings] - assert any(['initializing submodules' in msg for msg in messages]) - - assert self.repo_dir.exists() - assert self.typemap_cache.exists() - - # we should also get the namespaces correctly too - assert 'core' in pynwb.available_namespaces() - - -class TestPyNWBImportCache(TestCase): - def setUp(self) -> None: - self.typemap_cache = Path(files('pynwb') / 'core_typemap.pkl') - - def test_cache_exists(self): - assert self.typemap_cache.exists() - - def test_typemap_ns_match(self): - """ - The global __NS_CATALOG and the one contained within the global __TYPE_MAP - should be the same object after importing - """ - assert id(getattr(pynwb, '__TYPE_MAP').namespace_catalog) == id(getattr(pynwb, '__NS_CATALOG')) \ No newline at end of file From a34ef20c5b01a258c2bfe339ce01539267def2bc Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 16 Sep 2024 21:03:22 -0700 Subject: [PATCH 10/17] remove cached typemap if no version indicator file present --- src/pynwb/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 3c4ab0bfa..3d50e3cdc 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -214,6 +214,9 @@ def _load_core_namespace(final:bool=False): cached_version = f.read().strip() if cached_version != __version__: Path(__resources['cached_typemap_path']).unlink(missing_ok=True) + else: + # remove any cached typemap, forcing re-creation + Path(__resources['cached_typemap_path']).unlink(missing_ok=True) # load pickled typemap if we have one if os.path.exists(__resources['cached_typemap_path']): From db134ce2947cfce80325aec89db15786ea16edc1 Mon Sep 17 00:00:00 2001 From: Jonny Saunders Date: Mon, 16 Sep 2024 22:59:38 -0700 Subject: [PATCH 11/17] Update .gitignore Co-authored-by: Ryan Ly --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 83bd18a6e..95f08686e 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,3 @@ _version.py .core_typemap_version core_typemap.pkl -core_nscatalog.pkl From 75e9924c382b88fc7de9253d1483bd1386a6891a Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 16 Sep 2024 23:15:06 -0700 Subject: [PATCH 12/17] Rremove super private fields from import check --- tests/back_compat/test_import_structure.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/back_compat/test_import_structure.py b/tests/back_compat/test_import_structure.py index 36831929d..81c4acf90 100644 --- a/tests/back_compat/test_import_structure.py +++ b/tests/back_compat/test_import_structure.py @@ -30,19 +30,14 @@ def test_outer_import_structure(self): "TimeSeries", "TypeMap", "_HDF5IO", - "__NS_CATALOG", - "__TYPE_MAP", "__builtins__", "__cached__", "__doc__", "__file__", - "__get_resources", - "__io", "__loader__", "__name__", "__package__", "__path__", - "__resources", "__spec__", "__version__", "_due", From 578a283ca20376144b0498c772254f3898b4e70d Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 16 Sep 2024 23:19:16 -0700 Subject: [PATCH 13/17] Make namespace catalog variable not global --- src/pynwb/__init__.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index bd5ade7a2..0f88a8299 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -89,14 +89,13 @@ def _get_resources(): return __get_resources() -# a global namespace catalog -global __NS_CATALOG +# a global type map global __TYPE_MAP -__NS_CATALOG = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) +__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) hdmf_typemap = hdmf.common.get_type_map() -__TYPE_MAP = TypeMap(__NS_CATALOG) +__TYPE_MAP = TypeMap(__ns_catalog) __TYPE_MAP.merge(hdmf_typemap, ns_catalog=True) # load the core namespace, i.e. base NWB specification @@ -161,7 +160,7 @@ def load_namespaces(**kwargs): def available_namespaces(): """Returns all namespaces registered in the namespace catalog""" - return __NS_CATALOG.namespaces + return __TYPE_MAP.namespace_catalog.namespaces def _git_cmd(*args) -> subprocess.CompletedProcess: @@ -204,7 +203,6 @@ def _load_core_namespace(final:bool=False): but it shouldn't go into an infinite loop. If final is ``True``, don't recurse. """ - global __NS_CATALOG global __TYPE_MAP global __resources @@ -223,9 +221,6 @@ def _load_core_namespace(final:bool=False): if os.path.exists(__resources['cached_typemap_path']): with open(__resources['cached_typemap_path'], 'rb') as f: __TYPE_MAP = pickle.load(f) # type: TypeMap - # __NS_CATALOG is contained by __TYPE_MAP and they should be linked, - # so we restore the global one from the cached __TYPE_MAP - __NS_CATALOG = __TYPE_MAP.namespace_catalog # otherwise make a new one and cache it elif os.path.exists(__resources['namespace_path']): From d2e7f785409d374e9d1a1a238ea89cad45172687 Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 16 Sep 2024 23:27:15 -0700 Subject: [PATCH 14/17] Make new functions super private --- src/pynwb/__init__.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 0f88a8299..3bc82d2f1 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -163,7 +163,7 @@ def available_namespaces(): return __TYPE_MAP.namespace_catalog.namespaces -def _git_cmd(*args) -> subprocess.CompletedProcess: +def __git_cmd(*args) -> subprocess.CompletedProcess: """ Call git with the package as the directory regardless of cwd. @@ -176,12 +176,12 @@ def _git_cmd(*args) -> subprocess.CompletedProcess: return result -def _clone_submodules(): - if _git_cmd('rev-parse').returncode == 0: +def __clone_submodules(): + if __git_cmd('rev-parse').returncode == 0: warnings.warn( 'NWB core schema not found in cloned installation, initializing submodules...', stacklevel=1) - res = _git_cmd('submodule', 'update', '--init', '--recursive') + res = __git_cmd('submodule', 'update', '--init', '--recursive') if not res.returncode == 0: # pragma: no cover raise RuntimeError( 'Exception while initializing submodules, got:\n' @@ -190,7 +190,7 @@ def _clone_submodules(): raise RuntimeError("Package is not installed from a git repository, can't clone submodules") -def _load_core_namespace(final:bool=False): +def __load_core_namespace(final:bool=False): """ Load the core namespace into __TYPE_MAP, either by loading a pickled version or creating one anew and pickling it. @@ -234,7 +234,7 @@ def _load_core_namespace(final:bool=False): # afterwards trying to load the namespace again else: try: - _clone_submodules() + __clone_submodules() except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover if 'core' not in available_namespaces(): warnings.warn( @@ -245,10 +245,8 @@ def _load_core_namespace(final:bool=False): f"Got exception: \n{e}" ) if not final: - _load_core_namespace(final=True) -_load_core_namespace() - - + __load_core_namespace(final=True) +__load_core_namespace() # a function to register a container classes with the global map From c4e9ed10fb68875bd66f5202bb09cd45ccf0ffd3 Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 16 Sep 2024 23:28:51 -0700 Subject: [PATCH 15/17] Add deprecationwarning on _get_resources --- src/pynwb/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 3bc82d2f1..d4931c5ee 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -86,6 +86,8 @@ def __get_resources() -> dict: def _get_resources(): # LEGACY: Needed to support legacy implementation. + # TODO: Remove this in PyNWB 3.0. + warn("The function '_get_resources' is deprecated and will be removed in a future release.", DeprecationWarning) return __get_resources() From 40220976921f12348ba4d2cbdf57e9f650825c8b Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 16 Sep 2024 23:29:12 -0700 Subject: [PATCH 16/17] Make usage of warnings.warn consistent --- src/pynwb/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index d4931c5ee..1d109abe3 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -2,7 +2,6 @@ for reading and writing data in NWB format ''' import os.path -import warnings from pathlib import Path from copy import deepcopy import subprocess @@ -180,7 +179,7 @@ def __git_cmd(*args) -> subprocess.CompletedProcess: def __clone_submodules(): if __git_cmd('rev-parse').returncode == 0: - warnings.warn( + warn( 'NWB core schema not found in cloned installation, initializing submodules...', stacklevel=1) res = __git_cmd('submodule', 'update', '--init', '--recursive') @@ -239,7 +238,7 @@ def __load_core_namespace(final:bool=False): __clone_submodules() except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover if 'core' not in available_namespaces(): - warnings.warn( + warn( "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, " "you need to use the --recurse_submodules flag when cloning. " "See developer installation instructions here: " From 981b2fc075ab1065c75075b53d90618ad7140d0e Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 17 Sep 2024 00:04:41 -0700 Subject: [PATCH 17/17] Update CHANGELOG.md --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15ae58af4..e5909f577 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # PyNWB Changelog +## PyNWB 2.8.3 (Upcoming) + +### Performance +- Cache global type map to speed import 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931) + ## PyNWB 2.8.2 (September 9, 2024) ### Enhancements and minor changes @@ -15,9 +20,6 @@ - Changed `epoch_tags` to be a NWBFile property instead of constructor argument. @stephprince [#1935](https://github.com/NeurodataWithoutBorders/pynwb/pull/1935) - Exposed option to not cache the spec in `NWBHDF5IO.export`. @rly [#1959](https://github.com/NeurodataWithoutBorders/pynwb/pull/1959) -### Performance -- Cache global type map to speed import 3x. [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931) - ## PyNWB 2.8.1 (July 3, 2024) ### Documentation and tutorial enhancements