Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache __TYPE_MAP and init submodules #1931

Merged
merged 20 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
90116a5
cache __TYPE_MAP and init submodules
sneakers-the-rat Jul 10, 2024
bdd0ccb
add changelog entry
sneakers-the-rat Jul 11, 2024
667937d
Merge branch 'dev' into faster-imports
mavaylon1 Jul 15, 2024
604b3a8
put __resources back in module ns
sneakers-the-rat Jul 16, 2024
811651d
add tests for submodule clone and caching
sneakers-the-rat Jul 16, 2024
a9b7878
rm cached ns catalog, use the one cached in typemap globally
sneakers-the-rat Jul 16, 2024
7d1d5f1
make error a warning if can't clone when installed as package, simpli…
sneakers-the-rat Jul 16, 2024
62c7c6f
shutil is bugged on old versions of python 3.8 https://github.com/py…
sneakers-the-rat Jul 17, 2024
611af2b
fix crappy tempfile module deleting tmp directory even when file obje…
sneakers-the-rat Jul 17, 2024
070cb66
fuck it, import side effects make testing hard and i gave it a shot lol
sneakers-the-rat Jul 17, 2024
ac59dda
Merge branch 'dev' into faster-imports
rly Sep 17, 2024
a34ef20
remove cached typemap if no version indicator file present
sneakers-the-rat Sep 17, 2024
0d37e91
Merge branch 'faster-imports' of https://github.com/sneakers-the-rat/…
sneakers-the-rat Sep 17, 2024
db134ce
Update .gitignore
sneakers-the-rat Sep 17, 2024
75e9924
Rremove super private fields from import check
rly Sep 17, 2024
578a283
Make namespace catalog variable not global
rly Sep 17, 2024
d2e7f78
Make new functions super private
rly Sep 17, 2024
c4e9ed1
Add deprecationwarning on _get_resources
rly Sep 17, 2024
4022097
Make usage of warnings.warn consistent
rly Sep 17, 2024
981b2fc
Update CHANGELOG.md
rly Sep 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,6 @@ tests/coverage/htmlcov

# Version
_version.py

.core_typemap_version
core_typemap.pkl
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# PyNWB Changelog

## PyNWB 2.8.3 (Upcoming)

### Performance
- Cache global type map to speed import 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931)

## PyNWB 2.8.2 (September 9, 2024)

### Enhancements and minor changes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ exclude = [
"__pycache__",
"build/",
"dist/",
"src/nwb-schema",
"src/pynwb/nwb-schema",
"docs/source/conf.py",
"docs/notebooks/*",
"src/pynwb/_due.py",
Expand Down
137 changes: 111 additions & 26 deletions src/pynwb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import os.path
from pathlib import Path
from copy import deepcopy
import subprocess
import pickle
from warnings import warn
import h5py

Expand All @@ -23,6 +25,16 @@
from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402
from .validate import validate # noqa: F401, E402

try:
# see https://effigies.gitlab.io/posts/python-packaging-2023/
from ._version import __version__
except ImportError: # pragma: no cover
# this is a relatively slower method for getting the version string
from importlib.metadata import version # noqa: E402

__version__ = version("pynwb")
del version


@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'},
{'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
Expand Down Expand Up @@ -51,7 +63,7 @@
type_map = kwargs['type_map'] or get_type_map()
hdmf_unload_type_config(type_map=type_map)

def __get_resources():
def __get_resources() -> dict:
try:
from importlib.resources import files
except ImportError:
Expand All @@ -61,27 +73,35 @@
__location_of_this_file = files(__name__)
__core_ns_file_name = 'nwb.namespace.yaml'
__schema_dir = 'nwb-schema/core'
cached_core_typemap = __location_of_this_file / 'core_typemap.pkl'
cached_version_indicator = __location_of_this_file / '.core_typemap_version'

ret = dict()
ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name)
ret['cached_typemap_path'] = str(cached_core_typemap)
ret['cached_version_indicator'] = str(cached_version_indicator)
return ret


def _get_resources():
# LEGACY: Needed to support legacy implementation.
# TODO: Remove this in PyNWB 3.0.
warn("The function '_get_resources' is deprecated and will be removed in a future release.", DeprecationWarning)

Check warning on line 89 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L89

Added line #L89 was not covered by tests
return __get_resources()


# a global namespace catalog
global __NS_CATALOG
# a global type map
global __TYPE_MAP

__NS_CATALOG = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)

hdmf_typemap = hdmf.common.get_type_map()
__TYPE_MAP = TypeMap(__NS_CATALOG)
__TYPE_MAP = TypeMap(__ns_catalog)
__TYPE_MAP.merge(hdmf_typemap, ns_catalog=True)

# load the core namespace, i.e. base NWB specification
__resources = __get_resources()


@docval({'name': 'extensions', 'type': (str, TypeMap, list),
'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
Expand Down Expand Up @@ -139,22 +159,95 @@
namespace_path = getargs('namespace_path', kwargs)
return __TYPE_MAP.load_namespaces(namespace_path)

def available_namespaces():
"""Returns all namespaces registered in the namespace catalog"""
return __TYPE_MAP.namespace_catalog.namespaces

# load the core namespace, i.e. base NWB specification
__resources = __get_resources()
if os.path.exists(__resources['namespace_path']):
load_namespaces(__resources['namespace_path'])
else:
raise RuntimeError(
"'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to "
"use the --recurse_submodules flag when cloning. See developer installation instructions here: "
"https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository"
)

def __git_cmd(*args) -> subprocess.CompletedProcess:
"""
Call git with the package as the directory regardless of cwd.

Since any folder within a git repo works, don't try to ascend to the top, since
if we're *not* actually in a git repo we're only guaranteed to know about
the inner `pynwb` directory.
"""
parent_dir = str(Path(__file__).parent)
result = subprocess.run(["git", "-C", parent_dir, *args], capture_output=True)
return result

Check warning on line 177 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L175-L177

Added lines #L175 - L177 were not covered by tests


def __clone_submodules():
if __git_cmd('rev-parse').returncode == 0:
warn(

Check warning on line 182 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L181-L182

Added lines #L181 - L182 were not covered by tests
'NWB core schema not found in cloned installation, initializing submodules...',
stacklevel=1)
res = __git_cmd('submodule', 'update', '--init', '--recursive')

Check warning on line 185 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L185

Added line #L185 was not covered by tests
if not res.returncode == 0: # pragma: no cover
raise RuntimeError(
'Exception while initializing submodules, got:\n'
'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr)
else: # pragma: no cover
raise RuntimeError("Package is not installed from a git repository, can't clone submodules")


def __load_core_namespace(final:bool=False):
"""
Load the core namespace into __TYPE_MAP,
either by loading a pickled version or creating one anew and pickling it.

def available_namespaces():
"""Returns all namespaces registered in the namespace catalog"""
return __NS_CATALOG.namespaces
We keep a dotfile next to it that tracks what version of pynwb created it,
so that we invalidate it when the code changes.

Args:
final (bool): This function tries again if the submodules aren't cloned,
but it shouldn't go into an infinite loop.
If final is ``True``, don't recurse.
"""
global __TYPE_MAP
global __resources
rly marked this conversation as resolved.
Show resolved Hide resolved

# if we have a version indicator file and it doesn't match the current version,
# scrap the cached typemap
if os.path.exists(__resources['cached_version_indicator']):
with open(__resources['cached_version_indicator'], 'r') as f:
cached_version = f.read().strip()

Check warning on line 214 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L214

Added line #L214 was not covered by tests
if cached_version != __version__:
Path(__resources['cached_typemap_path']).unlink(missing_ok=True)

Check warning on line 216 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L216

Added line #L216 was not covered by tests
else:
# remove any cached typemap, forcing re-creation
Path(__resources['cached_typemap_path']).unlink(missing_ok=True)

# load pickled typemap if we have one
rly marked this conversation as resolved.
Show resolved Hide resolved
if os.path.exists(__resources['cached_typemap_path']):
with open(__resources['cached_typemap_path'], 'rb') as f:
__TYPE_MAP = pickle.load(f) # type: TypeMap

Check warning on line 224 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L224

Added line #L224 was not covered by tests

# otherwise make a new one and cache it
elif os.path.exists(__resources['namespace_path']):
load_namespaces(__resources['namespace_path'])
with open(__resources['cached_typemap_path'], 'wb') as f:
pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL)
with open(__resources['cached_version_indicator'], 'w') as f:
f.write(__version__)

# otherwise, we don't have the schema and try and initialize from submodules,
# afterwards trying to load the namespace again
else:
try:
__clone_submodules()

Check warning on line 238 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L237-L238

Added lines #L237 - L238 were not covered by tests
except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover
if 'core' not in available_namespaces():
warn(
"'core' is not a registered namespace. If you installed PyNWB locally using a git clone, "
"you need to use the --recurse_submodules flag when cloning. "
"See developer installation instructions here: "
"https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n"
f"Got exception: \n{e}"
)
if not final:
__load_core_namespace(final=True)

Check warning on line 249 in src/pynwb/__init__.py

View check run for this annotation

Codecov / codecov/patch

src/pynwb/__init__.py#L249

Added line #L249 was not covered by tests
__load_core_namespace()


# a function to register a container classes with the global map
Expand Down Expand Up @@ -427,15 +520,7 @@
from hdmf.data_utils import DataChunkIterator # noqa: F401,E402
from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402

try:
# see https://effigies.gitlab.io/posts/python-packaging-2023/
from ._version import __version__
except ImportError: # pragma: no cover
# this is a relatively slower method for getting the version string
from importlib.metadata import version # noqa: E402

__version__ = version("pynwb")
del version

from ._due import due, BibTeX # noqa: E402
due.cite(
Expand Down
5 changes: 0 additions & 5 deletions tests/back_compat/test_import_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,14 @@ def test_outer_import_structure(self):
"TimeSeries",
"TypeMap",
"_HDF5IO",
"__NS_CATALOG",
"__TYPE_MAP",
"__builtins__",
"__cached__",
"__doc__",
"__file__",
"__get_resources",
"__io",
"__loader__",
"__name__",
"__package__",
"__path__",
"__resources",
"__spec__",
"__version__",
"_due",
Expand Down
Loading