From 11038f4c978a73c2c53fd82221ff752a2796614e Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 24 Jul 2024 13:52:42 -0700 Subject: [PATCH 1/6] Fix --- src/hdmf_zarr/backend.py | 3 + tests/unit/test_zarrio.py | 32 ++++++++++ tests/unit/utils.py | 121 +++++++++++++++++++++++++++++++++++++- 3 files changed, 155 insertions(+), 1 deletion(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index b5c35552..4e9844f4 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -961,6 +961,9 @@ def write_dataset(self, **kwargs): # noqa: C901 else: options['io_settings'] = {} + if builder.dimension_labels is not None: + builder.attributes['_ARRAY_DIMENSIONS'] = builder.dimension_labels + attributes = builder.attributes options['dtype'] = builder.dtype diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py index 081706a8..8e849848 100644 --- a/tests/unit/test_zarrio.py +++ b/tests/unit/test_zarrio.py @@ -18,7 +18,10 @@ NestedDirectoryStore) import zarr from hdmf_zarr.backend import ZarrIO +from .utils import BuildDatasetShapeMixin, BarData, BarDataHolder +from hdmf.spec import DatasetSpec import os +import shutil CUR_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -182,3 +185,32 @@ def test_force_open_without_consolidated_fails(self): except ValueError as e: self.fail("ZarrIO.__open_file_consolidated raised an unexpected ValueError: {}".format(e)) + +class TestBuildDatasetDimensionLabelsOneOption(BuildDatasetShapeMixin): + def tearDown(self): + shutil.rmtree(self.store) + + def get_base_shape_dims(self): + return [None, None], ['a', 'b'] + + def get_dataset_inc_spec(self): + dataset_inc_spec = DatasetSpec( + doc='A BarData', + data_type_inc='BarData', + quantity='*', + ) + return dataset_inc_spec + + def test_build(self): + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + with 
ZarrIO(self.store, manager=self.manager, mode='w') as io: + io.write(bar_data_holder_inst) + + with ZarrIO(self.store, manager=self.manager, mode='r') as io: + file = io.read() + self.assertEqual(file.bar_datas[0].data.attrs['_ARRAY_DIMENSIONS'], ['a', 'b']) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index ebc1f732..de343acd 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -1,14 +1,16 @@ import os import tempfile from copy import copy, deepcopy +from abc import ABCMeta, abstractmethod from hdmf.build import (ObjectMapper, TypeMap, BuildManager) from hdmf.container import (Container, Data) from hdmf.spec import (GroupSpec, DatasetSpec, AttributeSpec, LinkSpec, RefSpec, DtypeSpec, NamespaceCatalog, SpecCatalog, - SpecNamespace, NamespaceBuilder) + SpecNamespace, NamespaceBuilder, Spec) from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE) from hdmf.utils import (docval, getargs, get_docval) +from hdmf.testing import TestCase from hdmf_zarr.backend import ROOT_NAME CORE_NAMESPACE = 'test_core' @@ -591,3 +593,120 @@ class CustomSpecNamespace(SpecNamespace): @classmethod def types_key(cls): return cls.__types_key + + +class BarData(Data): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarData'}, + {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'the data'}, + {'name': 'attr1', 'type': str, 'doc': 'a string attribute', 'default': None}, + {'name': 'attr2', 'type': 'int', 'doc': 'an int attribute', 'default': None}, + {'name': 'ext_attr', 'type': bool, 'doc': 'a boolean attribute', 'default': True}) + def __init__(self, **kwargs): + name, data, attr1, attr2, ext_attr = getargs('name', 'data', 'attr1', 'attr2', 'ext_attr', kwargs) + super().__init__(name=name, data=data) + self.__attr1 = attr1 + self.__attr2 = attr2 + self.__ext_attr = kwargs['ext_attr'] + + @property + def data_type(self): + return 'BarData' + + @property + def attr1(self): + return self.__attr1 + + @property + def attr2(self): + return 
self.__attr2 + + @property + def ext_attr(self): + return self.__ext_attr + + +class BarDataHolder(Container): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarDataHolder'}, + {'name': 'bar_datas', 'type': ('data', 'array_data'), 'doc': 'bar_datas', 'default': list()}) + def __init__(self, **kwargs): + name, bar_datas = getargs('name', 'bar_datas', kwargs) + super().__init__(name=name) + self.__bar_datas = bar_datas + for b in bar_datas: + if b is not None and b.parent is None: + b.parent = self + + @property + def data_type(self): + return 'BarDataHolder' + + @property + def bar_datas(self): + return self.__bar_datas + + +class ExtBarDataMapper(ObjectMapper): + + @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, + {"name": "container", "type": BarData, "doc": "the container to get the attribute value from"}, + {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, + returns='the value of the attribute') + def get_attr_value(self, **kwargs): + ''' Get the value of the attribute corresponding to this spec from the given container ''' + spec, container, manager = getargs('spec', 'container', 'manager', kwargs) + # handle custom mapping of field 'ext_attr' within container + # BarDataHolder/BarData -> spec BarDataHolder/BarData.ext_attr + if isinstance(container.parent, BarDataHolder): + if spec.name == 'ext_attr': + return container.ext_attr + return super().get_attr_value(**kwargs) + + +class BuildDatasetShapeMixin(TestCase, metaclass=ABCMeta): + + def setUp(self): + self.store = "tests/unit/test_io.zarr" + self.set_up_specs() + spec_catalog = SpecCatalog() + spec_catalog.register_spec(self.bar_data_spec, 'test.yaml') + spec_catalog.register_spec(self.bar_data_holder_spec, 'test.yaml') + namespace = SpecNamespace( + doc='a test namespace', + name=CORE_NAMESPACE, + schema=[{'source': 'test.yaml'}], + version='0.1.0', + catalog=spec_catalog + ) + namespace_catalog
= NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(CORE_NAMESPACE, 'BarData', BarData) + type_map.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder) + type_map.register_map(BarData, ExtBarDataMapper) + type_map.register_map(BarDataHolder, ObjectMapper) + self.manager = BuildManager(type_map) + + def set_up_specs(self): + shape, dims = self.get_base_shape_dims() + self.bar_data_spec = DatasetSpec( + doc='A test dataset specification with a data type', + data_type_def='BarData', + dtype='int', + shape=shape, + dims=dims, + ) + self.bar_data_holder_spec = GroupSpec( + doc='A container of multiple extended BarData objects', + data_type_def='BarDataHolder', + datasets=[self.get_dataset_inc_spec()], + ) + + @abstractmethod + def get_base_shape_dims(self): + pass + + @abstractmethod + def get_dataset_inc_spec(self): + pass From 03b338c63a06858a7b331628723db346cea74b64 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 24 Jul 2024 14:00:22 -0700 Subject: [PATCH 2/6] name --- tests/unit/test_zarrio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py index 8e849848..2704fdfa 100644 --- a/tests/unit/test_zarrio.py +++ b/tests/unit/test_zarrio.py @@ -186,7 +186,7 @@ def test_force_open_without_consolidated_fails(self): self.fail("ZarrIO.__open_file_consolidated raised an unexpected ValueError: {}".format(e)) -class TestBuildDatasetDimensionLabelsOneOption(BuildDatasetShapeMixin): +class TestDimensionLabels(BuildDatasetShapeMixin): def tearDown(self): shutil.rmtree(self.store) From 79e47603b7c42c380d07fdfb79e0d730f58ee273 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 29 Jul 2024 09:05:23 -0700 Subject: [PATCH 3/6] Update pyproject.toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 
e59270a5..31bbd524 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,9 +29,9 @@ classifiers = [ "Topic :: Scientific/Engineering :: Medical Science Apps." ] dependencies = [ - 'hdmf>=3.9.0', + 'hdmf>=3.14.3', 'zarr>=2.11.0, <3.0', # pin below 3.0 until HDMF-zarr supports zarr 3.0 - 'numpy>=1.24, <2.0', # pin below 2.0 until HDMF supports numpy 2.0 + 'numpy>=1.24', 'numcodecs>=0.9.1', 'pynwb>=2.5.0', 'threadpoolctl>=3.1.0', From 93b389fb9826b0cf9f0374ab877df7a1beb05d1e Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Mon, 29 Jul 2024 13:51:09 -0700 Subject: [PATCH 4/6] notes --- tests/unit/test_zarrio.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py index 910ac688..7c9f3d6d 100644 --- a/tests/unit/test_zarrio.py +++ b/tests/unit/test_zarrio.py @@ -187,6 +187,14 @@ def test_force_open_without_consolidated_fails(self): class TestDimensionLabels(BuildDatasetShapeMixin): + """ + This is to test setting the dimension_labels as a zarr attribute '_ARRAY_DIMENSIONS'. + + Workflow: + i) We need to define a `get_dataset_inc_spec` to set the dim in the spec (via BuildDatasetShapeMixin) + ii) Create and write a BarDataHolder with a BarData. + iii) Read and check that the _ARRAY_DIMENSIONS attribute is set. + """ def tearDown(self): shutil.rmtree(self.store) @@ -214,4 +222,3 @@ def test_build(self): with ZarrIO(self.store, manager=self.manager, mode='r') as io: file = io.read() self.assertEqual(file.bar_datas[0].data.attrs['_ARRAY_DIMENSIONS'], ['a', 'b']) - From 47abb73f2ca6f2d857f2c6e11bb41ca0056f31fa Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 29 Jul 2024 13:53:03 -0700 Subject: [PATCH 5/6] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ddc9a05e..0a97df5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Enhancements * NWBZarrIO load_namespaces=True by default. 
@mavaylon1 [#204](https://github.com/hdmf-dev/hdmf-zarr/pull/204) * Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) +* Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) ## 0.8.0 (June 4, 2024) ### Bug Fixes From 91a3e7abee2b9d2f507606c6e9fcf9f160b9448d Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 31 Jul 2024 10:39:25 -0700 Subject: [PATCH 6/6] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 199a0f56..c52a5100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ classifiers = [ dependencies = [ 'hdmf>=3.14.3', 'zarr>=2.11.0, <3.0', # pin below 3.0 until HDMF-zarr supports zarr 3.0 - 'numpy>=1.24', + 'numpy>=1.24, <2.0', # pin below 2.0 until HDMF-zarr supports numpy 2.0 'numcodecs>=0.9.1', 'pynwb>=2.5.0', 'threadpoolctl>=3.1.0',