From b94068c24c138757fe63d0adf23fc34471a6e72d Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Mon, 29 Jul 2024 07:50:32 -0700
Subject: [PATCH 01/19] prior

---
 src/hdmf/backends/hdf5/h5tools.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 8135d75e7..592ffb5fe 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -383,7 +383,10 @@ def copy_file(self, **kwargs):
          'default': True},
         {'name': 'herd', 'type': 'hdmf.common.resources.HERD',
          'doc': 'A HERD object to populate with references.',
-         'default': None})
+         'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -826,10 +829,16 @@ def close_linked_files(self):
          'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
          'default': True},
         {'name': 'export_source', 'type': str,
-         'doc': 'The source of the builders when exporting', 'default': None})
+         'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
-        link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
+        link_data, exhaust_dci, export_source = getargs('link_data',
+                                                        'exhaust_dci',
+                                                        'export_source',
+                                                        kwargs)
         self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
                           % (f_builder.name, self.source, kwargs))
         for name, gbldr in f_builder.groups.items():
@@ -1000,6 +1009,9 @@ def _filler():
          'default': True},
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
         parent, builder = popargs('parent', 'builder', kwargs)
@@ -1100,6 +1112,9 @@ def write_link(self, **kwargs):
          'default': True},
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901
         """ Write a dataset to HDF5
@@ -1107,7 +1122,7 @@ def write_dataset(self, **kwargs):  # noqa: C901
         The function uses other dataset-dependent write functions, e.g,
         ``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
         """
-        parent, builder = popargs('parent', 'builder', kwargs)
+        parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
         link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
         self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
         if self.get_written(builder):
@@ -1228,7 +1243,7 @@ def _filler():
                 return
             # If the compound data type contains only regular data (i.e., no references) then we can write it as usual
             else:
-                dset = self.__list_fill__(parent, name, data, options)
+                dset = self.__list_fill__(parent, name, data, expandable, options)
         # Write a dataset containing references, i.e., a region or object reference.
         # NOTE: we can ignore options['io_settings'] for scalar data
         elif self.__is_ref(options['dtype']):
@@ -1323,7 +1338,7 @@ def _filler():
             self.__dci_queue.append(dataset=dset, data=data)
         # Write a regular in memory array (e.g., numpy array, list etc.)
         elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, options)
+            dset = self.__list_fill__(parent, name, data, expandable, options)
         # Write a regular scalar dataset
         else:
             dset = self.__scalar_fill__(parent, name, data, options)
@@ -1451,7 +1466,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
         return dset

     @classmethod
-    def __list_fill__(cls, parent, name, data, options=None):
+    def __list_fill__(cls, parent, name, data, expandable, options=None):
         # define the io settings and data type if necessary
         io_settings = {}
         dtype = None

From b251b78ce31dda82ea406b13b8ef7e7a49bedc68 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 31 Jul 2024 10:43:49 -0700
Subject: [PATCH 02/19] checkpoint

---
 src/hdmf/backends/hdf5/h5tools.py | 6 ++++++
 src/hdmf/build/objectmapper.py    | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 592ffb5fe..5926733d4 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1132,6 +1132,7 @@ def write_dataset(self, **kwargs):  # noqa: C901
         data = builder.data
         dataio = None
         options = dict()   # dict with additional
+        # breakpoint()
         if isinstance(data, H5DataIO):
             options['io_settings'] = data.io_settings
             dataio = data
@@ -1488,6 +1489,11 @@ def __list_fill__(cls, parent, name, data, expandable, options=None):
             data_shape = (len(data),)
         else:
             data_shape = get_data_shape(data)
+        # breakpoint()
+        if expandable:
+            # Don't override existing settings
+            if 'maxshape' not in io_settings:
+                io_settings['maxshape'] = tuple([None]*len(data_shape))

         # Create the dataset
         try:
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index b5815ee2c..90cb0654b 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -722,6 +722,7 @@ def build(self, **kwargs):
             if not isinstance(container, Data):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
+            breakpoint()
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
             dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims)
             if isinstance(spec_dtype, RefSpec):
From 2387a5ee349615e2b40baad8f9ccbe205a9a6936 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Sat, 3 Aug 2024 14:15:43 -0700
Subject: [PATCH 03/19] possible poc

---
 src/hdmf/backends/hdf5/h5tools.py  | 15 ++++++++-------
 src/hdmf/build/builders.py         | 16 ++++++++++++----
 src/hdmf/build/objectmapper.py     | 24 ++++++++++++++----------
 tests/unit/test_io_hdf5.py         |  2 +-
 tests/unit/test_io_hdf5_h5tools.py | 27 +++++++++++++++++++++++++--
 5 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 5926733d4..1d41ad5e7 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1130,9 +1130,9 @@ def write_dataset(self, **kwargs):  # noqa: C901
             return None
         name = builder.name
         data = builder.data
+        matched_spec_shape = builder.spec_shapes
         dataio = None
         options = dict()   # dict with additional
-        # breakpoint()
         if isinstance(data, H5DataIO):
             options['io_settings'] = data.io_settings
             dataio = data
@@ -1244,7 +1244,7 @@ def _filler():
                 return
             # If the compound data type contains only regular data (i.e., no references) then we can write it as usual
             else:
-                dset = self.__list_fill__(parent, name, data, expandable, options)
+                dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
         # Write a dataset containing references, i.e., a region or object reference.
         # NOTE: we can ignore options['io_settings'] for scalar data
         elif self.__is_ref(options['dtype']):
@@ -1339,7 +1339,7 @@ def _filler():
             self.__dci_queue.append(dataset=dset, data=data)
         # Write a regular in memory array (e.g., numpy array, list etc.)
         elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, expandable, options)
+            dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
         # Write a regular scalar dataset
         else:
             dset = self.__scalar_fill__(parent, name, data, options)
@@ -1467,7 +1467,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
         return dset

     @classmethod
-    def __list_fill__(cls, parent, name, data, expandable, options=None):
+    def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
         # define the io settings and data type if necessary
         io_settings = {}
         dtype = None
@@ -1489,12 +1489,13 @@ def __list_fill__(cls, parent, name, data, expandable, options=None):
             data_shape = (len(data),)
         else:
             data_shape = get_data_shape(data)
-        # breakpoint()
         if expandable:
             # Don't override existing settings
             if 'maxshape' not in io_settings:
-                io_settings['maxshape'] = tuple([None]*len(data_shape))
-
+                if matched_spec_shape is not None:
+                    io_settings['maxshape'] = tuple([None]*len(matched_spec_shape))
+                else:
+                    io_settings['maxshape'] = tuple([None]*len(data_shape))
         # Create the dataset
         try:
             dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py
index cb658b6d4..6ed453166 100644
--- a/src/hdmf/build/builders.py
+++ b/src/hdmf/build/builders.py
@@ -330,6 +330,9 @@ class DatasetBuilder(BaseBuilder):
          'doc': 'The datatype of this dataset.', 'default': None},
         {'name': 'attributes', 'type': dict,
          'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()},
+        {'name': 'spec_shapes', 'type': tuple,
+         'doc': ('The shape(s) defined in the spec.'),
+         'default': None},
         {'name': 'dimension_labels', 'type': tuple,
          'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is '
                  'supplied only on build.'),
@@ -341,15 +344,15 @@ class DatasetBuilder(BaseBuilder):
         {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None})
     def __init__(self, **kwargs):
         """ Create a Builder object for a dataset """
-        name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs(
-            'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source',
-            kwargs
-        )
+        name, data, dtype, attributes, spec_shapes, dimension_labels, maxshape, chunks, parent, source = getargs(
+            'name', 'data', 'dtype', 'attributes', 'spec_shapes', 'dimension_labels', 'maxshape', 'chunks',
+            'parent', 'source', kwargs)
         super().__init__(name, attributes, parent, source)
         self['data'] = data
         self['attributes'] = _copy.copy(attributes)
         self.__dimension_labels = dimension_labels
         self.__chunks = chunks
+        self.__spec_shapes = spec_shapes
         self.__maxshape = maxshape
         if isinstance(data, BaseBuilder):
             if dtype is None:
@@ -357,6 +360,11 @@ def __init__(self, **kwargs):
         self.__dtype = dtype
         self.__name = name

+    @property
+    def spec_shapes(self):
+        """The shapes defined in the spec."""
+        return self.__spec_shapes
+
     @property
     def data(self):
         """The data stored in the dataset represented by this builder."""
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 90cb0654b..589078a6f 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -722,9 +722,8 @@ def build(self, **kwargs):
             if not isinstance(container, Data):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
-            breakpoint()
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
-            dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims)
+            dimension_labels, matched_shape = self.__get_spec_info(container.data, spec_shape, spec_dims)
             if isinstance(spec_dtype, RefSpec):
                 self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                   % (container.__class__.__name__, container.name, repr(source)))
@@ -735,6 +734,7 @@ def build(self, **kwargs):
                     parent=parent,
                     source=source,
                     dtype=spec_dtype.reftype,
+                    spec_shapes=matched_shape,
                     dimension_labels=dimension_labels,
                 )
                 manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager))
@@ -749,6 +749,7 @@ def build(self, **kwargs):
                     parent=parent,
                     source=source,
                     dtype=spec_dtype,
+                    spec_shapes=matched_shape,
                     dimension_labels=dimension_labels,
                 )
                 manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container,
@@ -767,6 +768,7 @@ def build(self, **kwargs):
                         parent=parent,
                         source=source,
                         dtype="object",
+                        spec_shapes=matched_shape,
                         dimension_labels=dimension_labels,
                     )
                     manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager))
@@ -790,6 +792,7 @@ def build(self, **kwargs):
                 parent=parent,
                 source=source,
                 dtype=dtype,
+                spec_shapes=matched_shape,
                 dimension_labels=dimension_labels,
             )

@@ -821,9 +824,10 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

-    def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
+    def __get_spec_info(self, data, spec_shape, spec_dims):
+        """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None or spec_dims is None:
-            return None
+            return None, None
         data_shape = get_data_shape(data)
         # if shape is a list of allowed shapes, find the index of the shape that matches the data
         if isinstance(spec_shape[0], list):
@@ -843,22 +847,22 @@ def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
             # use the most specific match -- the one with the fewest Nones
             if match_shape_inds:
                 if len(match_shape_inds) == 1:
-                    return tuple(spec_dims[match_shape_inds[0]])
+                    return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
                 else:
                     count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
                     index_min_count = count_nones.index(min(count_nones))
                     best_match_ind = match_shape_inds[index_min_count]
-                    return tuple(spec_dims[best_match_ind])
+                    return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
             else:
                 # no matches found
                 msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
         else:
             if len(data_shape) != len(spec_shape):
                 msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
             # check each dimension. None means any length is allowed
             match = True
             for j, d in enumerate(data_shape):
@@ -868,9 +872,9 @@ def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
                 if spec_shape[j] is not None and spec_shape[j] != d:
                     match = False
                     break
             if not match:
                 msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
             # shape is a single list of allowed dimension lengths
-            return tuple(spec_dims)
+            return tuple(spec_dims), tuple(spec_shape)
diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py
index 29b7f2d7f..d15848abf 100644
--- a/tests/unit/test_io_hdf5.py
+++ b/tests/unit/test_io_hdf5.py
@@ -225,5 +225,5 @@ def test_dataset_shape(self):
         io.write_builder(self.builder)
         builder = io.read_builder()
         dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data
-        self.assertEqual(get_data_shape(dset), (10,))
+        self.assertEqual(get_data_shape(dset), (None,))
         io.close()
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 5a4fd5a32..9e87ecba7 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -28,6 +28,7 @@
 from hdmf.testing import TestCase, remove_test_file
 from hdmf.common.resources import HERD
 from hdmf.term_set import TermSet, TermSetWrapper
+from hdmf.utils import get_data_shape
 from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
                                       Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager,
                                       CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper,
                                       CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace)
@@ -739,12 +740,12 @@ def test_copy_h5py_dataset_h5dataio_input(self):
                          self.f['test_copy'][:].tolist())

     def test_list_fill_empty(self):
-        dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}})
+        dset = self.io.__list_fill__(self.f, 'empty_dataset', [], None, True, options={'dtype': int, 'io_settings': {}})
         self.assertTupleEqual(dset.shape, (0,))

     def test_list_fill_empty_no_dtype(self):
         with self.assertRaisesRegex(Exception, r"cannot add \S+ to [/\S]+ - could not determine type"):
-            self.io.__list_fill__(self.f, 'empty_dataset', [])
+            self.io.__list_fill__(self.f, 'empty_dataset', [], None, True)

     def test_read_str(self):
         a = ['a', 'bb', 'ccc', 'dddd', 'e']
@@ -3725,3 +3726,25 @@ def test_set_data_io(self):
         self.data.set_data_io(H5DataIO, dict(chunks=True))
         assert isinstance(self.data.data, H5DataIO)
         assert self.data.data.io_settings["chunks"]
+
+
+class TestExpand(TestCase):
+    def setUp(self):
+        self.manager = get_foo_buildmanager()
+        self.path = get_temp_filepath()
+
+    def test_expand_false(self):
+        # Setup all the data we need
+        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        foobucket = FooBucket('bucket1', [foo1])
+        foofile = FooFile(buckets=[foobucket])
+
+        with HDF5IO(self.path, manager=self.manager, mode='w') as io:
+            io.write(foofile, expandable=False)
+
+        io = HDF5IO(self.path, manager=self.manager, mode='r')
+        read_foofile = io.read()
+        self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data,
+                             read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist())
+        self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data),
+                         (5,))

From bfc85d1dba20107e8ef948268ee6116372cf6a35 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Sat, 3 Aug 2024 14:28:54 -0700
Subject: [PATCH 04/19] fix

---
 src/hdmf/backends/hdf5/h5tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 1d41ad5e7..6c276ceef 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1493,7 +1493,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
-                    io_settings['maxshape'] = tuple([None]*len(matched_spec_shape))
+                    io_settings['maxshape'] = matched_spec_shape
                 else:
                     io_settings['maxshape'] = tuple([None]*len(data_shape))
         # Create the dataset
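With PATCH 03 and 04 in place, maxshape is no longer a blanket tuple of None: when the data matches a shape declared in the schema, that spec shape is used directly, so a dimension fixed in the spec stays fixed while a None (wildcard) dimension becomes unlimited. A short h5py-level sketch of the resulting dataset semantics (the names and values are illustrative, not HDMF API):

    import h5py
    import numpy as np

    with h5py.File("example.h5", "w") as f:
        # matched spec shape (None, 3): unlimited rows, exactly 3 columns
        dset = f.create_dataset("qux_data",
                                data=np.array([[1, 2, 3], [4, 5, 6]]),
                                maxshape=(None, 3))
        dset.resize((3, 3))      # growing along the unlimited axis is allowed
        dset[2, :] = [7, 8, 9]
        # dset.resize((3, 4)) would fail: the second axis is capped at 3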
From 20e61dbf8a9f4e5f255323971050b5b26a6463ac Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Mon, 5 Aug 2024 14:02:51 -0700
Subject: [PATCH 05/19] finished tests

---
 src/hdmf/build/objectmapper.py     | 92 ++++++++++++++++--------------
 tests/unit/helpers/utils.py        | 64 +++++++++++++++++++++
 tests/unit/test_io_hdf5_h5tools.py | 28 ++++++++++-
 3 files changed, 138 insertions(+), 46 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 589078a6f..d2c239110 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -555,6 +555,7 @@ def get_attribute(self, **kwargs):
     def get_attr_value(self, **kwargs):
         ''' Get the value of the attribute corresponding to this spec from the given container '''
         spec, container, manager = getargs('spec', 'container', 'manager', kwargs)
+
         attr_name = self.get_attribute(spec)
         if attr_name is None:
             return None
@@ -826,55 +827,60 @@ def __check_dset_spec(self, orig, ext):
     def __get_spec_info(self, data, spec_shape, spec_dims):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
-        if spec_shape is None or spec_dims is None:
+        if spec_shape is None and spec_dims is None:
             return None, None
+        elif spec_shape is not None and spec_dims is None:
+            return None, tuple(spec_shape)
+        elif spec_shape is None and spec_dims is not None:
+            return spec_dims, None
+        else:
+            data_shape = get_data_shape(data)
+            # if shape is a list of allowed shapes, find the index of the shape that matches the data
+            if isinstance(spec_shape[0], list):
+                match_shape_inds = list()
+                for i, s in enumerate(spec_shape):
+                    # skip this shape if it has a different number of dimensions from the data
+                    if len(s) != len(data_shape):
+                        continue
+                    # check each dimension. None means any length is allowed
+                    match = True
+                    for j, d in enumerate(data_shape):
+                        if s[j] is not None and s[j] != d:
+                            match = False
+                            break
+                    if match:
+                        match_shape_inds.append(i)
+                # use the most specific match -- the one with the fewest Nones
+                if match_shape_inds:
+                    if len(match_shape_inds) == 1:
+                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
+                    else:
+                        count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
+                        index_min_count = count_nones.index(min(count_nones))
+                        best_match_ind = match_shape_inds[index_min_count]
+                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
+                else:
+                    # no matches found
+                    msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+            else:
+                if len(data_shape) != len(spec_shape):
+                    msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+                # check each dimension. None means any length is allowed
+                match = True
+                for j, d in enumerate(data_shape):
+                    if spec_shape[j] is not None and spec_shape[j] != d:
+                        match = False
+                        break
+                if not match:
+                    msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+                # shape is a single list of allowed dimension lengths
+                return tuple(spec_dims), tuple(spec_shape)
-        data_shape = get_data_shape(data)
-        # if shape is a list of allowed shapes, find the index of the shape that matches the data
-        if isinstance(spec_shape[0], list):
-            match_shape_inds = list()
-            for i, s in enumerate(spec_shape):
-                # skip this shape if it has a different number of dimensions from the data
-                if len(s) != len(data_shape):
-                    continue
-                # check each dimension. None means any length is allowed
-                match = True
-                for j, d in enumerate(data_shape):
-                    if s[j] is not None and s[j] != d:
-                        match = False
-                        break
-                if match:
-                    match_shape_inds.append(i)
-            # use the most specific match -- the one with the fewest Nones
-            if match_shape_inds:
-                if len(match_shape_inds) == 1:
-                    return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
-                else:
-                    count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
-                    index_min_count = count_nones.index(min(count_nones))
-                    best_match_ind = match_shape_inds[index_min_count]
-                    return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
-            else:
-                # no matches found
-                msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-        else:
-            if len(data_shape) != len(spec_shape):
-                msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-            # check each dimension. None means any length is allowed
-            match = True
-            for j, d in enumerate(data_shape):
-                if spec_shape[j] is not None and spec_shape[j] != d:
-                    match = False
-                    break
-            if not match:
-                msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-            # shape is a single list of allowed dimension lengths
-            return tuple(spec_dims), tuple(spec_shape)
diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py
index 0f4b3c4bf..b2d2cb4ae 100644
--- a/tests/unit/helpers/utils.py
+++ b/tests/unit/helpers/utils.py
@@ -343,6 +343,70 @@ def __init__(self, spec):
     manager = BuildManager(type_map)
     return manager

+############################################
+# Qux: A test class with variable data shapes
+############################################
+class QuxData(Data):
+    pass
+
+
+class QuxBucket(Container):
+    "PseudoFile"
+    @docval(
+        {"name": "name", "type": str, "doc": "the name of this bucket"},
+        {"name": "qux_data", "type": QuxData, "doc": "Data with user defined shape."},
+    )
+    def __init__(self, **kwargs):
+        name, qux_data = getargs("name", "qux_data", kwargs)
+        super().__init__(name=name)
+        self.__qux_data = qux_data
+        self.__qux_data.parent = self
+
+    @property
+    def qux_data(self):
+        return self.__qux_data
+
+
+def get_qux_buildmanager(shape):
+    qux_data_spec = DatasetSpec(
+        doc="A test dataset of references specification with a data type",
+        name="qux_data",
+        data_type_def="QuxData",
+        shape=shape,
+        dtype='int'
+    )
+
+    qux_bucket_spec = GroupSpec(
+        doc="A test group specification for a data type containing data type",
+        data_type_def="QuxBucket",
+        datasets=[
+            DatasetSpec(doc="doc", data_type_inc="QuxData"),
+        ],
+    )
+
+    spec_catalog = SpecCatalog()
+    spec_catalog.register_spec(qux_data_spec, "test.yaml")
+    spec_catalog.register_spec(qux_bucket_spec, "test.yaml")
+
+    namespace = SpecNamespace(
+        "a test namespace",
+        CORE_NAMESPACE,
+        [{"source": "test.yaml"}],
+        version="0.1.0",
+        catalog=spec_catalog,
+    )
+
+    namespace_catalog = NamespaceCatalog()
+    namespace_catalog.add_namespace(CORE_NAMESPACE, namespace)
+
+    type_map = TypeMap(namespace_catalog)
+    type_map.register_container_type(CORE_NAMESPACE, "QuxData", QuxData)
+    type_map.register_container_type(CORE_NAMESPACE, "QuxBucket", QuxBucket)
+
+    manager = BuildManager(type_map)
+    return manager
+
+
 ############################################
 # Baz example data containers and specs
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 9e87ecba7..6518524de 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -10,9 +10,12 @@
 import zipfile

 import h5py
-import numpy as np
 from h5py import SoftLink, HardLink, ExternalLink, File
 from h5py import filters as h5py_filters
+
+import numpy as np
+import numpy.testing as npt
+
 from hdmf.backends.hdf5 import H5DataIO
 from hdmf.backends.hdf5.h5tools import HDF5IO, SPEC_LOC_ATTR, H5PY_3
 from hdmf.backends.io import HDMFIO
@@ -30,11 +33,11 @@
 from hdmf.term_set import TermSet, TermSetWrapper
 from hdmf.utils import get_data_shape

-
 from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
                                       Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager,
                                       CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper,
-                                      CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace)
+                                      CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace,
+                                      QuxData, QuxBucket, get_qux_buildmanager)

 try:
     import zarr
@@ -3748,3 +3751,22 @@ def test_expand_false(self):
         self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data),
                          (5,))
+
+    def test_expand_set_shape(self):
+        qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]])
+        quxbucket = QuxBucket('bucket1', qux)
+
+        manager = get_qux_buildmanager([None, 3])
+
+        with HDF5IO(self.path, manager=manager, mode='w') as io:
+            io.write(quxbucket, expandable=True)
+
+        io = HDF5IO(self.path, manager=manager, mode='r+')
+        read_quxbucket = io.read()
+        read_quxbucket.qux_data.append([7,8,9])
+
+        expected = np.array([[1, 2, 3],
+                             [4, 5, 6],
+                             [7, 8, 9]])
+        npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected)
+        self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3))

From aeeadc0173b52937fd5d33f090bc5235e323c64a Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 08:56:28 -0700
Subject: [PATCH 06/19] dtype

---
 src/hdmf/build/objectmapper.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index d2c239110..90f6cefc1 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -724,7 +724,10 @@ def build(self, **kwargs):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
-            dimension_labels, matched_shape = self.__get_spec_info(container.data, spec_shape, spec_dims)
+            dimension_labels, matched_shape = self.__get_spec_info(container.data,
+                                                                   spec_shape,
+                                                                   spec_dims,
+                                                                   spec_dtype)
             if isinstance(spec_dtype, RefSpec):
                 self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                   % (container.__class__.__name__, container.name, repr(source)))
@@ -825,7 +828,7 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

-    def __get_spec_info(self, data, spec_shape, spec_dims):
+    def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
@@ -834,7 +837,10 @@ def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         elif spec_shape is None and spec_dims is not None:
             return spec_dims, None
         else:
-            data_shape = get_data_shape(data)
+            if spec_dtype is not None and isinstance(spec_dtype, list):
+                data_shape = (len(data),)
+            else:
+                data_shape = get_data_shape(data)
             # if shape is a list of allowed shapes, find the index of the shape that matches the data
             if isinstance(spec_shape[0], list):
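The tests added in PATCH 05 pin down the intended end-to-end flow: write with expandable=True, reopen the file in 'r+' mode, and append in place. Condensed from test_expand_set_shape (this relies on the QuxData/QuxBucket containers and the get_qux_buildmanager factory added to tests/unit/helpers/utils.py in the same patch; the file path is illustrative):

    from hdmf.backends.hdf5.h5tools import HDF5IO
    from tests.unit.helpers.utils import QuxData, QuxBucket, get_qux_buildmanager

    manager = get_qux_buildmanager([None, 3])  # spec shape: unlimited rows, 3 columns
    bucket = QuxBucket('bucket1', QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]))

    with HDF5IO('example.h5', manager=manager, mode='w') as io:
        io.write(bucket, expandable=True)

    with HDF5IO('example.h5', manager=manager, mode='r+') as io:
        read_bucket = io.read()
        read_bucket.qux_data.append([7, 8, 9])  # resizes the HDF5 dataset in place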
From 1b4b593165859ecf4a2f442a0926569d71afb7c2 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:13:34 -0700
Subject: [PATCH 07/19] fix

---
 src/hdmf/backends/hdf5/h5tools.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 6c276ceef..b2e7170a0 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1494,8 +1494,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
                     io_settings['maxshape'] = matched_spec_shape
-                else:
-                    io_settings['maxshape'] = tuple([None]*len(data_shape))
+
         # Create the dataset
         try:

From 8164d335edf3e1cb0a562aa8e705b927753cdc6c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 7 Aug 2024 20:14:49 +0000
Subject: [PATCH 08/19] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/hdmf/backends/hdf5/h5tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index b2e7170a0..c9cbea752 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1494,7 +1494,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
                     io_settings['maxshape'] = matched_spec_shape
-
+
         # Create the dataset
         try:

From ae1b590ad897459d46b28d80985e9b9651a222d1 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:52:49 -0700
Subject: [PATCH 09/19] fix again

---
 test_io_hdf5.h5            | Bin 0 -> 14784 bytes
 tests/unit/test_io_hdf5.py |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 test_io_hdf5.h5

diff --git a/test_io_hdf5.h5 b/test_io_hdf5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..c77c7214a6d037d90b6e113e3f4e6afbc7d816d3
GIT binary patch
literal 14784
zcmeHNF>ljA6h0?y3BeVq)PXXASYRtmWlKd9JRn*DLpS3jMk%y)lp3jBnS#X7sbXYA
z%*-ewKZB8xe*pLHy)UY*Mj<7Ykoznp@9y3C-QDNU?|kp>eX!A7tlq2|x+)bjXKF5q
zzu$5VAF>8{l)sdOCvqV2bBPn4DH|%gA^IBZ-(f{p^e>(@8phUyeMl)bKjt}AfkNe_
zLLg|YZ_$9aWz^u)ci|bpNaKSvY`ty2iBqQnFYBlv0KR93w|VGw0d$fd2;{-5A1FL!&K%%4Kk!lJ2i&xG6n3ICx-dVmdW~Cx
zAJ~@sVXhzOk4oUhegF@~lNGM#F_OHXHly`y*{&O?q-I}ue&_4X(7er&zQoI9o8y&D
zxl}q{ay1N@8o3GItU4|EA>XQ_H0?jIn!H^+PqATZVCCd)pZQ;Yep$Od^Cm8ZL7F${
zTNCjQNVoVs1A*a6RtesX2$L<=oFbqIC<2OrBA^Hq0D+E-VPE8-$af+?h&&PbN#qxi
zUqyZssX`G@1QY>9KoL*`{(l6pADxfe`6h6k_iXrR*~W<|h#00vy;z2{V&ha4f~FtG
zia26`xDjm=4)1Yg(&g>#M|<%>FKYj-J=)%YJ=(JOP2K)e+Y7FL6vv_sk7@cvee}&5
znXz`NE*_S&UdQb;+`)IzxVRYND!^~xtQdoHEBWzGiyv*TV7zl;TZJN^2q*%IfFdv}
z1Rg)yeB$P8n#k>Sdj6(-Gf7Ao9FxN_19wiYo%At(%dsGPT9dC<#0QWtcgb-9ve(?%
o#a+e>be~5MQ=teb0*Zhlpa>`eihv@Z2q*%IfFhs>TpR*_0Cd68Bme*a

literal 0
HcmV?d00001

diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py
index d15848abf..29b7f2d7f 100644
--- a/tests/unit/test_io_hdf5.py
+++ b/tests/unit/test_io_hdf5.py
@@ -225,5 +225,5 @@ def test_dataset_shape(self):
         io.write_builder(self.builder)
         builder = io.read_builder()
         dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data
-        self.assertEqual(get_data_shape(dset), (None,))
+        self.assertEqual(get_data_shape(dset), (10,))
         io.close()

From 4e01d807bbe37b02e8c92c6e0b10d1f63d2d76b8 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:53:13 -0700
Subject: [PATCH 10/19] fix again

---
 test_io_hdf5.h5 | Bin 14784 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test_io_hdf5.h5

diff --git a/test_io_hdf5.h5 b/test_io_hdf5.h5
deleted file mode 100644
index c77c7214a6d037d90b6e113e3f4e6afbc7d816d3..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 14784
zcmeHNF>ljA6h0?y3BeVq)PXXASYRtmWlKd9JRn*DLpS3jMk%y)lp3jBnS#X7sbXYA
z%*-ewKZB8xe*pLHy)UY*Mj<7Ykoznp@9y3C-QDNU?|kp>eX!A7tlq2|x+)bjXKF5q
zzu$5VAF>8{l)sdOCvqV2bBPn4DH|%gA^IBZ-(f{p^e>(@8phUyeMl)bKjt}AfkNe_
zLLg|YZ_$9aWz^u)ci|bpNaKSvY`ty2iBqQnFYBlv0KR93w|VGw0d$fd2;{-5A1FL!&K%%4Kk!lJ2i&xG6n3ICx-dVmdW~Cx
zAJ~@sVXhzOk4oUhegF@~lNGM#F_OHXHly`y*{&O?q-I}ue&_4X(7er&zQoI9o8y&D
zxl}q{ay1N@8o3GItU4|EA>XQ_H0?jIn!H^+PqATZVCCd)pZQ;Yep$Od^Cm8ZL7F${
zTNCjQNVoVs1A*a6RtesX2$L<=oFbqIC<2OrBA^Hq0D+E-VPE8-$af+?h&&PbN#qxi
zUqyZssX`G@1QY>9KoL*`{(l6pADxfe`6h6k_iXrR*~W<|h#00vy;z2{V&ha4f~FtG
zia26`xDjm=4)1Yg(&g>#M|<%>FKYj-J=)%YJ=(JOP2K)e+Y7FL6vv_sk7@cvee}&5
znXz`NE*_S&UdQb;+`)IzxVRYND!^~xtQdoHEBWzGiyv*TV7zl;TZJN^2q*%IfFdv}
z1Rg)yeB$P8n#k>Sdj6(-Gf7Ao9FxN_19wiYo%At(%dsGPT9dC<#0QWtcgb-9ve(?%
o#a+e>be~5MQ=teb0*Zhlpa>`eihv@Z2q*%IfFhs>TpR*_0Cd68Bme*a
From 345b2f65c858916210342e838341f674b6d7605b Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 14 Aug 2024 14:11:21 -0700
Subject: [PATCH 11/19] clean up

---
 src/hdmf/build/objectmapper.py | 68 +++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 90f6cefc1..0484cad8f 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -828,48 +828,55 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

+    def __get_matched_dimension(self, data_shape, spec_shape, spec_dtype=None):
+        # if shape is a list of allowed shapes, find the index of the shape that matches the data
+        if isinstance(spec_shape[0], list):
+            match_shape_inds = list()
+            for i, s in enumerate(spec_shape):
+                # skip this shape if it has a different number of dimensions from the data
+                if len(s) != len(data_shape):
+                    continue
+                # check each dimension. None means any length is allowed
+                match = True
+                for j, d in enumerate(data_shape):
+                    if s[j] is not None and s[j] != d:
+                        match = False
+                        break
+                if match:
+                    match_shape_inds.append(i)
+            return match_shape_inds
+
     def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
-        elif spec_shape is not None and spec_dims is None:
-            return None, tuple(spec_shape)
-        elif spec_shape is None and spec_dims is not None:
-            return spec_dims, None
+        # elif spec_shape is not None and spec_dims is None:
         else:
             if spec_dtype is not None and isinstance(spec_dtype, list):
                 data_shape = (len(data),)
             else:
                 data_shape = get_data_shape(data)
-            # if shape is a list of allowed shapes, find the index of the shape that matches the data
+
             if isinstance(spec_shape[0], list):
-                match_shape_inds = list()
-                for i, s in enumerate(spec_shape):
-                    # skip this shape if it has a different number of dimensions from the data
-                    if len(s) != len(data_shape):
-                        continue
-                    # check each dimension. None means any length is allowed
-                    match = True
-                    for j, d in enumerate(data_shape):
-                        if s[j] is not None and s[j] != d:
-                            match = False
-                            break
-                    if match:
-                        match_shape_inds.append(i)
-                # use the most specific match -- the one with the fewest Nones
-                if match_shape_inds:
-                    if len(match_shape_inds) == 1:
-                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
-                    else:
-                        count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
-                        index_min_count = count_nones.index(min(count_nones))
-                        best_match_ind = match_shape_inds[index_min_count]
-                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
-                else:
-                    # no matches found
+                match_shape_inds = self.__get_matched_dimension(data_shape, spec_shape, spec_dtype)
+                if len(match_shape_inds) == 0:
+                    # no matches found
                     msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
+                elif len(match_shape_inds) == 1:
+                    if spec_dims is not None:
+                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
+                    else:
+                        return spec_dims, tuple(spec_shape[match_shape_inds[0]])
+                else:
+                    count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
+                    index_min_count = count_nones.index(min(count_nones))
+                    best_match_ind = match_shape_inds[index_min_count]
+                    if spec_dims is not None:
+                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
+                    else:
+                        return spec_dims, tuple(spec_shape[best_match_ind])
             else:
                 if len(data_shape) != len(spec_shape):
                     msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
                 # check each dimension. None means any length is allowed
                 match = True
                 for j, d in enumerate(data_shape):
                     if spec_shape[j] is not None and spec_shape[j] != d:
                         match = False
                         break
                 if not match:
                     msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
                 # shape is a single list of allowed dimension lengths
-                return tuple(spec_dims), tuple(spec_shape)
+                if spec_dims is not None:
+                    return tuple(spec_dims), tuple(spec_shape)
+                else:
+                    return None, tuple(spec_shape)
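PATCH 11 factors the candidate search into __get_matched_dimension but keeps the original tie-breaking rule: when several allowed spec shapes fit the data, the most specific one wins, i.e. the candidate with the fewest None wildcards. Stripped to its core (a standalone sketch, not the HDMF method itself):

    def pick_most_specific(match_shape_inds, spec_shape):
        # among matching candidates, the shape with the fewest None wildcards wins
        count_nones = [sum(x is None for x in spec_shape[k]) for k in match_shape_inds]
        return match_shape_inds[count_nones.index(min(count_nones))]

    # data of shape (2, 3) matches both [None, None] and [None, 3];
    # [None, 3] is more specific, so its index is returned and the
    # written dataset gets maxshape (None, 3).
    print(pick_most_specific([0, 1], [[None, None], [None, 3]]))  # -> 1

The test_multi_shape_no_labels test added later in the series (PATCH 19) exercises exactly this case, with a spec shape of [[None, None], [None, 3]] and no dimension labels.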
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): From 2e446ef570b60c03b621260ce87b96ae21329772 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:40:03 -0700 Subject: [PATCH 13/19] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6369094..b2a93bacb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) + ## HDMF 3.14.3 (July 29, 2024) From de090810d9baa42e6e22f7d146cdd5ff6296adaa Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:40:46 -0700 Subject: [PATCH 14/19] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2a93bacb..e0b52b201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) - Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) - ## HDMF 3.14.3 (July 29, 2024) ### Enhancements From dc78d6b9933fe42b638177795492f8cb26d104ef Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:43:02 -0700 Subject: [PATCH 15/19] Update h5tools.py --- src/hdmf/backends/hdf5/h5tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index c9cbea752..b02f47f05 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -385,7 +385,7 @@ def copy_file(self, **kwargs): 'doc': 'A HERD object to populate with references.', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions', 'of a dataset to None and enabling auto-chunking by default.')}) def write(self, **kwargs): """Write the container to an HDF5 file.""" @@ -831,7 +831,7 @@ def close_linked_files(self): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions', 'of a dataset to None and enabling auto-chunking by default.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) @@ -1010,7 +1010,7 @@ def _filler(): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': 
From dc78d6b9933fe42b638177795492f8cb26d104ef Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Wed, 14 Aug 2024 17:43:02 -0700
Subject: [PATCH 15/19] Update h5tools.py

---
 src/hdmf/backends/hdf5/h5tools.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index c9cbea752..b02f47f05 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -385,7 +385,7 @@ def copy_file(self, **kwargs):
          'doc': 'A HERD object to populate with references.',
          'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
@@ -831,7 +831,7 @@ def close_linked_files(self):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
@@ -1010,7 +1010,7 @@ def _filler():
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
@@ -1113,7 +1113,7 @@ def write_link(self, **kwargs):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901

From 7452bee91bca0a0d7010cc2278588cdfdd78721e Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Wed, 14 Aug 2024 17:44:34 -0700
Subject: [PATCH 16/19] Update h5tools.py

---
 src/hdmf/backends/hdf5/h5tools.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index b02f47f05..273918840 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -385,8 +385,7 @@ def copy_file(self, **kwargs):
          'doc': 'A HERD object to populate with references.',
          'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')})
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -830,8 +829,7 @@ def close_linked_files(self):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')})
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
@@ -1008,8 +1006,7 @@ def _filler():
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')},
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
         parent, builder = popargs('parent', 'builder', kwargs)
@@ -1110,8 +1107,7 @@ def write_link(self, **kwargs):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')},
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901
         """ Write a dataset to HDF5

From 626237e878006797daa8437be70e6be35d32e56e Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 14 Aug 2024 17:49:08 -0700
Subject: [PATCH 17/19] clean up for review

---
 src/hdmf/build/objectmapper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 0484cad8f..6dbfdb1d4 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -850,7 +850,6 @@ def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
-        # elif spec_shape is not None and spec_dims is None:
         else:
             if spec_dtype is not None and isinstance(spec_dtype, list):
                 data_shape = (len(data),)
- """ - # NOTE: attr1 doesn't map to anything but is required in the test container class - bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') - bar_data_holder_inst = BarDataHolder( - name='my_bar_holder', - bar_datas=[bar_data_inst], - ) - - builder = self.manager.build(bar_data_holder_inst, source='test.h5') - assert builder.datasets['my_bar'].dimension_labels is None - class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): From 5567f04388e30b82c9b2b1a1790de1da806d701e Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 14 Aug 2024 17:52:44 -0700 Subject: [PATCH 19/19] clean up --- tests/unit/test_io_hdf5_h5tools.py | 41 ++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 6518524de..4dece7398 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -3745,12 +3745,25 @@ def test_expand_false(self): with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile, expandable=False) - io = HDF5IO(self.path, manager=self.manager, mode='r') - read_foofile = io.read() - self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, - read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) - self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), - (5,)) + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, + read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), + (5,)) + + def test_multi_shape_no_labels(self): + qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) + quxbucket = QuxBucket('bucket1', qux) + + manager = get_qux_buildmanager([[None, None],[None, 3]]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(quxbucket, expandable=True) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_quxbucket = io.read() + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) def test_expand_set_shape(self): qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) @@ -3761,12 +3774,12 @@ def test_expand_set_shape(self): with HDF5IO(self.path, manager=manager, mode='w') as io: io.write(quxbucket, expandable=True) - io = HDF5IO(self.path, manager=manager, mode='r+') - read_quxbucket = io.read() - read_quxbucket.qux_data.append([7,8,9]) + with HDF5IO(self.path, manager=manager, mode='r+') as io: + read_quxbucket = io.read() + read_quxbucket.qux_data.append([7,8,9]) - expected = np.array([[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]) - npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) - self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + expected = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3))