From b94068c24c138757fe63d0adf23fc34471a6e72d Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Mon, 29 Jul 2024 07:50:32 -0700
Subject: [PATCH 01/19] prior

---
 src/hdmf/backends/hdf5/h5tools.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 8135d75e7..592ffb5fe 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -383,7 +383,10 @@ def copy_file(self, **kwargs):
          'default': True},
         {'name': 'herd', 'type': 'hdmf.common.resources.HERD',
          'doc': 'A HERD object to populate with references.',
-         'default': None})
+         'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -826,10 +829,16 @@ def close_linked_files(self):
          'doc': 'exhaust DataChunkIterators one at a time. If False, exhaust them concurrently',
          'default': True},
         {'name': 'export_source', 'type': str,
-         'doc': 'The source of the builders when exporting', 'default': None})
+         'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
-        link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
+        link_data, exhaust_dci, export_source = getargs('link_data',
+                                                        'exhaust_dci',
+                                                        'export_source',
+                                                        kwargs)
         self.logger.debug("Writing GroupBuilder '%s' to path '%s' with kwargs=%s"
                           % (f_builder.name, self.source, kwargs))
         for name, gbldr in f_builder.groups.items():
@@ -1000,6 +1009,9 @@ def _filler():
          'default': True},
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
         parent, builder = popargs('parent', 'builder', kwargs)
@@ -1100,6 +1112,9 @@ def write_link(self, **kwargs):
          'default': True},
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
+        {'name': 'expandable', 'type': bool, 'default': True,
+         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+                 'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901
         """ Write a dataset to HDF5
@@ -1107,7 +1122,7 @@ def write_dataset(self, **kwargs):  # noqa: C901
         The function uses other dataset-dependent write functions, e.g,
         ``__scalar_fill__``, ``__list_fill__``, and ``__setup_chunked_dset__`` to write the data.
         """
-        parent, builder = popargs('parent', 'builder', kwargs)
+        parent, builder, expandable = popargs('parent', 'builder', 'expandable', kwargs)
         link_data, exhaust_dci, export_source = getargs('link_data', 'exhaust_dci', 'export_source', kwargs)
         self.logger.debug("Writing DatasetBuilder '%s' to parent group '%s'" % (builder.name, parent.name))
         if self.get_written(builder):
@@ -1228,7 +1243,7 @@ def _filler():
                 return
             # If the compound data type contains only regular data (i.e., no references) then we can write it as usual
             else:
-                dset = self.__list_fill__(parent, name, data, options)
+                dset = self.__list_fill__(parent, name, data, expandable, options)
         # Write a dataset containing references, i.e., a region or object reference.
         # NOTE: we can ignore options['io_settings'] for scalar data
         elif self.__is_ref(options['dtype']):
@@ -1323,7 +1338,7 @@ def _filler():
             self.__dci_queue.append(dataset=dset, data=data)
         # Write a regular in memory array (e.g., numpy array, list etc.)
         elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, options)
+            dset = self.__list_fill__(parent, name, data, expandable, options)
         # Write a regular scalar dataset
         else:
             dset = self.__scalar_fill__(parent, name, data, options)
@@ -1451,7 +1466,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
         return dset

     @classmethod
-    def __list_fill__(cls, parent, name, data, options=None):
+    def __list_fill__(cls, parent, name, data, expandable, options=None):
         # define the io settings and data type if necessary
         io_settings = {}
         dtype = None

From b251b78ce31dda82ea406b13b8ef7e7a49bedc68 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 31 Jul 2024 10:43:49 -0700
Subject: [PATCH 02/19] checkpoint

---
 src/hdmf/backends/hdf5/h5tools.py | 6 ++++++
 src/hdmf/build/objectmapper.py    | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 592ffb5fe..5926733d4 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1132,6 +1132,7 @@ def write_dataset(self, **kwargs):  # noqa: C901
         data = builder.data
         dataio = None
         options = dict()   # dict with additional
+        # breakpoint()
         if isinstance(data, H5DataIO):
             options['io_settings'] = data.io_settings
             dataio = data
@@ -1488,6 +1489,11 @@ def __list_fill__(cls, parent, name, data, expandable, options=None):
             data_shape = (len(data),)
         else:
             data_shape = get_data_shape(data)
+        # breakpoint()
+        if expandable:
+            # Don't override existing settings
+            if 'maxshape' not in io_settings:
+                io_settings['maxshape'] = tuple([None]*len(data_shape))

         # Create the dataset
         try:
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index b5815ee2c..90cb0654b 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -722,6 +722,7 @@ def build(self, **kwargs):
             if not isinstance(container, Data):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
+            breakpoint()
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
             dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims)
             if isinstance(spec_dtype, RefSpec):
From 2387a5ee349615e2b40baad8f9ccbe205a9a6936 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Sat, 3 Aug 2024 14:15:43 -0700
Subject: [PATCH 03/19] possible poc

---
 src/hdmf/backends/hdf5/h5tools.py  | 15 ++++++++-------
 src/hdmf/build/builders.py         | 16 ++++++++++++----
 src/hdmf/build/objectmapper.py     | 24 ++++++++++++++----------
 tests/unit/test_io_hdf5.py         |  2 +-
 tests/unit/test_io_hdf5_h5tools.py | 27 +++++++++++++++++++++++++--
 5 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 5926733d4..1d41ad5e7 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1130,9 +1130,9 @@ def write_dataset(self, **kwargs):  # noqa: C901
             return None
         name = builder.name
         data = builder.data
+        matched_spec_shape = builder.spec_shapes
         dataio = None
         options = dict()   # dict with additional
-        # breakpoint()
         if isinstance(data, H5DataIO):
             options['io_settings'] = data.io_settings
             dataio = data
@@ -1244,7 +1244,7 @@ def _filler():
                 return
             # If the compound data type contains only regular data (i.e., no references) then we can write it as usual
             else:
-                dset = self.__list_fill__(parent, name, data, expandable, options)
+                dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
         # Write a dataset containing references, i.e., a region or object reference.
         # NOTE: we can ignore options['io_settings'] for scalar data
         elif self.__is_ref(options['dtype']):
@@ -1339,7 +1339,7 @@ def _filler():
             self.__dci_queue.append(dataset=dset, data=data)
         # Write a regular in memory array (e.g., numpy array, list etc.)
         elif hasattr(data, '__len__'):
-            dset = self.__list_fill__(parent, name, data, expandable, options)
+            dset = self.__list_fill__(parent, name, data, matched_spec_shape, expandable, options)
         # Write a regular scalar dataset
         else:
             dset = self.__scalar_fill__(parent, name, data, options)
@@ -1467,7 +1467,7 @@ def __chunked_iter_fill__(cls, parent, name, data, options=None):
         return dset

     @classmethod
-    def __list_fill__(cls, parent, name, data, expandable, options=None):
+    def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
         # define the io settings and data type if necessary
         io_settings = {}
         dtype = None
@@ -1489,12 +1489,13 @@ def __list_fill__(cls, parent, name, data, expandable, options=None):
             data_shape = (len(data),)
         else:
             data_shape = get_data_shape(data)
-        # breakpoint()
         if expandable:
             # Don't override existing settings
             if 'maxshape' not in io_settings:
-                io_settings['maxshape'] = tuple([None]*len(data_shape))
-
+                if matched_spec_shape is not None:
+                    io_settings['maxshape'] = tuple([None]*len(matched_spec_shape))
+                else:
+                    io_settings['maxshape'] = tuple([None]*len(data_shape))
         # Create the dataset
         try:
             dset = parent.create_dataset(name, shape=data_shape, dtype=dtype, **io_settings)
diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py
index cb658b6d4..6ed453166 100644
--- a/src/hdmf/build/builders.py
+++ b/src/hdmf/build/builders.py
@@ -330,6 +330,9 @@ class DatasetBuilder(BaseBuilder):
          'doc': 'The datatype of this dataset.', 'default': None},
         {'name': 'attributes', 'type': dict,
          'doc': 'A dictionary of attributes to create in this dataset.', 'default': dict()},
+        {'name': 'spec_shapes', 'type': tuple,
+         'doc': ('The shape(s) defined in the spec.'),
+         'default': None},
         {'name': 'dimension_labels', 'type': tuple,
          'doc': ('A list of labels for each dimension of this dataset from the spec. Currently this is '
                  'supplied only on build.'),
@@ -341,15 +344,15 @@ class DatasetBuilder(BaseBuilder):
         {'name': 'source', 'type': str, 'doc': 'The source of the data in this builder.', 'default': None})
     def __init__(self, **kwargs):
         """ Create a Builder object for a dataset """
-        name, data, dtype, attributes, dimension_labels, maxshape, chunks, parent, source = getargs(
-            'name', 'data', 'dtype', 'attributes', 'dimension_labels', 'maxshape', 'chunks', 'parent', 'source',
-            kwargs
-        )
+        name, data, dtype, attributes, spec_shapes, dimension_labels, maxshape, chunks, parent, source = getargs(
+            'name', 'data', 'dtype', 'attributes', 'spec_shapes', 'dimension_labels', 'maxshape', 'chunks',
+            'parent', 'source', kwargs)
         super().__init__(name, attributes, parent, source)
         self['data'] = data
         self['attributes'] = _copy.copy(attributes)
         self.__dimension_labels = dimension_labels
         self.__chunks = chunks
+        self.__spec_shapes = spec_shapes
         self.__maxshape = maxshape
         if isinstance(data, BaseBuilder):
             if dtype is None:
@@ -357,6 +360,11 @@ def __init__(self, **kwargs):
         self.__dtype = dtype
         self.__name = name

+    @property
+    def spec_shapes(self):
+        """The shapes defined in the spec."""
+        return self.__spec_shapes
+
     @property
     def data(self):
         """The data stored in the dataset represented by this builder."""
diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 90cb0654b..589078a6f 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -722,9 +722,8 @@ def build(self, **kwargs):
             if not isinstance(container, Data):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
-            breakpoint()
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
-            dimension_labels = self.__get_dimension_labels_from_spec(container.data, spec_shape, spec_dims)
+            dimension_labels, matched_shape = self.__get_spec_info(container.data, spec_shape, spec_dims)
             if isinstance(spec_dtype, RefSpec):
                 self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                   % (container.__class__.__name__, container.name, repr(source)))
@@ -735,6 +734,7 @@ def build(self, **kwargs):
                     parent=parent,
                     source=source,
                     dtype=spec_dtype.reftype,
+                    spec_shapes=matched_shape,
                     dimension_labels=dimension_labels,
                 )
                 manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager))
@@ -749,6 +749,7 @@ def build(self, **kwargs):
                     parent=parent,
                     source=source,
                     dtype=spec_dtype,
+                    spec_shapes=matched_shape,
                     dimension_labels=dimension_labels,
                 )
                 manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container,
@@ -767,6 +768,7 @@ def build(self, **kwargs):
                         parent=parent,
                         source=source,
                         dtype="object",
+                        spec_shapes=matched_shape,
                         dimension_labels=dimension_labels,
                     )
                     manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager))
@@ -790,6 +792,7 @@ def build(self, **kwargs):
                 parent=parent,
                 source=source,
                 dtype=dtype,
+                spec_shapes=matched_shape,
                 dimension_labels=dimension_labels,
             )

@@ -821,9 +824,10 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

-    def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
+    def __get_spec_info(self, data, spec_shape, spec_dims):
+        """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None or spec_dims is None:
-            return None
+            return None, None
         data_shape = get_data_shape(data)
         # if shape is a list of allowed shapes, find the index of the shape that matches the data
         if isinstance(spec_shape[0], list):
@@ -843,22 +847,22 @@ def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
             # use the most specific match -- the one with the fewest Nones
             if match_shape_inds:
                 if len(match_shape_inds) == 1:
-                    return tuple(spec_dims[match_shape_inds[0]])
+                    return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
                 else:
                     count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
                     index_min_count = count_nones.index(min(count_nones))
                     best_match_ind = match_shape_inds[index_min_count]
-                    return tuple(spec_dims[best_match_ind])
+                    return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
             else:
                 # no matches found
                 msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
         else:
             if len(data_shape) != len(spec_shape):
                 msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
             # check each dimension. None means any length is allowed
             match = True
             for j, d in enumerate(data_shape):
@@ -868,9 +872,9 @@ def __get_dimension_labels_from_spec(self, data, spec_shape, spec_dims) -> tuple:
                 if spec_shape[j] is not None and spec_shape[j] != d:
                     match = False
                     break
             if not match:
                 msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                 warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None
+                return None, None
             # shape is a single list of allowed dimension lengths
-            return tuple(spec_dims)
+            return tuple(spec_dims), tuple(spec_shape)
diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py
index 29b7f2d7f..d15848abf 100644
--- a/tests/unit/test_io_hdf5.py
+++ b/tests/unit/test_io_hdf5.py
@@ -225,5 +225,5 @@ def test_dataset_shape(self):
         io.write_builder(self.builder)
         builder = io.read_builder()
         dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data
-        self.assertEqual(get_data_shape(dset), (10,))
+        self.assertEqual(get_data_shape(dset), (None,))
         io.close()
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 5a4fd5a32..9e87ecba7 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -28,6 +28,7 @@
 from hdmf.testing import TestCase, remove_test_file
 from hdmf.common.resources import HERD
 from hdmf.term_set import TermSet, TermSetWrapper
+from hdmf.utils import get_data_shape
 from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
                                       Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager,
                                       CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper,
                                       CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace)
@@ -739,12 +740,12 @@ def test_copy_h5py_dataset_h5dataio_input(self):
                          self.f['test_copy'][:].tolist())

     def test_list_fill_empty(self):
-        dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}})
+        dset = self.io.__list_fill__(self.f, 'empty_dataset', [], None, True, options={'dtype': int, 'io_settings': {}})
         self.assertTupleEqual(dset.shape, (0,))

     def test_list_fill_empty_no_dtype(self):
         with self.assertRaisesRegex(Exception, r"cannot add \S+ to [/\S]+ - could not determine type"):
-            self.io.__list_fill__(self.f, 'empty_dataset', [])
+            self.io.__list_fill__(self.f, 'empty_dataset', [], None, True)

     def test_read_str(self):
         a = ['a', 'bb', 'ccc', 'dddd', 'e']
@@ -3725,3 +3726,25 @@ def test_set_data_io(self):
         self.data.set_data_io(H5DataIO, dict(chunks=True))
         assert isinstance(self.data.data, H5DataIO)
         assert self.data.data.io_settings["chunks"]
+
+
+class TestExpand(TestCase):
+    def setUp(self):
+        self.manager = get_foo_buildmanager()
+        self.path = get_temp_filepath()
+
+    def test_expand_false(self):
+        # Setup all the data we need
+        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        foobucket = FooBucket('bucket1', [foo1])
+        foofile = FooFile(buckets=[foobucket])
+
+        with HDF5IO(self.path, manager=self.manager, mode='w') as io:
+            io.write(foofile, expandable=False)
+
+        io = HDF5IO(self.path, manager=self.manager, mode='r')
+        read_foofile = io.read()
+        self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data,
+                             read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist())
+        self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data),
+                         (5,))

From bfc85d1dba20107e8ef948268ee6116372cf6a35 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Sat, 3 Aug 2024 14:28:54 -0700
Subject: [PATCH 04/19] fix

---
 src/hdmf/backends/hdf5/h5tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 1d41ad5e7..6c276ceef 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1493,7 +1493,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
-                    io_settings['maxshape'] = tuple([None]*len(matched_spec_shape))
+                    io_settings['maxshape'] = matched_spec_shape
                 else:
                     io_settings['maxshape'] = tuple([None]*len(data_shape))
         # Create the dataset
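With PATCH 03 and 04 in place, maxshape is no longer a blanket tuple of None: when the data matches a shape declared in the schema, that spec shape is used directly, so a dimension fixed in the spec stays fixed while a None (wildcard) dimension becomes unlimited. A short h5py-level sketch of the resulting dataset semantics (the names and values are illustrative, not HDMF API):

    import h5py
    import numpy as np

    with h5py.File("example.h5", "w") as f:
        # matched spec shape (None, 3): unlimited rows, exactly 3 columns
        dset = f.create_dataset("qux_data",
                                data=np.array([[1, 2, 3], [4, 5, 6]]),
                                maxshape=(None, 3))
        dset.resize((3, 3))      # growing along the unlimited axis is allowed
        dset[2, :] = [7, 8, 9]
        # dset.resize((3, 4)) would fail: the second axis is capped at 3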
From 20e61dbf8a9f4e5f255323971050b5b26a6463ac Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Mon, 5 Aug 2024 14:02:51 -0700
Subject: [PATCH 05/19] finished tests

---
 src/hdmf/build/objectmapper.py     | 92 ++++++++++++++++--------------
 tests/unit/helpers/utils.py        | 64 +++++++++++++++++++++
 tests/unit/test_io_hdf5_h5tools.py | 28 ++++++++++-
 3 files changed, 138 insertions(+), 46 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 589078a6f..d2c239110 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -555,6 +555,7 @@ def get_attribute(self, **kwargs):
     def get_attr_value(self, **kwargs):
         ''' Get the value of the attribute corresponding to this spec from the given container '''
         spec, container, manager = getargs('spec', 'container', 'manager', kwargs)
+
         attr_name = self.get_attribute(spec)
         if attr_name is None:
             return None
@@ -826,55 +827,60 @@ def __check_dset_spec(self, orig, ext):
     def __get_spec_info(self, data, spec_shape, spec_dims):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
-        if spec_shape is None or spec_dims is None:
+        if spec_shape is None and spec_dims is None:
             return None, None
+        elif spec_shape is not None and spec_dims is None:
+            return None, tuple(spec_shape)
+        elif spec_shape is None and spec_dims is not None:
+            return spec_dims, None
+        else:
+            data_shape = get_data_shape(data)
+            # if shape is a list of allowed shapes, find the index of the shape that matches the data
+            if isinstance(spec_shape[0], list):
+                match_shape_inds = list()
+                for i, s in enumerate(spec_shape):
+                    # skip this shape if it has a different number of dimensions from the data
+                    if len(s) != len(data_shape):
+                        continue
+                    # check each dimension. None means any length is allowed
+                    match = True
+                    for j, d in enumerate(data_shape):
+                        if s[j] is not None and s[j] != d:
+                            match = False
+                            break
+                    if match:
+                        match_shape_inds.append(i)
+                # use the most specific match -- the one with the fewest Nones
+                if match_shape_inds:
+                    if len(match_shape_inds) == 1:
+                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
+                    else:
+                        count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
+                        index_min_count = count_nones.index(min(count_nones))
+                        best_match_ind = match_shape_inds[index_min_count]
+                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
+                else:
+                    # no matches found
+                    msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+            else:
+                if len(data_shape) != len(spec_shape):
+                    msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+                # check each dimension. None means any length is allowed
+                match = True
+                for j, d in enumerate(data_shape):
+                    if spec_shape[j] is not None and spec_shape[j] != d:
+                        match = False
+                        break
+                if not match:
+                    msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
+                    warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
+                    return None, None
+                # shape is a single list of allowed dimension lengths
+                return tuple(spec_dims), tuple(spec_shape)
-        data_shape = get_data_shape(data)
-        # if shape is a list of allowed shapes, find the index of the shape that matches the data
-        if isinstance(spec_shape[0], list):
-            match_shape_inds = list()
-            for i, s in enumerate(spec_shape):
-                # skip this shape if it has a different number of dimensions from the data
-                if len(s) != len(data_shape):
-                    continue
-                # check each dimension. None means any length is allowed
-                match = True
-                for j, d in enumerate(data_shape):
-                    if s[j] is not None and s[j] != d:
-                        match = False
-                        break
-                if match:
-                    match_shape_inds.append(i)
-            # use the most specific match -- the one with the fewest Nones
-            if match_shape_inds:
-                if len(match_shape_inds) == 1:
-                    return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
-                else:
-                    count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
-                    index_min_count = count_nones.index(min(count_nones))
-                    best_match_ind = match_shape_inds[index_min_count]
-                    return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
-            else:
-                # no matches found
-                msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-        else:
-            if len(data_shape) != len(spec_shape):
-                msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-            # check each dimension. None means any length is allowed
-            match = True
-            for j, d in enumerate(data_shape):
-                if spec_shape[j] is not None and spec_shape[j] != d:
-                    match = False
-                    break
-            if not match:
-                msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
-                warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
-                return None, None
-            # shape is a single list of allowed dimension lengths
-            return tuple(spec_dims), tuple(spec_shape)
diff --git a/tests/unit/helpers/utils.py b/tests/unit/helpers/utils.py
index 0f4b3c4bf..b2d2cb4ae 100644
--- a/tests/unit/helpers/utils.py
+++ b/tests/unit/helpers/utils.py
@@ -343,6 +343,70 @@ def __init__(self, spec):
     manager = BuildManager(type_map)
     return manager

+############################################
+# Qux: A test class with variable data shapes
+############################################
+class QuxData(Data):
+    pass
+
+
+class QuxBucket(Container):
+    "PseudoFile"
+    @docval(
+        {"name": "name", "type": str, "doc": "the name of this bucket"},
+        {"name": "qux_data", "type": QuxData, "doc": "Data with user defined shape."},
+    )
+    def __init__(self, **kwargs):
+        name, qux_data = getargs("name", "qux_data", kwargs)
+        super().__init__(name=name)
+        self.__qux_data = qux_data
+        self.__qux_data.parent = self
+
+    @property
+    def qux_data(self):
+        return self.__qux_data
+
+
+def get_qux_buildmanager(shape):
+    qux_data_spec = DatasetSpec(
+        doc="A test dataset of references specification with a data type",
+        name="qux_data",
+        data_type_def="QuxData",
+        shape=shape,
+        dtype='int'
+    )
+
+    qux_bucket_spec = GroupSpec(
+        doc="A test group specification for a data type containing data type",
+        data_type_def="QuxBucket",
+        datasets=[
+            DatasetSpec(doc="doc", data_type_inc="QuxData"),
+        ],
+    )
+
+    spec_catalog = SpecCatalog()
+    spec_catalog.register_spec(qux_data_spec, "test.yaml")
+    spec_catalog.register_spec(qux_bucket_spec, "test.yaml")
+
+    namespace = SpecNamespace(
+        "a test namespace",
+        CORE_NAMESPACE,
+        [{"source": "test.yaml"}],
+        version="0.1.0",
+        catalog=spec_catalog,
+    )
+
+    namespace_catalog = NamespaceCatalog()
+    namespace_catalog.add_namespace(CORE_NAMESPACE, namespace)
+
+    type_map = TypeMap(namespace_catalog)
+    type_map.register_container_type(CORE_NAMESPACE, "QuxData", QuxData)
+    type_map.register_container_type(CORE_NAMESPACE, "QuxBucket", QuxBucket)
+
+    manager = BuildManager(type_map)
+    return manager
+
+
 ############################################
 # Baz example data containers and specs
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 9e87ecba7..6518524de 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -10,9 +10,12 @@
 import zipfile

 import h5py
-import numpy as np
 from h5py import SoftLink, HardLink, ExternalLink, File
 from h5py import filters as h5py_filters
+
+import numpy as np
+import numpy.testing as npt
+
 from hdmf.backends.hdf5 import H5DataIO
 from hdmf.backends.hdf5.h5tools import HDF5IO, SPEC_LOC_ATTR, H5PY_3
 from hdmf.backends.io import HDMFIO
@@ -30,11 +33,11 @@
 from hdmf.term_set import TermSet, TermSetWrapper
 from hdmf.utils import get_data_shape

-
 from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager,
                                       Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager,
                                       CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper,
-                                      CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace)
+                                      CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace,
+                                      QuxData, QuxBucket, get_qux_buildmanager)

 try:
     import zarr
@@ -3748,3 +3751,22 @@ def test_expand_false(self):
         self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data),
                          (5,))
+
+    def test_expand_set_shape(self):
+        qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]])
+        quxbucket = QuxBucket('bucket1', qux)
+
+        manager = get_qux_buildmanager([None, 3])
+
+        with HDF5IO(self.path, manager=manager, mode='w') as io:
+            io.write(quxbucket, expandable=True)
+
+        io = HDF5IO(self.path, manager=manager, mode='r+')
+        read_quxbucket = io.read()
+        read_quxbucket.qux_data.append([7,8,9])
+
+        expected = np.array([[1, 2, 3],
+                             [4, 5, 6],
+                             [7, 8, 9]])
+        npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected)
+        self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3))

From aeeadc0173b52937fd5d33f090bc5235e323c64a Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 08:56:28 -0700
Subject: [PATCH 06/19] dtype

---
 src/hdmf/build/objectmapper.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index d2c239110..90f6cefc1 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -724,7 +724,10 @@ def build(self, **kwargs):
                 msg = "'container' must be of type Data with DatasetSpec"
                 raise ValueError(msg)
             spec_dtype, spec_shape, spec_dims, spec = self.__check_dset_spec(self.spec, spec_ext)
-            dimension_labels, matched_shape = self.__get_spec_info(container.data, spec_shape, spec_dims)
+            dimension_labels, matched_shape = self.__get_spec_info(container.data,
+                                                                   spec_shape,
+                                                                   spec_dims,
+                                                                   spec_dtype)
             if isinstance(spec_dtype, RefSpec):
                 self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                   % (container.__class__.__name__, container.name, repr(source)))
@@ -825,7 +828,7 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

-    def __get_spec_info(self, data, spec_shape, spec_dims):
+    def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
@@ -834,7 +837,10 @@ def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         elif spec_shape is None and spec_dims is not None:
             return spec_dims, None
         else:
-            data_shape = get_data_shape(data)
+            if spec_dtype is not None and isinstance(spec_dtype, list):
+                data_shape = (len(data),)
+            else:
+                data_shape = get_data_shape(data)
             # if shape is a list of allowed shapes, find the index of the shape that matches the data
             if isinstance(spec_shape[0], list):
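The tests added in PATCH 05 pin down the intended end-to-end flow: write with expandable=True, reopen the file in 'r+' mode, and append in place. Condensed from test_expand_set_shape (this relies on the QuxData/QuxBucket containers and the get_qux_buildmanager factory added to tests/unit/helpers/utils.py in the same patch; the file path is illustrative):

    from hdmf.backends.hdf5.h5tools import HDF5IO
    from tests.unit.helpers.utils import QuxData, QuxBucket, get_qux_buildmanager

    manager = get_qux_buildmanager([None, 3])  # spec shape: unlimited rows, 3 columns
    bucket = QuxBucket('bucket1', QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]))

    with HDF5IO('example.h5', manager=manager, mode='w') as io:
        io.write(bucket, expandable=True)

    with HDF5IO('example.h5', manager=manager, mode='r+') as io:
        read_bucket = io.read()
        read_bucket.qux_data.append([7, 8, 9])  # resizes the HDF5 dataset in place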
From 1b4b593165859ecf4a2f442a0926569d71afb7c2 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:13:34 -0700
Subject: [PATCH 07/19] fix

---
 src/hdmf/backends/hdf5/h5tools.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 6c276ceef..b2e7170a0 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1494,8 +1494,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
                     io_settings['maxshape'] = matched_spec_shape
-                else:
-                    io_settings['maxshape'] = tuple([None]*len(data_shape))
+
         # Create the dataset
         try:

From 8164d335edf3e1cb0a562aa8e705b927753cdc6c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 7 Aug 2024 20:14:49 +0000
Subject: [PATCH 08/19] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/hdmf/backends/hdf5/h5tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index b2e7170a0..c9cbea752 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -1494,7 +1494,7 @@ def __list_fill__(cls, parent, name, data, matched_spec_shape, expandable, options=None):
             if 'maxshape' not in io_settings:
                 if matched_spec_shape is not None:
                     io_settings['maxshape'] = matched_spec_shape
-
+
         # Create the dataset
         try:

From ae1b590ad897459d46b28d80985e9b9651a222d1 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:52:49 -0700
Subject: [PATCH 09/19] fix again

---
 test_io_hdf5.h5            | Bin 0 -> 14784 bytes
 tests/unit/test_io_hdf5.py |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 test_io_hdf5.h5

diff --git a/test_io_hdf5.h5 b/test_io_hdf5.h5
new file mode 100644
index 0000000000000000000000000000000000000000..c77c7214a6d037d90b6e113e3f4e6afbc7d816d3
GIT binary patch
literal 14784
zcmeHNF>ljA6h0?y3BeVq)PXXASYRtmWlKd9JRn*DLpS3jMk%y)lp3jBnS#X7sbXYA
z%*-ewKZB8xe*pLHy)UY*Mj<7Ykoznp@9y3C-QDNU?|kp>eX!A7tlq2|x+)bjXKF5q
zzu$5VAF>8{l)sdOCvqV2bBPn4DH|%gA^IBZ-(f{p^e>(@8phUyeMl)bKjt}AfkNe_
zLLg|YZ_$9aWz^u)ci|bpNaKSvY`ty2iBqQnFYBlv0KR93w|VGw0d$fd2;{-5A1FL!&K%%4Kk!lJ2i&xG6n3ICx-dVmdW~Cx
zAJ~@sVXhzOk4oUhegF@~lNGM#F_OHXHly`y*{&O?q-I}ue&_4X(7er&zQoI9o8y&D
zxl}q{ay1N@8o3GItU4|EA>XQ_H0?jIn!H^+PqATZVCCd)pZQ;Yep$Od^Cm8ZL7F${
zTNCjQNVoVs1A*a6RtesX2$L<=oFbqIC<2OrBA^Hq0D+E-VPE8-$af+?h&&PbN#qxi
zUqyZssX`G@1QY>9KoL*`{(l6pADxfe`6h6k_iXrR*~W<|h#00vy;z2{V&ha4f~FtG
zia26`xDjm=4)1Yg(&g>#M|<%>FKYj-J=)%YJ=(JOP2K)e+Y7FL6vv_sk7@cvee}&5
znXz`NE*_S&UdQb;+`)IzxVRYND!^~xtQdoHEBWzGiyv*TV7zl;TZJN^2q*%IfFdv}
z1Rg)yeB$P8n#k>Sdj6(-Gf7Ao9FxN_19wiYo%At(%dsGPT9dC<#0QWtcgb-9ve(?%
o#a+e>be~5MQ=teb0*Zhlpa>`eihv@Z2q*%IfFhs>TpR*_0Cd68Bme*a

literal 0
HcmV?d00001

diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py
index d15848abf..29b7f2d7f 100644
--- a/tests/unit/test_io_hdf5.py
+++ b/tests/unit/test_io_hdf5.py
@@ -225,5 +225,5 @@ def test_dataset_shape(self):
         io.write_builder(self.builder)
         builder = io.read_builder()
         dset = builder['test_bucket']['foo_holder']['foo1']['my_data'].data
-        self.assertEqual(get_data_shape(dset), (None,))
+        self.assertEqual(get_data_shape(dset), (10,))
         io.close()

From 4e01d807bbe37b02e8c92c6e0b10d1f63d2d76b8 Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 7 Aug 2024 13:53:13 -0700
Subject: [PATCH 10/19] fix again

---
 test_io_hdf5.h5 | Bin 14784 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 test_io_hdf5.h5

diff --git a/test_io_hdf5.h5 b/test_io_hdf5.h5
deleted file mode 100644
index c77c7214a6d037d90b6e113e3f4e6afbc7d816d3..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 14784
zcmeHNF>ljA6h0?y3BeVq)PXXASYRtmWlKd9JRn*DLpS3jMk%y)lp3jBnS#X7sbXYA
z%*-ewKZB8xe*pLHy)UY*Mj<7Ykoznp@9y3C-QDNU?|kp>eX!A7tlq2|x+)bjXKF5q
zzu$5VAF>8{l)sdOCvqV2bBPn4DH|%gA^IBZ-(f{p^e>(@8phUyeMl)bKjt}AfkNe_
zLLg|YZ_$9aWz^u)ci|bpNaKSvY`ty2iBqQnFYBlv0KR93w|VGw0d$fd2;{-5A1FL!&K%%4Kk!lJ2i&xG6n3ICx-dVmdW~Cx
zAJ~@sVXhzOk4oUhegF@~lNGM#F_OHXHly`y*{&O?q-I}ue&_4X(7er&zQoI9o8y&D
zxl}q{ay1N@8o3GItU4|EA>XQ_H0?jIn!H^+PqATZVCCd)pZQ;Yep$Od^Cm8ZL7F${
zTNCjQNVoVs1A*a6RtesX2$L<=oFbqIC<2OrBA^Hq0D+E-VPE8-$af+?h&&PbN#qxi
zUqyZssX`G@1QY>9KoL*`{(l6pADxfe`6h6k_iXrR*~W<|h#00vy;z2{V&ha4f~FtG
zia26`xDjm=4)1Yg(&g>#M|<%>FKYj-J=)%YJ=(JOP2K)e+Y7FL6vv_sk7@cvee}&5
znXz`NE*_S&UdQb;+`)IzxVRYND!^~xtQdoHEBWzGiyv*TV7zl;TZJN^2q*%IfFdv}
z1Rg)yeB$P8n#k>Sdj6(-Gf7Ao9FxN_19wiYo%At(%dsGPT9dC<#0QWtcgb-9ve(?%
o#a+e>be~5MQ=teb0*Zhlpa>`eihv@Z2q*%IfFhs>TpR*_0Cd68Bme*a
From 345b2f65c858916210342e838341f674b6d7605b Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 14 Aug 2024 14:11:21 -0700
Subject: [PATCH 11/19] clean up

---
 src/hdmf/build/objectmapper.py | 68 +++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 90f6cefc1..0484cad8f 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -828,48 +828,55 @@ def __check_dset_spec(self, orig, ext):
             spec = ext
         return dtype, shape, dims, spec

+    def __get_matched_dimension(self, data_shape, spec_shape, spec_dtype=None):
+        # if shape is a list of allowed shapes, find the index of the shape that matches the data
+        if isinstance(spec_shape[0], list):
+            match_shape_inds = list()
+            for i, s in enumerate(spec_shape):
+                # skip this shape if it has a different number of dimensions from the data
+                if len(s) != len(data_shape):
+                    continue
+                # check each dimension. None means any length is allowed
+                match = True
+                for j, d in enumerate(data_shape):
+                    if s[j] is not None and s[j] != d:
+                        match = False
+                        break
+                if match:
+                    match_shape_inds.append(i)
+            return match_shape_inds
+
     def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
-        elif spec_shape is not None and spec_dims is None:
-            return None, tuple(spec_shape)
-        elif spec_shape is None and spec_dims is not None:
-            return spec_dims, None
+        # elif spec_shape is not None and spec_dims is None:
         else:
             if spec_dtype is not None and isinstance(spec_dtype, list):
                 data_shape = (len(data),)
             else:
                 data_shape = get_data_shape(data)
-            # if shape is a list of allowed shapes, find the index of the shape that matches the data
+
             if isinstance(spec_shape[0], list):
-                match_shape_inds = list()
-                for i, s in enumerate(spec_shape):
-                    # skip this shape if it has a different number of dimensions from the data
-                    if len(s) != len(data_shape):
-                        continue
-                    # check each dimension. None means any length is allowed
-                    match = True
-                    for j, d in enumerate(data_shape):
-                        if s[j] is not None and s[j] != d:
-                            match = False
-                            break
-                    if match:
-                        match_shape_inds.append(i)
-                # use the most specific match -- the one with the fewest Nones
-                if match_shape_inds:
-                    if len(match_shape_inds) == 1:
-                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
-                    else:
-                        count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
-                        index_min_count = count_nones.index(min(count_nones))
-                        best_match_ind = match_shape_inds[index_min_count]
-                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
-                else:
-                    # no matches found
+                match_shape_inds = self.__get_matched_dimension(data_shape, spec_shape, spec_dtype)
+                if len(match_shape_inds) == 0:
+                    # no matches found
                     msg = "Shape of data does not match any allowed shapes in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
+                elif len(match_shape_inds) == 1:
+                    if spec_dims is not None:
+                        return tuple(spec_dims[match_shape_inds[0]]), tuple(spec_shape[match_shape_inds[0]])
+                    else:
+                        return spec_dims, tuple(spec_shape[match_shape_inds[0]])
+                else:
+                    count_nones = [len([x for x in spec_shape[k] if x is None]) for k in match_shape_inds]
+                    index_min_count = count_nones.index(min(count_nones))
+                    best_match_ind = match_shape_inds[index_min_count]
+                    if spec_dims is not None:
+                        return tuple(spec_dims[best_match_ind]), tuple(spec_shape[best_match_ind])
+                    else:
+                        return spec_dims, tuple(spec_shape[best_match_ind])
             else:
                 if len(data_shape) != len(spec_shape):
                     msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
                 # check each dimension. None means any length is allowed
                 match = True
                 for j, d in enumerate(data_shape):
                     if spec_shape[j] is not None and spec_shape[j] != d:
                         match = False
                         break
                 if not match:
                     msg = "Shape of data does not match shape in spec '%s'" % self.spec.path
                     warnings.warn(msg, IncorrectDatasetShapeBuildWarning)
                     return None, None
                 # shape is a single list of allowed dimension lengths
-                return tuple(spec_dims), tuple(spec_shape)
+                if spec_dims is not None:
+                    return tuple(spec_dims), tuple(spec_shape)
+                else:
+                    return None, tuple(spec_shape)
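PATCH 11 factors the candidate search into __get_matched_dimension but keeps the original tie-breaking rule: when several allowed spec shapes fit the data, the most specific one wins, i.e. the candidate with the fewest None wildcards. Stripped to its core (a standalone sketch, not the HDMF method itself):

    def pick_most_specific(match_shape_inds, spec_shape):
        # among matching candidates, the shape with the fewest None wildcards wins
        count_nones = [sum(x is None for x in spec_shape[k]) for k in match_shape_inds]
        return match_shape_inds[count_nones.index(min(count_nones))]

    # data of shape (2, 3) matches both [None, None] and [None, 3];
    # [None, 3] is more specific, so its index is returned and the
    # written dataset gets maxshape (None, 3).
    print(pick_most_specific([0, 1], [[None, None], [None, 3]]))  # -> 1

The test_multi_shape_no_labels test added later in the series (PATCH 19) exercises exactly this case, with a spec shape of [[None, None], [None, 3]] and no dimension labels.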
+ """ + # NOTE: attr1 doesn't map to anything but is required in the test container class + bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_holder_inst = BarDataHolder( + name='my_bar_holder', + bar_datas=[bar_data_inst], + ) + + builder = self.manager.build(bar_data_holder_inst, source='test.h5') + assert builder.datasets['my_bar'].dimension_labels is None + class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): From 2e446ef570b60c03b621260ce87b96ae21329772 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:40:03 -0700 Subject: [PATCH 13/19] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a6369094..b2a93bacb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Enhancements - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) +- Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) + ## HDMF 3.14.3 (July 29, 2024) From de090810d9baa42e6e22f7d146cdd5ff6296adaa Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:40:46 -0700 Subject: [PATCH 14/19] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2a93bacb..e0b52b201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ - Added support to append to a dataset of references for HDMF-Zarr. @mavaylon1 [#1157](https://github.com/hdmf-dev/hdmf/pull/1157) - Added support for datasets to be expandable by default for the HDF5 backend. @mavaylon1 [#1158](https://github.com/hdmf-dev/hdmf/pull/1158) - ## HDMF 3.14.3 (July 29, 2024) ### Enhancements From dc78d6b9933fe42b638177795492f8cb26d104ef Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Wed, 14 Aug 2024 17:43:02 -0700 Subject: [PATCH 15/19] Update h5tools.py --- src/hdmf/backends/hdf5/h5tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index c9cbea752..b02f47f05 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -385,7 +385,7 @@ def copy_file(self, **kwargs): 'doc': 'A HERD object to populate with references.', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions', 'of a dataset to None and enabling auto-chunking by default.')}) def write(self, **kwargs): """Write the container to an HDF5 file.""" @@ -831,7 +831,7 @@ def close_linked_files(self): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}, {'name': 'expandable', 'type': bool, 'default': True, - 'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions', + 'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions', 'of a dataset to None and enabling auto-chunking by default.')}) def write_builder(self, **kwargs): f_builder = popargs('builder', kwargs) @@ -1010,7 +1010,7 @@ def _filler(): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': 
From dc78d6b9933fe42b638177795492f8cb26d104ef Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Wed, 14 Aug 2024 17:43:02 -0700
Subject: [PATCH 15/19] Update h5tools.py

---
 src/hdmf/backends/hdf5/h5tools.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index c9cbea752..b02f47f05 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -385,7 +385,7 @@ def copy_file(self, **kwargs):
          'doc': 'A HERD object to populate with references.',
          'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
@@ -831,7 +831,7 @@ def close_linked_files(self):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
@@ -1010,7 +1010,7 @@ def _filler():
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
@@ -1113,7 +1113,7 @@ def write_link(self, **kwargs):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the max shape for all dimensions',
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
                  'of a dataset to None and enabling auto-chunking by default.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901

From 7452bee91bca0a0d7010cc2278588cdfdd78721e Mon Sep 17 00:00:00 2001
From: Matthew Avaylon
Date: Wed, 14 Aug 2024 17:44:34 -0700
Subject: [PATCH 16/19] Update h5tools.py

---
 src/hdmf/backends/hdf5/h5tools.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index b02f47f05..273918840 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -385,8 +385,7 @@ def copy_file(self, **kwargs):
          'doc': 'A HERD object to populate with references.',
          'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')})
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
     def write(self, **kwargs):
         """Write the container to an HDF5 file."""
         if self.__mode == 'r':
@@ -830,8 +829,7 @@ def close_linked_files(self):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')})
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')})
     def write_builder(self, **kwargs):
         f_builder = popargs('builder', kwargs)
@@ -1008,8 +1006,7 @@ def _filler():
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')},
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
        returns='the Group that was created', rtype=Group)
     def write_group(self, **kwargs):
         parent, builder = popargs('parent', 'builder', kwargs)
@@ -1110,8 +1107,7 @@ def write_link(self, **kwargs):
         {'name': 'export_source', 'type': str,
          'doc': 'The source of the builders when exporting', 'default': None},
         {'name': 'expandable', 'type': bool, 'default': True,
-         'doc': ('Bool to set whether datasets are expandable by setting the maxshape for all schema allowed dimensions',
-                 'of a dataset to None and enabling auto-chunking by default.')},
+         'doc': ('Bool to set whether datasets are expandable by setting the maxshape.')},
        returns='the Dataset that was created', rtype=Dataset)
     def write_dataset(self, **kwargs):  # noqa: C901
         """ Write a dataset to HDF5

From 626237e878006797daa8437be70e6be35d32e56e Mon Sep 17 00:00:00 2001
From: mavaylon1
Date: Wed, 14 Aug 2024 17:49:08 -0700
Subject: [PATCH 17/19] clean up for review

---
 src/hdmf/build/objectmapper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py
index 0484cad8f..6dbfdb1d4 100644
--- a/src/hdmf/build/objectmapper.py
+++ b/src/hdmf/build/objectmapper.py
@@ -850,7 +850,6 @@ def __get_spec_info(self, data, spec_shape, spec_dims, spec_dtype=None):
         """This will return the dimension labels and shape by matching the data shape to a permissible spec shape."""
         if spec_shape is None and spec_dims is None:
             return None, None
-        # elif spec_shape is not None and spec_dims is None:
         else:
             if spec_dtype is not None and isinstance(spec_dtype, list):
                 data_shape = (len(data),)
- """ - # NOTE: attr1 doesn't map to anything but is required in the test container class - bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') - bar_data_holder_inst = BarDataHolder( - name='my_bar_holder', - bar_datas=[bar_data_inst], - ) - - builder = self.manager.build(bar_data_holder_inst, source='test.h5') - assert builder.datasets['my_bar'].dimension_labels is None - class TestBuildDatasetDimensionLabelsOneOptionRefined(BuildDatasetShapeMixin): From 5567f04388e30b82c9b2b1a1790de1da806d701e Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 14 Aug 2024 17:52:44 -0700 Subject: [PATCH 19/19] clean up --- tests/unit/test_io_hdf5_h5tools.py | 41 ++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index 6518524de..4dece7398 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -3745,12 +3745,25 @@ def test_expand_false(self): with HDF5IO(self.path, manager=self.manager, mode='w') as io: io.write(foofile, expandable=False) - io = HDF5IO(self.path, manager=self.manager, mode='r') - read_foofile = io.read() - self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, - read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) - self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), - (5,)) + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, + read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + self.assertEqual(get_data_shape(read_foofile.buckets['bucket1'].foos['foo1'].my_data), + (5,)) + + def test_multi_shape_no_labels(self): + qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) + quxbucket = QuxBucket('bucket1', qux) + + manager = get_qux_buildmanager([[None, None],[None, 3]]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(quxbucket, expandable=True) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_quxbucket = io.read() + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) def test_expand_set_shape(self): qux = QuxData(name='my_qux', data=[[1, 2, 3], [4, 5, 6]]) @@ -3761,12 +3774,12 @@ def test_expand_set_shape(self): with HDF5IO(self.path, manager=manager, mode='w') as io: io.write(quxbucket, expandable=True) - io = HDF5IO(self.path, manager=manager, mode='r+') - read_quxbucket = io.read() - read_quxbucket.qux_data.append([7,8,9]) + with HDF5IO(self.path, manager=manager, mode='r+') as io: + read_quxbucket = io.read() + read_quxbucket.qux_data.append([7,8,9]) - expected = np.array([[1, 2, 3], - [4, 5, 6], - [7, 8, 9]]) - npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) - self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3)) + expected = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + npt.assert_array_equal(read_quxbucket.qux_data.data[:], expected) + self.assertEqual(read_quxbucket.qux_data.data.maxshape, (None,3))