Skip to content

Commit

Permalink
Append a Dataset of References (#1135)
Browse files Browse the repository at this point in the history
  • Loading branch information
mavaylon1 authored Aug 22, 2024
1 parent acc3d78 commit e0bedca
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Adjusted stacklevel of warnings to point to user code when possible. @rly [#1166](https://github.com/hdmf-dev/hdmf/pull/1166)
- Improved "already exists" error message when adding a container to a `MultiContainerInterface`. @rly [#1165](https://github.com/hdmf-dev/hdmf/pull/1165)
- Added support to write multidimensional string arrays. @stephprince [#1173](https://github.com/hdmf-dev/hdmf/pull/1173)
- Added support for appending to a dataset of references. @mavaylon1 [#1135](https://github.com/hdmf-dev/hdmf/pull/1135)

### Bug fixes
- Fixed issue where scalar datasets with a compound data type were being written as non-scalar datasets @stephprince [#1176](https://github.com/hdmf-dev/hdmf/pull/1176)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/install_developers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ environment by using the ``conda remove --name hdmf-venv --all`` command.
For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager
that includes conda-forge as a default channel.

.. _Anaconda: https://www.anaconda.com/products/distribution
.. _Anaconda: https://www.anaconda.com/download
.. _Mambaforge: https://github.com/conda-forge/miniforge

Install from GitHub
Expand Down
2 changes: 1 addition & 1 deletion docs/source/install_users.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a
conda install -c conda-forge hdmf
.. _Anaconda Distribution: https://www.anaconda.com/products/distribution
.. _Anaconda Distribution: https://www.anaconda.com/download
16 changes: 15 additions & 1 deletion src/hdmf/backends/hdf5/h5_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging

from ...array import Array
from ...data_utils import DataIO, AbstractDataChunkIterator
from ...data_utils import DataIO, AbstractDataChunkIterator, append_data
from ...query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver
from ...region import RegionSlicer
from ...spec import SpecWriter, SpecReader
Expand Down Expand Up @@ -108,6 +108,20 @@ def ref(self):
def shape(self):
return self.dataset.shape

def append(self, arg):
    """Append an HDF5 object reference to *arg* to this dataset of references.

    *arg* must be a container that has already been written to the file
    (i.e., it has a builder known to this file's BuildManager); otherwise a
    ValueError is raised.
    """
    # Resolve the container to its builder; containers that were never
    # written to this file have no builder.
    target_builder = self.io.manager.get_builder(arg)
    if target_builder is None:
        raise ValueError(
            "The container being appended to the dataset has not yet been built. "
            "Please write the container to the file, then open the modified file, and "
            "append the read container to the dataset."
        )

    # Create an HDF5 reference for the builder and append it to the dataset.
    append_data(self.dataset, self.io._create_ref(target_builder))


class DatasetOfReferences(H5Dataset, ReferenceResolver, metaclass=ABCMeta):
"""
Expand Down
9 changes: 9 additions & 0 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,7 @@ def __get_ref(self, **kwargs):
self.logger.debug("Getting reference for %s '%s'" % (container.__class__.__name__, container.name))
builder = self.manager.build(container)
path = self.__get_path(builder)

self.logger.debug("Getting reference at path '%s'" % path)
if isinstance(container, RegionBuilder):
region = container.region
Expand All @@ -1529,6 +1530,14 @@ def __get_ref(self, **kwargs):
else:
return self.__file[path].ref

@docval({'name': 'container', 'type': (Builder, Container, ReferenceBuilder), 'doc': 'the object to reference',
         'default': None},
        {'name': 'region', 'type': (slice, list, tuple), 'doc': 'the region reference indexing object',
         'default': None},
        returns='the reference', rtype=Reference)
def _create_ref(self, **kwargs):
    # Thin, package-internal wrapper around the name-mangled private
    # __get_ref so that code outside this class (e.g. H5Dataset.append in
    # h5_utils) can create object references without touching a private name.
    return self.__get_ref(**kwargs)

def __is_ref(self, dtype):
if isinstance(dtype, DtypeSpec):
return self.__is_ref(dtype.dtype)
Expand Down
6 changes: 6 additions & 0 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@
from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
ConstructError)
from .manager import Proxy, BuildManager

from .warnings import (MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning,
IncorrectDatasetShapeBuildWarning)
from hdmf.backends.hdf5.h5_utils import H5DataIO

from ..container import AbstractContainer, Data, DataRegion
from ..term_set import TermSetWrapper
from ..data_utils import DataIO, AbstractDataChunkIterator
Expand Down Expand Up @@ -978,6 +981,9 @@ def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
for d in container.data:
target_builder = self.__get_target_builder(d, build_manager, builder)
bldr_data.append(ReferenceBuilder(target_builder))
if isinstance(container.data, H5DataIO):
# This is here to support appending a dataset of references.
bldr_data = H5DataIO(bldr_data, **container.data.get_io_params())
else:
self.logger.debug("Setting %s '%s' data to reference builder"
% (builder.__class__.__name__, builder.name))
Expand Down
6 changes: 6 additions & 0 deletions src/hdmf/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,12 @@ def __next__(self):
def next(self):
    # Delegate to the wrapped dataset's own next() method.
    return self.dataset.next()

def append(self, arg):
    """
    Append a value to the wrapped dataset.

    No-op by default. Backend-specific subclasses override this method to
    support appending to their datasets.
    """
    pass # pragma: no cover


class ReferenceResolver(metaclass=ABCMeta):
"""
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_io_hdf5_h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3004,6 +3004,57 @@ def test_append_data(self):
self.assertEqual(f['foofile_data'].file.filename, self.paths[1])
self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference)

def test_append_dataset_of_references(self):
    """Test that appending to a dataset of references updates the dataset on disk.

    The original docstring incorrectly described this as an export test; it
    actually round-trips a file and appends a new reference in 'a' mode.
    """
    bazs = []
    num_bazs = 1
    for i in range(num_bazs):
        bazs.append(Baz(name='baz%d' % i))
    # wrap with maxshape=(None,) so the HDF5 dataset is created resizable
    array_bazs = np.array(bazs)
    wrapped_bazs = H5DataIO(array_bazs, maxshape=(None,))
    baz_data = BazData(name='baz_data1', data=wrapped_bazs)
    bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)

    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(bucket)

    # write a new Baz so it has a builder that the appended reference can target
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as append_io:
        read_bucket1 = append_io.read()
        new_baz = Baz(name='new')
        read_bucket1.add_baz(new_baz)
        append_io.write(read_bucket1)

    # append a reference to the newly written Baz
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
        read_bucket1 = ref_io.read()
        DoR = read_bucket1.baz_data.data
        DoR.append(read_bucket1.bazs['new'])

    # confirm the appended reference resolves to the new Baz on re-read
    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='r') as read_io:
        read_bucket1 = read_io.read()
        self.assertEqual(len(read_bucket1.baz_data.data), 2)
        self.assertIs(read_bucket1.baz_data.data[1], read_bucket1.bazs["new"])

def test_append_dataset_of_references_orphaned_target(self):
    """Appending a reference to a container that was never written raises ValueError."""
    # build one Baz and wrap the array so the dataset is created resizable
    bazs = [Baz(name='baz%d' % idx) for idx in range(1)]
    wrapped_bazs = H5DataIO(np.array(bazs), maxshape=(None,))
    baz_data = BazData(name='baz_data1', data=wrapped_bazs)
    bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data)

    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='w') as write_io:
        write_io.write(bucket)

    with HDF5IO(self.paths[0], manager=get_baz_buildmanager(), mode='a') as ref_io:
        read_bucket1 = ref_io.read()
        # the new Baz is added in memory but never written, so it has no builder
        new_baz = Baz(name='new')
        read_bucket1.add_baz(new_baz)
        DoR = read_bucket1.baz_data.data
        with self.assertRaises(ValueError):
            DoR.append(read_bucket1.bazs['new'])

def test_append_external_link_data(self):
"""Test that exporting a written container after adding a link with link_data=True creates external links."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
Expand Down

0 comments on commit e0bedca

Please sign in to comment.