Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch ZipStore for write_image and write_multiscale method using Dask arrays with storage_options #399

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions ome_zarr/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def write_multiscale(
Please use the 'storage_options' argument instead."""
warnings.warn(msg, DeprecationWarning)
datasets: List[dict] = []
is_zipstore = isinstance(group.store, zarr.storage.ZipStore)
for path, data in enumerate(pyramid):
options = _resolve_storage_options(storage_options, path)

Expand All @@ -251,11 +252,13 @@ def write_multiscale(
if chunks_opt is not None:
data = da.array(data).rechunk(chunks=chunks_opt)
options["chunks"] = chunks_opt
# storage options with chunks results in duplicate key error for ZipStore
# TODO: do other stores also cause this error with da.to_zarr?
da_delayed = da.to_zarr(
arr=data,
url=group.store,
component=str(Path(group.path, str(path))),
storage_options=options,
storage_options=None if is_zipstore else options,
compressor=options.get("compressor", zarr.storage.default_compressor),
dimension_separator=group._store._dimension_separator,
compute=compute,
Expand Down Expand Up @@ -585,6 +588,7 @@ def _write_dask_image(
# for path, data in enumerate(pyramid):
max_layer: int = scaler.max_layer if scaler is not None else 0
shapes = []
is_zipstore = isinstance(group.store, zarr.storage.ZipStore)
for path in range(0, max_layer + 1):
# LOGGER.debug(f"write_image path: {path}")
options = _resolve_storage_options(storage_options, path)
Expand All @@ -609,12 +613,14 @@ def _write_dask_image(
LOGGER.debug(
"write dask.array to_zarr shape: %s, dtype: %s", image.shape, image.dtype
)
# storage options with chunks results in duplicate key error for ZipStore
# TODO: do other stores also cause this error with da.to_zarr?
delayed.append(
da.to_zarr(
arr=image,
url=group.store,
component=str(Path(group.path, str(path))),
storage_options=options,
storage_options=None if is_zipstore else options,
compute=False,
compressor=options.get("compressor", zarr.storage.default_compressor),
dimension_separator=group._store._dimension_separator,
Expand Down
67 changes: 67 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class TestWriter:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = pathlib.Path(tmpdir.mkdir("data"))
self.zip_path = pathlib.Path(tmpdir.mkdir("data_zip"), "data.zip")
self.zip_store = zarr.storage.ZipStore(self.zip_path, mode="w")
self.store = parse_url(self.path, mode="w").store
self.root = zarr.group(store=self.store)
self.group = self.root.create_group("test")
Expand Down Expand Up @@ -146,6 +148,71 @@ def test_write_image_current(self, array_constructor):
for value in transfs[0]["scale"]:
assert value >= 1

# Round-trip write_image through a ZipStore, with and without a
# storage_options chunk override, for both numpy and dask inputs.
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
@pytest.mark.parametrize("storage_options", [None, {"chunks": (1, 100, 100)}])
def test_write_image_zipstore(self, array_constructor, storage_options):
    """write_image into a ZipStore; verify the persisted chunk layout."""
    # Fresh zarr group backed by the ZipStore fixture.
    group = zarr.group(store=self.zip_store, overwrite=True)

    shape = (3, 300, 300)
    raw = self.create_data(shape)
    if array_constructor is da.from_array:
        data = array_constructor(raw, chunks=(1, 50, 50))
    else:
        data = array_constructor(raw)

    write_image(data, group, axes="cyx", storage_options=storage_options)
    self.zip_store.close()

    # Reopen the archive read-only and inspect what was actually stored.
    reader = zarr.storage.ZipStore(self.zip_path, mode="r")
    reopened = zarr.open_group(store=reader)
    if storage_options is not None:
        # An explicit chunk override must win regardless of input type.
        assert reopened[0].chunks == storage_options["chunks"]
    elif array_constructor is da.from_array:
        # Without an override, the dask array's own chunking is kept.
        assert reopened[0].chunks == (1, 50, 50)
    # numpy input without an override: no chunk expectation to check.

@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
@pytest.mark.parametrize("storage_options", [None, {"chunks": (1, 100, 100)}])
def test_write_multiscale_zipstore(self, array_constructor, storage_options):
    """write_multiscale into a ZipStore; verify each level's chunk layout."""
    # Fresh zarr group backed by the ZipStore fixture.
    group = zarr.group(store=self.zip_store, overwrite=True)

    shape = (3, 300, 300)
    pyramid = []
    for _ in range(2):
        raw = self.create_data(shape)
        if array_constructor is da.from_array:
            level = array_constructor(raw, chunks=(1, 50, 50))
        else:
            level = array_constructor(raw)
        pyramid.append(level.copy())

    write_multiscale(pyramid, group, axes="cyx", storage_options=storage_options)
    self.zip_store.close()

    # Reopen the archive read-only and inspect every pyramid level.
    reader = zarr.storage.ZipStore(self.zip_path, mode="r")
    reopened = zarr.open_group(store=reader)
    for level_idx in range(2):
        if storage_options is not None:
            # An explicit chunk override must win regardless of input type.
            assert reopened[level_idx].chunks == storage_options["chunks"]
        elif array_constructor is da.from_array:
            # Without an override, the dask array's own chunking is kept.
            assert reopened[level_idx].chunks == (1, 50, 50)
        # numpy input without an override: no chunk expectation to check.

@pytest.mark.parametrize("read_from_zarr", [True, False])
@pytest.mark.parametrize("compute", [True, False])
def test_write_image_dask(self, read_from_zarr, compute):
Expand Down
Loading