Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/improve-safe-chunk-validation' i…
Browse files Browse the repository at this point in the history
…nto improve-safe-chunk-validation
  • Loading branch information
josephnowak committed Sep 20, 2024
2 parents a30b1e0 + 604b8e1 commit b2bdc47
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 12 deletions.
14 changes: 9 additions & 5 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def __getitem__(self, key):
# could possibly have a work-around for 0d data here


def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode):
def _determine_zarr_chunks(
enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode
):
"""
Given encoding chunks (possibly None or []) and variable chunks
(possibly None or []).
Expand Down Expand Up @@ -163,7 +165,9 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi

if len(enc_chunks_tuple) != ndim:
# throw away encoding chunks, start over
return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks, region, mode)
return _determine_zarr_chunks(
None, var_chunks, ndim, name, safe_chunks, region, mode
)

for x in enc_chunks_tuple:
if not isinstance(x, int):
Expand Down Expand Up @@ -276,7 +280,7 @@ def extract_zarr_variable_encoding(
name=None,
safe_chunks=True,
region=None,
mode=None
mode=None,
):
"""
Extract zarr encoding dictionary from xarray Variable
Expand Down Expand Up @@ -328,7 +332,7 @@ def extract_zarr_variable_encoding(
name=name,
safe_chunks=safe_chunks,
region=region,
mode=mode
mode=mode,
)
encoding["chunks"] = chunks
return encoding
Expand Down Expand Up @@ -864,7 +868,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No
raise_on_invalid=vn in check_encoding_set,
name=vn,
safe_chunks=self._safe_chunks,
mode=self._mode
mode=self._mode,
)

if name not in existing_keys:
Expand Down
17 changes: 10 additions & 7 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -6157,29 +6157,32 @@ def test_zarr_safe_chunk_region(tmp_path):
store = tmp_path / "foo.zarr"

arr = xr.DataArray(
list(range(10)),
dims=["a"],
coords={"a": list(range(10))},
name="foo"
list(range(10)), dims=["a"], coords={"a": list(range(10))}, name="foo"
).chunk(a=3)
arr.to_zarr(store, mode="w")

for mode in ["r+", "a"]:
with pytest.raises(ValueError):
# There are two Dask chunks on the same Zarr chunk,
# which means that it is unsafe in any mode
arr.isel(a=slice(0, 3)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode=mode)
arr.isel(a=slice(0, 3)).chunk(a=(2, 1)).to_zarr(
store, region="auto", mode=mode
)

with pytest.raises(ValueError):
# The first Dask chunk reaches past the border of the first Zarr chunk,
# but it does not completely cover the second Zarr chunk, which means
# that it is unsafe in any mode
arr.isel(a=slice(1, 5)).chunk(a=(3, 1)).to_zarr(store, region="auto", mode=mode)
arr.isel(a=slice(1, 5)).chunk(a=(3, 1)).to_zarr(
store, region="auto", mode=mode
)

with pytest.raises(ValueError):
# The first chunk is safe but the other two chunks are overlapping with
# the same Zarr chunk
arr.isel(a=slice(0, 5)).chunk(a=(3, 1, 1)).to_zarr(store, region="auto", mode=mode)
arr.isel(a=slice(0, 5)).chunk(a=(3, 1, 1)).to_zarr(
store, region="auto", mode=mode
)

# Fully updating two contiguous chunks is safe in any mode
arr.isel(a=slice(3, 9)).to_zarr(store, region="auto", mode=mode)
Expand Down

0 comments on commit b2bdc47

Please sign in to comment.