Skip to content

Commit

Permalink
Now the mode r+ is able to update the last chunk of Zarr even if it i…
Browse files Browse the repository at this point in the history
…s not "complete"
  • Loading branch information
josephnowak committed Sep 21, 2024
1 parent cc585d0 commit 0b4b9b1
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
21 changes: 14 additions & 7 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,6 @@ def _determine_zarr_chunks(
# If it is possible to write on partial chunks then it is not necessary to check
# the last one contained on the region
allow_partial_chunks = mode != "r+"
# The r+ mode force to write only on full chunks, even on the last one
end = None if mode == "r+" else -1

base_error = (
f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for "
Expand Down Expand Up @@ -231,14 +229,21 @@ def _determine_zarr_chunks(
raise ValueError(base_error)

if not allow_partial_chunks:
chunk_start = sum(dchunks[:-1]) + region_start
if chunk_start % zchunk:
# The last chunk which can also be the only one is a partial chunk
# if it is not aligned at the beginning
raise ValueError(base_error)

region_stop = interval.stop if interval.stop else size
cover_last_chunk = region_stop > size - size % zchunk

if not cover_last_chunk:
if dchunks[-1] % zchunk:
if size - region_stop + 1 < zchunk:
# If the region is covering the last chunk then check
# if the reminder with the default chunk size
# is equal to the size of the last chunk
if dchunks[-1] % zchunk != size % zchunk:
raise ValueError(base_error)
elif dchunks[-1] % zchunk != size % zchunk:
# The remainder must be equal to the size of the last Zarr chunk
elif dchunks[-1] % zchunk:
raise ValueError(base_error)

return enc_chunks_tuple
Expand Down Expand Up @@ -307,6 +312,8 @@ def extract_zarr_variable_encoding(
encoding : dict
Zarr encoding for `variable`
"""

shape = shape if shape else variable.shape
encoding = variable.encoding.copy()

safe_to_drop = {"source", "original_shape"}
Expand Down
9 changes: 4 additions & 5 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -6199,14 +6199,12 @@ def test_zarr_safe_chunk_region(tmp_path):

# This is safe with mode "a", the border size is covered by the first chunk of Dask
arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="a")

with pytest.raises(ValueError):
# This is considered unsafe in mode "r+" because it is writing in a partial chunk
arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="r+")

# This is safe on mode "a" because there is a single dask chunk
arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="a")

with pytest.raises(ValueError):
# This is unsafe on mode "r+", because the Dask chunk is partially writing
# in the first chunk of Zarr
Expand Down Expand Up @@ -6239,7 +6237,7 @@ def test_zarr_safe_chunk_region(tmp_path):
# (the last Zarr chunk has size 2)
arr.isel(a=slice(10, 11)).to_zarr(store, region="auto", mode="r+")

# Validate the same than the above test but in the beginning of the last chunk
# Validate the same as the above test but in the beginning of the last chunk
arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="a")
with pytest.raises(ValueError):
arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="r+")
Expand All @@ -6252,7 +6250,8 @@ def test_zarr_safe_chunk_region(tmp_path):

with pytest.raises(ValueError):
# If the chunk is of size equal to the one in the Zarr encoding, but
# it is partially writing in the last chunk then raise an error
# it is partially writing in the first chunk then raise an error
arr.isel(a=slice(8, None)).chunk(a=3).to_zarr(store, region="auto", mode="r+")


with pytest.raises(ValueError):
arr.isel(a=slice(5, -1)).chunk(a=5).to_zarr(store, region="auto", mode="r+")

0 comments on commit 0b4b9b1

Please sign in to comment.