From 0b4b9b1f9bb61becc39ad3bea6da9775cb82a72d Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Sat, 21 Sep 2024 19:54:22 -0400 Subject: [PATCH] Now the mode r+ is able to update the last chunk of Zarr even if it is not "complete" --- xarray/backends/zarr.py | 21 ++++++++++++++------- xarray/tests/test_backends.py | 9 ++++----- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e6fe93a398a..197f735f950 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -196,8 +196,6 @@ def _determine_zarr_chunks( # If it is possible to write on partial chunks then it is not necessary to check # the last one contained on the region allow_partial_chunks = mode != "r+" - # The r+ mode force to write only on full chunks, even on the last one - end = None if mode == "r+" else -1 base_error = ( f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " @@ -231,14 +229,21 @@ def _determine_zarr_chunks( raise ValueError(base_error) if not allow_partial_chunks: + chunk_start = sum(dchunks[:-1]) + region_start + if chunk_start % zchunk: + # The last chunk which can also be the only one is a partial chunk + # if it is not aligned at the beginning + raise ValueError(base_error) + region_stop = interval.stop if interval.stop else size - cover_last_chunk = region_stop > size - size % zchunk - if not cover_last_chunk: - if dchunks[-1] % zchunk: + if size - region_stop + 1 < zchunk: + # If the region is covering the last chunk then check + # if the reminder with the default chunk size + # is equal to the size of the last chunk + if dchunks[-1] % zchunk != size % zchunk: raise ValueError(base_error) - elif dchunks[-1] % zchunk != size % zchunk: - # The remainder must be equal to the size of the last Zarr chunk + elif dchunks[-1] % zchunk: raise ValueError(base_error) return enc_chunks_tuple @@ -307,6 +312,8 @@ def extract_zarr_variable_encoding( encoding : dict Zarr encoding for `variable` """ + + shape = shape if shape else variable.shape encoding = variable.encoding.copy() safe_to_drop = {"source", "original_shape"} diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c04f71ae61c..919317fb0d0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6199,14 +6199,12 @@ def test_zarr_safe_chunk_region(tmp_path): # This is safe with mode "a", the border size is covered by the first chunk of Dask arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="a") - with pytest.raises(ValueError): # This is considered unsafe in mode "r+" because it is writing in a partial chunk arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="r+") # This is safe on mode "a" because there is a single dask chunk arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="a") - with pytest.raises(ValueError): # This is unsafe on mode "r+", because the Dask chunk is partially writing # in the first chunk of Zarr @@ -6239,7 +6237,7 @@ def test_zarr_safe_chunk_region(tmp_path): # (the last Zarr chunk has size 2) arr.isel(a=slice(10, 11)).to_zarr(store, region="auto", mode="r+") - # Validate the same than the above test but in the beginning of the last chunk + # Validate the same as the above test but in the beginning of the last chunk arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="a") with pytest.raises(ValueError): arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="r+") @@ -6252,7 +6250,8 @@ def test_zarr_safe_chunk_region(tmp_path): with pytest.raises(ValueError): # If the chunk is of size equal to the one in the Zarr encoding, but - # it is partially writing in the last chunk then raise an error + # it is partially writing in the first chunk then raise an error arr.isel(a=slice(8, None)).chunk(a=3).to_zarr(store, region="auto", mode="r+") - + with pytest.raises(ValueError): + arr.isel(a=slice(5, -1)).chunk(a=5).to_zarr(store, region="auto", mode="r+")