Skip to content

Commit

Permalink
Future-proof offset and size records in chunkmanifest
Browse files (browse the repository at this point in the history)
  • Loading branch information
moradology committed Jul 9, 2024
1 parent 91ebefe commit 5d11f48
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions virtualizarr/manifests/manifest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -71,8 +71,8 @@ class ChunkManifest:
"""

_paths: np.ndarray[Any, np.dtypes.StringDType] # type: ignore[name-defined]
- _offsets: np.ndarray[Any, np.dtype[np.int32]]
- _lengths: np.ndarray[Any, np.dtype[np.int32]]
+ _offsets: np.ndarray[Any, np.dtype[np.uint64]]
+ _lengths: np.ndarray[Any, np.dtype[np.uint64]]

def __init__(self, entries: dict) -> None:
"""
Expand Down Expand Up @@ -100,8 +100,8 @@ def __init__(self, entries: dict) -> None:

# Initializing to empty implies that entries with path='' are treated as missing chunks
paths = np.empty(shape=shape, dtype=np.dtypes.StringDType()) # type: ignore[attr-defined]
- offsets = np.empty(shape=shape, dtype=np.dtype("int32"))
- lengths = np.empty(shape=shape, dtype=np.dtype("int32"))
+ offsets = np.empty(shape=shape, dtype=np.dtype("uint64"))
+ lengths = np.empty(shape=shape, dtype=np.dtype("uint64"))

# populate the arrays
for key, entry in entries.items():
Expand All @@ -128,8 +128,8 @@ def __init__(self, entries: dict) -> None:
def from_arrays(
cls,
paths: np.ndarray[Any, np.dtype[np.dtypes.StringDType]], # type: ignore[name-defined]
- offsets: np.ndarray[Any, np.dtype[np.int32]],
- lengths: np.ndarray[Any, np.dtype[np.int32]],
+ offsets: np.ndarray[Any, np.dtype[np.uint64]],
+ lengths: np.ndarray[Any, np.dtype[np.uint64]],
) -> "ChunkManifest":
"""
Create manifest directly from numpy arrays containing the path and byte range information.
Expand Down Expand Up @@ -161,13 +161,13 @@ def from_arrays(
raise ValueError(
f"paths array must have a numpy variable-length string dtype, but got dtype {paths.dtype}"
)
- if offsets.dtype != np.dtype("int32"):
+ if offsets.dtype != np.dtype("uint64"):
  raise ValueError(
- f"offsets array must have 32-bit integer dtype, but got dtype {offsets.dtype}"
+ f"offsets array must have 64-bit unsigned integer dtype, but got dtype {offsets.dtype}"
  )
- if lengths.dtype != np.dtype("int32"):
+ if lengths.dtype != np.dtype("uint64"):
  raise ValueError(
- f"lengths array must have 32-bit integer dtype, but got dtype {lengths.dtype}"
+ f"lengths array must have 64-bit unsigned integer dtype, but got dtype {lengths.dtype}"
  )

# check shapes
Expand Down

0 comments on commit 5d11f48

Please sign in to comment.