Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utility for opening remote files with fsspec #9797

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions xarray/backends/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@ def _normalize_path_list(
return _normalize_path_list(paths)


def _open_remote_file(file, mode, storage_options=None):
    """Open a remote URI as a file-like object via fsspec.

    Parameters
    ----------
    file : str
        Remote URI (e.g. ``s3://...`` or ``https://...``) understood by fsspec.
    mode : str
        File mode to open with (e.g. ``"rb"``).
    storage_options : dict, optional
        Extra keyword arguments forwarded to the fsspec filesystem constructor.

    Returns
    -------
    A file-like object produced by the resolved fsspec filesystem.
    """
    # Deferred import: fsspec is an optional dependency, only needed for
    # remote paths.
    import fsspec

    # get_fs_token_paths returns (filesystem, token, expanded_paths);
    # the token is not needed here.
    filesystem, _token, resolved_paths = fsspec.get_fs_token_paths(
        file, mode=mode, storage_options=storage_options
    )
    # A single URI resolves to a single path; open the first (only) entry.
    return filesystem.open(resolved_paths[0], mode=mode)


def _encode_variable_name(name):
if name is None:
name = NONE_VAR_NAME
Expand Down
10 changes: 10 additions & 0 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BackendEntrypoint,
WritableCFDataStore,
_normalize_path,
_open_remote_file,
datatree_from_dict_with_io_cleanup,
find_root_and_group,
)
Expand Down Expand Up @@ -149,9 +150,16 @@ def open(
decode_vlen_strings=True,
driver=None,
driver_kwds=None,
storage_options=None,
):
import h5netcdf

if isinstance(filename, str) and is_remote_uri(filename) and driver is None:
mode_ = "rb" if mode == "r" else mode
filename = _open_remote_file(
filename, mode=mode_, storage_options=storage_options
)

if isinstance(filename, bytes):
raise ValueError(
"can't open netCDF4/HDF5 as bytes "
Expand Down Expand Up @@ -425,6 +433,7 @@ def open_dataset(
decode_vlen_strings=True,
driver=None,
driver_kwds=None,
storage_options=None,
) -> Dataset:
filename_or_obj = _normalize_path(filename_or_obj)
store = H5NetCDFStore.open(
Expand All @@ -450,6 +459,7 @@ def open_dataset(
drop_variables=drop_variables,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
            storage_options=storage_options,
)
return ds

Expand Down
1 change: 1 addition & 0 deletions xarray/backends/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def open_dataset(
drop_variables: str | Iterable[str] | None = None,
use_cftime=None,
decode_timedelta=None,
storage_options=None,
) -> Dataset:
assert isinstance(filename_or_obj, AbstractDataStore)

Expand Down
21 changes: 21 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -6489,3 +6489,24 @@ def test_zarr_safe_chunk_region(tmp_path):
chunk = ds.isel(region)
chunk = chunk.chunk()
chunk.chunk().to_zarr(store, region=region)


@requires_h5netcdf
@requires_fsspec
def test_h5netcdf_storage_options() -> None:
    """Check that ``storage_options`` is honored when opening ``file://`` URIs
    through the h5netcdf engine via ``open_mfdataset``."""
    with create_tmp_files(2, allow_cleanup_failure=ON_WINDOWS) as (f1, f2):
        # Write one test dataset per temporary file, keeping the originals
        # around so the combined result can be checked against them.
        expected_parts = []
        for path in (f1, f2):
            part = create_test_data()
            part.to_netcdf(path, engine="h5netcdf")
            expected_parts.append(part)

        # Address the local files through fsspec's file:// protocol so the
        # remote-open code path (and storage_options forwarding) is exercised.
        uris = [f"file://{path}" for path in (f1, f2)]
        actual = xr.open_mfdataset(
            uris,
            engine="h5netcdf",
            combine="nested",
            concat_dim="time",
            storage_options={"skip_instance_cache": False},
        )
        assert_identical(xr.concat(expected_parts, dim="time"), actual)
Loading