diff --git a/docs/releases.rst b/docs/releases.rst index 53d99207..6a9ae28c 100644 --- a/docs/releases.rst +++ b/docs/releases.rst @@ -39,10 +39,11 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Warn if user passes `indexes=None` to `open_virtual_dataset` to indicate that this is not yet fully supported. + (:pull:`170`) By `Tom Nicholas `_. - Clarify that virtual datasets cannot be treated like normal xarray datasets. (:issue:`173`) By `Tom Nicholas `_. - Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 256ed363..2e612de9 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -173,5 +173,5 @@ def test_open_scalar_variable(tmpdir): ds = xr.Dataset(data_vars={"a": 0}) ds.to_netcdf(f"{tmpdir}/scalar.nc") - vds = open_virtual_dataset(f"{tmpdir}/scalar.nc") + vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={}) assert vds["a"].shape == () diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index b1665a07..d0fe2e3b 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -251,7 +251,8 @@ def test_no_indexes(self, netcdf4_file): assert vds.indexes == {} def test_create_default_indexes(self, netcdf4_file): - vds = open_virtual_dataset(netcdf4_file, indexes=None) + with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): + vds = open_virtual_dataset(netcdf4_file, indexes=None) ds = xr.open_dataset(netcdf4_file, decode_times=False) # TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812 @@ -278,8 +279,10 @@ class TestCombineUsingIndexes: def test_combine_by_coords(self, netcdf4_files): filepath1, filepath2 = netcdf4_files - vds1 = open_virtual_dataset(filepath1) - vds2 = open_virtual_dataset(filepath2) + with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): + vds1 = open_virtual_dataset(filepath1) + with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): + vds2 = open_virtual_dataset(filepath2) combined_vds = xr.combine_by_coords( [vds2, vds1], @@ -287,6 +290,23 @@ def test_combine_by_coords(self, netcdf4_files): assert combined_vds.xindexes["time"].to_pandas_index().is_monotonic_increasing + @pytest.mark.xfail(reason="Not yet implemented, see issue #18") + def test_combine_by_coords_keeping_manifestarrays(self, netcdf4_files): + filepath1, filepath2 = netcdf4_files + + with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): + vds1 = open_virtual_dataset(filepath1) + with pytest.warns(UserWarning, match="will create in-memory pandas indexes"): + vds2 = open_virtual_dataset(filepath2) + + combined_vds = xr.combine_by_coords( + [vds2, vds1], + ) + + assert isinstance(combined_vds["time"].data, ManifestArray) + assert isinstance(combined_vds["lat"].data, ManifestArray) + assert isinstance(combined_vds["lon"].data, ManifestArray) + @network @requires_s3fs @@ -363,7 +383,9 @@ def test_read_from_url(self, filetype, url): class TestLoadVirtualDataset: def test_loadable_variables(self, netcdf4_file): vars_to_load = ["air", "time"] - vds = open_virtual_dataset(netcdf4_file, loadable_variables=vars_to_load) + vds = open_virtual_dataset( + netcdf4_file, loadable_variables=vars_to_load, indexes={} + ) for name in vds.variables: if name in vars_to_load: diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index f33fc13f..df847173 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -1,3 +1,4 @@ +import warnings from collections.abc import Iterable, Mapping, MutableMapping from pathlib import Path from typing import ( @@ -156,6 +157,11 @@ def open_virtual_dataset( ) if indexes is None: + warnings.warn( + "Specifying `indexes=None` will create in-memory pandas indexes for each 1D coordinate, but concatenation of ManifestArrays backed by pandas indexes is not yet supported (see issue #18)." + "You almost certainly want to pass `indexes={}` to `open_virtual_dataset` instead." + ) + # add default indexes by reading data from file indexes = {name: index for name, index in ds.xindexes.items()} elif indexes != {}: