Skip to content

Commit

Permalink
Warn on index creation (#170)
Browse files Browse the repository at this point in the history
* add warning when user passes indexes=None

* expect warnings in tests

* new test to show how #18 doesn't fully work yet

* release notes

* lint
  • Loading branch information
TomNicholas committed Jul 2, 2024
1 parent a0451b9 commit 2822961
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
3 changes: 2 additions & 1 deletion docs/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,11 @@ Bug fixes
Documentation
~~~~~~~~~~~~~

- Warn if user passes `indexes=None` to `open_virtual_dataset` to indicate that this is not yet fully supported.
(:pull:`170`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Clarify that virtual datasets cannot be treated like normal xarray datasets. (:issue:`173`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.


Internal Changes
~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,5 +173,5 @@ def test_open_scalar_variable(tmpdir):
ds = xr.Dataset(data_vars={"a": 0})
ds.to_netcdf(f"{tmpdir}/scalar.nc")

vds = open_virtual_dataset(f"{tmpdir}/scalar.nc")
vds = open_virtual_dataset(f"{tmpdir}/scalar.nc", indexes={})
assert vds["a"].shape == ()
30 changes: 26 additions & 4 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,8 @@ def test_no_indexes(self, netcdf4_file):
assert vds.indexes == {}

def test_create_default_indexes(self, netcdf4_file):
vds = open_virtual_dataset(netcdf4_file, indexes=None)
with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
vds = open_virtual_dataset(netcdf4_file, indexes=None)
ds = xr.open_dataset(netcdf4_file, decode_times=False)

# TODO use xr.testing.assert_identical(vds.indexes, ds.indexes) instead once class supported by assertion comparison, see https://github.com/pydata/xarray/issues/5812
Expand All @@ -278,15 +279,34 @@ class TestCombineUsingIndexes:
def test_combine_by_coords(self, netcdf4_files):
filepath1, filepath2 = netcdf4_files

vds1 = open_virtual_dataset(filepath1)
vds2 = open_virtual_dataset(filepath2)
with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
vds1 = open_virtual_dataset(filepath1)
with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
vds2 = open_virtual_dataset(filepath2)

combined_vds = xr.combine_by_coords(
[vds2, vds1],
)

assert combined_vds.xindexes["time"].to_pandas_index().is_monotonic_increasing

@pytest.mark.xfail(reason="Not yet implemented, see issue #18")
def test_combine_by_coords_keeping_manifestarrays(self, netcdf4_files):
filepath1, filepath2 = netcdf4_files

with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
vds1 = open_virtual_dataset(filepath1)
with pytest.warns(UserWarning, match="will create in-memory pandas indexes"):
vds2 = open_virtual_dataset(filepath2)

combined_vds = xr.combine_by_coords(
[vds2, vds1],
)

assert isinstance(combined_vds["time"].data, ManifestArray)
assert isinstance(combined_vds["lat"].data, ManifestArray)
assert isinstance(combined_vds["lon"].data, ManifestArray)


@network
@requires_s3fs
Expand Down Expand Up @@ -363,7 +383,9 @@ def test_read_from_url(self, filetype, url):
class TestLoadVirtualDataset:
def test_loadable_variables(self, netcdf4_file):
vars_to_load = ["air", "time"]
vds = open_virtual_dataset(netcdf4_file, loadable_variables=vars_to_load)
vds = open_virtual_dataset(
netcdf4_file, loadable_variables=vars_to_load, indexes={}
)

for name in vds.variables:
if name in vars_to_load:
Expand Down
6 changes: 6 additions & 0 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from collections.abc import Iterable, Mapping, MutableMapping
from pathlib import Path
from typing import (
Expand Down Expand Up @@ -156,6 +157,11 @@ def open_virtual_dataset(
)

if indexes is None:
warnings.warn(
"Specifying `indexes=None` will create in-memory pandas indexes for each 1D coordinate, but concatenation of ManifestArrays backed by pandas indexes is not yet supported (see issue #18)."
"You almost certainly want to pass `indexes={}` to `open_virtual_dataset` instead."
)

# add default indexes by reading data from file
indexes = {name: index for name, index in ds.xindexes.items()}
elif indexes != {}:
Expand Down

0 comments on commit 2822961

Please sign in to comment.