Skip to content

Commit

Permalink
Use a set to avoid duplicate var names from kerchunk (#179)
Browse files Browse the repository at this point in the history
  • Loading branch information
moradology authored Jul 9, 2024
1 parent 7a3e093 commit 1ac4efc
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 3 deletions.
4 changes: 2 additions & 2 deletions virtualizarr/kerchunk.py
Original file line number Diff line number Diff line change
def find_var_names(ds_reference_dict: KerchunkStoreRefs) -> list[str]:
    """Find the names of zarr variables in this store/group.

    A variable name is taken to be the text before the first ``/`` in each
    key of ``ds_reference_dict["refs"]``. Keys that contain no ``/`` are
    ignored.

    Parameters
    ----------
    ds_reference_dict
        Mapping with a ``"refs"`` entry whose keys are reference paths.

    Returns
    -------
    list[str]
        Each variable name exactly once, in the order it is first seen
        while iterating over ``refs``.
    """

    refs = ds_reference_dict["refs"]
    # dict.fromkeys drops duplicates but, unlike a set, keeps insertion
    # order, so the returned list is deterministic from run to run instead
    # of depending on string hash randomisation.
    unique_names = dict.fromkeys(key.split("/")[0] for key in refs if "/" in key)
    return list(unique_names)


def extract_array_refs(
Expand Down
13 changes: 12 additions & 1 deletion virtualizarr/tests/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import xarray as xr
import xarray.testing as xrt

from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
from virtualizarr.kerchunk import FileType, find_var_names, _automatically_determine_filetype, KerchunkStoreRefs
from virtualizarr.manifests import ChunkManifest, ManifestArray
from virtualizarr.xarray import dataset_from_kerchunk_refs

Expand Down Expand Up @@ -266,3 +266,14 @@ def test_FileType():
assert "zarr" == FileType("zarr").name
with pytest.raises(ValueError):
FileType(None)


def test_no_duplicates_find_var_names():
    """Verify that find_var_names returns each variable name only once."""
    ref_dict = {
        "refs": {
            # No "/" in this key, so it must not be reported as a variable.
            ".zgroup": {},
            # Two references under the same variable "x".
            "x/something": {},
            "x/otherthing": {},
        }
    }
    # Compare against the exact expected list rather than only its length,
    # so a wrong or extra name cannot slip through.
    assert find_var_names(ref_dict) == ["x"]

0 comments on commit 1ac4efc

Please sign in to comment.