From 1ac4efc770f6965ac2c08f063359b191b112647b Mon Sep 17 00:00:00 2001
From: Nathan Zimmerman
Date: Tue, 9 Jul 2024 16:00:33 -0500
Subject: [PATCH] Use a set to avoid duplicate var names from kerchunk (#179)

---
 virtualizarr/kerchunk.py            |  4 ++--
 virtualizarr/tests/test_kerchunk.py | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/virtualizarr/kerchunk.py b/virtualizarr/kerchunk.py
index 97f64b1b..6e82067d 100644
--- a/virtualizarr/kerchunk.py
+++ b/virtualizarr/kerchunk.py
@@ -165,8 +165,8 @@ def find_var_names(ds_reference_dict: KerchunkStoreRefs) -> list[str]:
     """Find the names of zarr variables in this store/group."""
 
     refs = ds_reference_dict["refs"]
-    found_var_names = [key.split("/")[0] for key in refs.keys() if "/" in key]
-    return found_var_names
+    found_var_names = {key.split("/")[0] for key in refs.keys() if "/" in key}
+    return list(found_var_names)
 
 
 def extract_array_refs(
diff --git a/virtualizarr/tests/test_kerchunk.py b/virtualizarr/tests/test_kerchunk.py
index 22d6d7df..a6693e29 100644
--- a/virtualizarr/tests/test_kerchunk.py
+++ b/virtualizarr/tests/test_kerchunk.py
@@ -5,7 +5,7 @@
 import xarray as xr
 import xarray.testing as xrt
 
-from virtualizarr.kerchunk import FileType, _automatically_determine_filetype
+from virtualizarr.kerchunk import FileType, find_var_names, _automatically_determine_filetype, KerchunkStoreRefs
 from virtualizarr.manifests import ChunkManifest, ManifestArray
 from virtualizarr.xarray import dataset_from_kerchunk_refs
 
@@ -266,3 +266,14 @@ def test_FileType():
     assert "zarr" == FileType("zarr").name
     with pytest.raises(ValueError):
         FileType(None)
+
+
+def test_no_duplicates_find_var_names():
+    """Verify that we get a deduplicated list of var names"""
+    ref_dict = {
+        "refs": {
+            "x/something": {},
+            "x/otherthing": {}
+        }
+    }
+    assert len(find_var_names(ref_dict)) == 1
\ No newline at end of file