Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GeometryIndex #7

Merged
merged 28 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ Indexing
.. autosummary::
:toctree: generated/

ShapelySTRTreeIndex.__init__
ShapelySTRTreeIndex.from_variables
ShapelySTRTreeIndex.sel
GeometryIndex
GeometryIndex.crs
GeometryIndex.sindex
7 changes: 7 additions & 0 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@
"sphinx.ext.autodoc",
"numpydoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
"myst_nb",
"sphinx_copybutton",
]

templates_path = ["_templates"]
exclude_patterns = []

intersphinx_mapping = {
"python": ("https://docs.python.org/3", None),
"shapely": ("https://shapely.readthedocs.io/en/latest/", None),
"pyproj": ("https://pyproj4.github.io/pyproj/latest/", None),
"xarray": ("https://docs.xarray.dev/en/latest/", None),
}

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
Expand Down
2,659 changes: 1,997 additions & 662 deletions doc/source/quickstart.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ classifiers = [
requires-python = ">=3.8"
dependencies = [
"xarray >= 2022.11.0",
"pyproj >= 3.0.0",
"shapely >= 2.0b1",
]

Expand Down
4 changes: 2 additions & 2 deletions xvec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from importlib.metadata import PackageNotFoundError, version

from .strtree import ShapelySTRTreeIndex # noqa
from .index import GeometryIndex # noqa

try:
__version__ = version("package-name")
__version__ = version("xvec")
martinfleis marked this conversation as resolved.
Show resolved Hide resolved
except PackageNotFoundError: # noqa
# package is not installed
pass
262 changes: 262 additions & 0 deletions xvec/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
from __future__ import annotations

import warnings
from typing import Any, Hashable, Iterable, Mapping, Sequence

import numpy as np
import pandas as pd
import shapely
from pyproj import CRS
from xarray import DataArray, Variable
from xarray.core.indexing import IndexSelResult
from xarray.indexes import Index, PandasIndex


def _format_crs(crs: CRS | None, max_width: int = 50) -> str:
if crs is not None:
srs = crs.to_string()
else:
srs = "None"

return srs if len(srs) <= max_width else " ".join([srs[:max_width], "..."])


def _get_common_crs(crs_set: set[CRS | None]):
# code taken from geopandas (BSD-3 Licence)

crs_not_none = [crs for crs in crs_set if crs is not None]
names = [crs.name for crs in crs_not_none]

if len(crs_not_none) == 0:
return None
if len(crs_not_none) == 1:
if len(crs_set) != 1:
warnings.warn(
"CRS not set for some of the concatenation inputs. "
f"Setting output's CRS as {names[0]} "
"(the single non-null crs provided)."
benbovy marked this conversation as resolved.
Show resolved Hide resolved
)
return crs_not_none[0]

raise ValueError(
f"cannot determine common CRS for concatenation inputs, got {names}. "
# "Use `to_crs()` to transform geometries to the same CRS before merging."
)


class GeometryIndex(Index):
"""An CRS-aware, Xarray-compatible index for vector geometries.

This index can be set from any 1-dimensional coordinate of
(shapely 2.0) :class:`shapely.Geometry` elements.

It provides all the basic functionality of an
:class:`xarray.indexes.PandasIndex`. In addition, it allows spatial
filtering based on geometries (powered by :class:`shapely.STRtree`).

Parameters
----------
index : :class:`xarray.indexes.PandasIndex`
An Xarray (pandas) index built from an array-like of
:class:`shapely.Geometry` objects.
crs : :class:`pyproj.crs.CRS` or any, optional
The coordinate reference system. Any value accepted by
:meth:`pyproj.crs.CRS.from_user_input`.

"""

_index: PandasIndex
_sindex: shapely.STRtree | None
_crs: CRS | None

def __init__(self, index: PandasIndex, crs: CRS | Any | None = None):
if not np.all(shapely.is_geometry(index.index)):
raise ValueError("array must contain shapely.Geometry objects")

if crs is not None:
crs = CRS.from_user_input(crs)

self._crs = crs
self._index = index
self._sindex = None

@property
def crs(self) -> CRS | None:
"""Returns the coordinate reference system of the index as a
:class:`pyproj.crs.CRS` object.
"""
return self._crs

@property
def sindex(self) -> shapely.STRtree:
"""Returns the spatial index, i.e., a :class:`shapely.STRtree` object.

It may build the index before returning it if it hasn't been built before.
"""
if self._sindex is None:
self._sindex = shapely.STRtree(self._index.index)
return self._sindex

def _check_crs(self, other_crs: CRS | None, allow_none: bool = False) -> bool:
"""Check if the index's projection is the same than the given one.
If allow_none is True, empty CRS is treated as the same.
"""
if allow_none:
if self.crs is None or other_crs is None:
return True
if not self.crs == other_crs:
return False
return True

def _crs_mismatch_warn(self, other_crs: CRS | None, stacklevel: int = 3):
"""Raise a CRS mismatch warning with the information on the assigned CRS."""
srs = _format_crs(self.crs, max_width=50)
other_srs = _format_crs(other_crs, max_width=50)

# TODO: expand warning message with reproject suggestion
warnings.warn(
"CRS mismatch between the CRS of index geometries "
"and the CRS of input geometries.\n"
f"Index CRS: {srs}\n"
f"Input CRS: {other_srs}\n",
UserWarning,
stacklevel=stacklevel,
)

@classmethod
def from_variables(
cls,
variables: Mapping[Any, Variable],
*,
options: Mapping[str, Any],
):
# TODO: try getting CRS from coordinate attrs or GeometryArray or SRID

index = PandasIndex.from_variables(variables, options={})
martinfleis marked this conversation as resolved.
Show resolved Hide resolved
return cls(index, crs=options.get("crs"))

@classmethod
def concat(
cls,
indexes: Sequence[GeometryIndex],
dim: Hashable,
positions: Iterable[Iterable[int]] | None = None,
) -> GeometryIndex:
crs_set = {idx.crs for idx in indexes}
crs = _get_common_crs(crs_set)

indexes_ = [idx._index for idx in indexes]
index = PandasIndex.concat(indexes_, dim, positions)
return cls(index, crs)

def create_variables(
self, variables: Mapping[Any, Variable] | None = None
) -> dict[Hashable, Variable]:
return self._index.create_variables(variables)

def to_pandas_index(self) -> pd.Index:
return self._index.index

def isel(self, indexers: Mapping[Any, Any]):
index = self._index.isel(indexers)

if index is not None:
return type(self)(index, self.crs)
else:
return None

def _sel_sindex(self, labels, method, tolerance):
# only one entry expected
assert len(labels) == 1
martinfleis marked this conversation as resolved.
Show resolved Hide resolved
label = next(iter(labels.values()))

if method != "nearest":
if not isinstance(label, shapely.Geometry):
raise ValueError(
"selection with another method than nearest only supports "
"a single geometry as input label."
)

if isinstance(label, DataArray):
label_array = label._variable._data
elif isinstance(label, Variable):
label_array = label._data
elif isinstance(label, shapely.Geometry):
label_array = np.array([label])
else:
label_array = np.array(label)

# check for possible CRS of geometry labels
# (by default assume same CRS than the index)
label_crs = getattr(label_array, "crs", None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this situation when label_array has crs attribute actually happen? Since we cast all inputs to the numpy.array above, even if we have something like a GeometryArray, we would strip it of CRS. Codecov marking l. 193 as untested seems to prove that.

I think we need to check for crs of labels not label_array.

Suggested change
label_crs = getattr(label_array, "crs", None)
label_crs = getattr(labels, "crs", None)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it depends (note: labels is a dict and label is the extracted value).

  • If label is a GeometryArray then yes we need to check label.crs
  • If label is a xarray.Variable or xarray.DataArray wrapping a GeometryArray (i.e., the ._data attribute), we need to check label_array.crs

The latter is not supported yet in Xarray, though, and we still need to figure out a more general way to extract CRS from various input types. So I suggest to remove the CRS check here and address that in a follow-up PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine with me. Let's move this to an issue to keep track.

if label_crs is not None and not self._check_crs(label_crs, allow_none=True):
self._crs_mismatch_warn(label_crs, stacklevel=4)

if not np.all(shapely.is_geometry(label_array)):
raise ValueError("labels must be shapely.Geometry objects")

if method == "nearest":
indices = self.sindex.nearest(label_array)
else:
indices = self.sindex.query(label, predicate=method, distance=tolerance)

# attach dimension names and/or coordinates to positional indexer
if isinstance(label, Variable):
indices = Variable(label.dims, indices)
elif isinstance(label, DataArray):
indices = DataArray(indices, coords=label._coords, dims=label.dims)

return IndexSelResult({self._index.dim: indices})

def sel(
self, labels: dict[Any, Any], method=None, tolerance=None
) -> IndexSelResult:
if method is None:
return self._index.sel(labels)
else:
# We reuse here `method` and `tolerance` options of
# `xarray.indexes.PandasIndex` as `predicate` and `distance`
# options when `labels` is a single geometry.
# Xarray currently doesn't support custom options
# (see https://github.com/pydata/xarray/issues/7099)
return self._sel_sindex(labels, method, tolerance)

def equals(self, other: Index) -> bool:
if not isinstance(other, GeometryIndex):
return False

if not self._check_crs(other.crs, allow_none=True):
self._crs_mismatch_warn(other.crs)

return self._index.equals(other._index)

def join(
self: GeometryIndex, other: GeometryIndex, how: str = "inner"
) -> GeometryIndex:
if not self._check_crs(other.crs, allow_none=True):
self._crs_mismatch_warn(other.crs)

index = self._index.join(other._index, how=how)
return type(self)(index, self.crs)
martinfleis marked this conversation as resolved.
Show resolved Hide resolved

def reindex_like(
self, other: GeometryIndex, method=None, tolerance=None
) -> dict[Hashable, Any]:
if not self._check_crs(other.crs, allow_none=True):
self._crs_mismatch_warn(other.crs)

return self._index.reindex_like(
other._index, method=method, tolerance=tolerance
)

def roll(self, shifts: Mapping[Any, int]) -> GeometryIndex:
index = self._index.roll(shifts)
return type(self)(index, self.crs)

def rename(self, name_dict, dims_dict):
index = self._index.rename(name_dict, dims_dict)
return type(self)(index, self.crs)

def _repr_inline_(self, max_width: int):
srs = _format_crs(self.crs, max_width=max_width)
return f"{self.__class__.__name__} (crs={srs})"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _repr_inline_(self, max_width: int):
srs = _format_crs(self.crs, max_width=max_width)
return f"{self.__class__.__name__} (crs={srs})"
def _repr_inline_(self, max_width: int):
if max_width is None:
max_width = 50
srs = _format_crs(self.crs, max_width=max_width)
return f"{self.__class__.__name__} (crs={srs})"

Somehow this is raising a TypeError. Cell 6 in the Quickstart notebook:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/IPython/core/formatters.py:342, in BaseFormatter.__call__(self, obj)
    340     method = get_real_method(obj, self.print_method)
    341     if method is not None:
--> 342         return method()
    343     return None
    344 else:

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/common.py:167, in AbstractArray._repr_html_(self)
    165 if OPTIONS["display_style"] == "text":
    166     return f"<pre>{escape(repr(self))}</pre>"
--> 167 return formatting_html.array_repr(self)

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting_html.py:318, in array_repr(arr)
    316 if hasattr(arr, "xindexes"):
    317     indexes = _get_indexes_dict(arr.xindexes)
--> 318     sections.append(index_section(indexes))
    320 sections.append(attr_section(arr.attrs))
    322 return _obj_repr(arr, header_components, sections)

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting_html.py:195, in _mapping_section(mapping, name, details_func, max_items_collapse, expand_option_name, enabled)
    188 expanded = _get_boolean_with_default(
    189     expand_option_name, n_items < max_items_collapse
    190 )
    191 collapsed = not expanded
    193 return collapsible_section(
    194     name,
--> 195     details=details_func(mapping),
    196     n_items=n_items,
    197     enabled=enabled,
    198     collapsed=collapsed,
    199 )

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting_html.py:155, in summarize_indexes(indexes)
    154 def summarize_indexes(indexes):
--> 155     indexes_li = "".join(
    156         f"<li class='xr-var-item'>{summarize_index(v, i)}</li>"
    157         for v, i in indexes.items()
    158     )
    159     return f"<ul class='xr-var-list'>{indexes_li}</ul>"

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting_html.py:156, in <genexpr>(.0)
    154 def summarize_indexes(indexes):
    155     indexes_li = "".join(
--> 156         f"<li class='xr-var-item'>{summarize_index(v, i)}</li>"
    157         for v, i in indexes.items()
    158     )
    159     return f"<ul class='xr-var-list'>{indexes_li}</ul>"

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting_html.py:139, in summarize_index(coord_names, index)
    136 name = "<br>".join([escape(str(n)) for n in coord_names])
    138 index_id = f"index-{uuid.uuid4()}"
--> 139 preview = escape(inline_index_repr(index))
    140 details = short_index_repr_html(index)
    142 data_icon = _icon("icon-database")

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting.py:413, in inline_index_repr(index, max_width)
    411 def inline_index_repr(index, max_width=None):
    412     if hasattr(index, "_repr_inline_"):
--> 413         repr_ = index._repr_inline_(max_width=max_width)
    414     else:
    415         # fallback for the `pandas.Index` subclasses from
    416         # `Indexes.get_pandas_indexes` / `xr_obj.indexes`
    417         repr_ = repr(index)

File ~/Git/xvec/xvec/index.py:261, in GeometryIndex._repr_inline_(self, max_width)
    260 def _repr_inline_(self, max_width: int):
--> 261     srs = _format_crs(self.crs, max_width=max_width)
    262     return f"{self.__class__.__name__} (crs={srs})"

File ~/Git/xvec/xvec/index.py:21, in _format_crs(crs, max_width)
     18 else:
     19     srs = "None"
---> 21 return srs if len(srs) <= max_width else " ".join([srs[:max_width], "..."])

TypeError: '<=' not supported between instances of 'int' and 'NoneType'

This bit seems to override our default with None

File ~/mambaforge/envs/xvec_dev/lib/python3.10/site-packages/xarray/core/formatting.py:413, in inline_index_repr(index, max_width)
    411 def inline_index_repr(index, max_width=None):
    412     if hasattr(index, "_repr_inline_"):
--> 413         repr_ = index._repr_inline_(max_width=max_width)

We need to catch that and override in that case. Either here like in the code suggestion above or in _format_crs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, we can fix it here but I think this should rather be fixed in xarray.core.formatting.inline_index_repr? cc @keewis.

72 changes: 0 additions & 72 deletions xvec/strtree.py

This file was deleted.

Loading