Skip to content

Commit

Permalink
Merge pull request #183 from Deltares/chore/improve_performance
Browse files Browse the repository at this point in the history
Chore/improve performance
  • Loading branch information
frederique-hub authored Sep 22, 2023
2 parents 0cf9947 + 06d9790 commit 98bcdd2
Show file tree
Hide file tree
Showing 14 changed files with 1,596 additions and 985 deletions.
1,635 changes: 974 additions & 661 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ GDAL = "3.5.1"
fiona = "1.8.21"
pygeos = "^0.14"
momepy = "0.5.0"
pyogrio = "^0.6.0"
joblib = "^1.3.2"

[tool.poetry.group.dev.dependencies]
isort = "^5.10.1"
Expand Down Expand Up @@ -78,6 +80,8 @@ pytest-cov = "^3.0.0"
pytest = "^7.1.3"
teamcity-messages = "^1.32"
testbook = "^0.4.2"
pytest-xdist = "^3.3.1"
pytest-profiling = "^1.7.0"

[tool.black]
line-length = 88
Expand Down
Empty file added ra2ce/graph/hazard/__init__.py
Empty file.
43 changes: 43 additions & 0 deletions ra2ce/graph/hazard/hazard_common_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import logging
from pathlib import Path
from osgeo import gdal
from networkx import Graph
from ra2ce.graph.networks_utils import bounds_intersect_2d, get_extent


def validate_extent_graph(extent_graph: list[float], tif_hazard_file: Path) -> None:
    """
    Checks that the extent of a graph overlaps the extent of a hazard raster (*.tif).

    Args:
        extent_graph (list[float]): Boundary values determining the extent of a graph.
        tif_hazard_file (Path): Hazard (*.tif) file, opened with GDAL.

    Raises:
        ValueError: When the hazard raster and the graph geometries do not overlap.
    """
    _raster_path = str(tif_hazard_file)
    extent = get_extent(gdal.Open(_raster_path))

    # Same ordering as expected by the intersection check: x-range first, then y-range.
    extent_hazard = tuple(extent[_bound] for _bound in ("minX", "maxX", "minY", "maxY"))

    if bounds_intersect_2d(extent_graph, extent_hazard):
        return

    # Log both extents before failing so the mismatch can be diagnosed.
    logging.info("Raster extent: {}, Graph extent: {}".format(extent, extent_graph))
    raise ValueError(
        "The hazard raster and the graph geometries do not overlap, check projection"
    )


def get_edges_geoms(graph: Graph) -> list:
    """
    Collects all edges of the provided graph that carry a geometry.

    Args:
        graph (Graph): Graph whose edges are iterated together with their keys and data.

    Returns:
        list: One `(u, v, k, edata)` tuple per edge whose data contains a "geometry" entry.
    """
    _edges_with_geometry = []
    for _start, _end, _key, _data in graph.edges.data(keys=True):
        if "geometry" not in _data:
            continue
        _edges_with_geometry.append((_start, _end, _key, _data))
    return _edges_with_geometry
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Risk Assessment and Adaptation for Critical Infrastructure (RA2CE).
Copyright (C) 2023 Stichting Deltares
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from geopandas import GeoDataFrame
from networkx import Graph
from ra2ce.graph.hazard.hazard_intersect.hazard_intersect_builder_protocol import HazardIntersectBuilderProtocol
from abc import ABC, abstractmethod

class HazardIntersectBuilderBase(ABC, HazardIntersectBuilderProtocol):
    """
    Base class for hazard intersect builders. It selects the concrete
    intersection method based on the type of the given overlay.
    """

    def get_intersection(
        self, hazard_overlay: GeoDataFrame | Graph
    ) -> GeoDataFrame | Graph:
        """
        Builds the hazard intersection for the given overlay.

        Args:
            hazard_overlay (GeoDataFrame | Graph): Network to intersect with the hazard.

        Raises:
            ValueError: When the overlay is neither a `GeoDataFrame` nor a `Graph`.

        Returns:
            GeoDataFrame | Graph: Whatever the type-specific method returns.
        """
        # Checked in this order: geodataframe first, then graph.
        _builders = (
            (GeoDataFrame, self._from_geodataframe),
            (Graph, self._from_networkx),
        )
        for _overlay_type, _build in _builders:
            if isinstance(hazard_overlay, _overlay_type):
                return _build(hazard_overlay)

        _type_name = type(hazard_overlay).__name__
        raise ValueError(
            "No hazard intersection can be built with overlay type {}.".format(
                _type_name
            )
        )

    @abstractmethod
    def _from_geodataframe(
        self, hazard_overlay: GeoDataFrame | Graph
    ) -> GeoDataFrame | Graph:
        """Builds the intersection for a `GeoDataFrame` overlay."""

    @abstractmethod
    def _from_networkx(
        self, hazard_overlay: GeoDataFrame | Graph
    ) -> GeoDataFrame | Graph:
        """Builds the intersection for a `Graph` overlay."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Risk Assessment and Adaptation for Critical Infrastructure (RA2CE).
Copyright (C) 2023 Stichting Deltares
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable
from geopandas import GeoDataFrame, read_file, sjoin
from joblib import Parallel, delayed
from networkx import Graph
from numpy import nanmean
from ra2ce.graph.hazard.hazard_intersect.hazard_intersect_builder_base import (
HazardIntersectBuilderBase,
)


@dataclass
class HazardIntersectBuilderForShp(HazardIntersectBuilderBase):
    """
    Intersects a network (graph or geodataframe) with one or more hazard
    shapefiles. The file at position `i` of `hazard_shp_files` is paired with
    the name at position `i` of `ra2ce_names`.
    """

    # Column of the hazard shapefile(s) holding the hazard value.
    hazard_field_name: str = ""
    # Aggregation name; "max", "min" and "mean" are handled for graphs. Its first
    # two characters are used as suffix of the attribute / column that is written.
    hazard_aggregate_wl: str = ""
    # Not read by the methods of this class.
    hazard_names: list[str] = field(default_factory=list)
    ra2ce_names: list[str] = field(default_factory=list)
    hazard_shp_files: list[str] = field(default_factory=list)

    def _from_networkx(self, hazard_overlay: Graph) -> Graph:
        """Overlays the hazard shapefile(s) over the road segments NetworkX graph.

        For every hazard file each edge gets the attribute
        `<ra2ce_name>_<first two characters of hazard_aggregate_wl>`. Its value
        is the aggregated hazard value of the intersecting hazard features, or 0
        when the edge has no geometry or nothing intersects it.

        Args:
            hazard_overlay (NetworkX graph): The graph that should be overlayed with the hazard shapefile(s)

        Returns:
            hazard_overlay (NetworkX graph): The same graph, with hazard shapefile(s) data joined
        """
        # Resolved once instead of comparing strings for every edge. An
        # unsupported aggregation name yields `None`: edges with matches are
        # then left without the attribute, as before.
        _aggregators = {
            "max": lambda _values: _values.max(),
            "min": lambda _values: _values.min(),
            "mean": nanmean,
        }
        _aggregate = _aggregators.get(self.hazard_aggregate_wl)

        # TODO check if the CRS of the graph and shapefile match
        def networkx_overlay(hazard_shp_file: Path, ra2ce_name: str) -> None:
            gdf = read_file(str(hazard_shp_file))
            spatial_index = gdf.sindex
            _attribute = ra2ce_name + "_" + self.hazard_aggregate_wl[:2]

            for u, v, k, edata in hazard_overlay.edges.data(keys=True):
                if "geometry" not in edata:
                    hazard_overlay[u][v][k][_attribute] = 0
                    continue

                # Cheap bounding-box filter first, exact intersection second.
                possible_matches_index = list(
                    spatial_index.intersection(edata["geometry"].bounds)
                )
                possible_matches = gdf.iloc[possible_matches_index]
                precise_matches = possible_matches[
                    possible_matches.intersects(edata["geometry"])
                ]

                if precise_matches.empty:
                    hazard_overlay[u][v][k][_attribute] = 0
                elif _aggregate is not None:
                    hazard_overlay[u][v][k][_attribute] = _aggregate(
                        precise_matches[self.hazard_field_name]
                    )

        # Run in parallel to boost performance. Each call only writes its own
        # attribute name into the edge data of the shared graph.
        self._overlay_in_parallel(networkx_overlay)
        return hazard_overlay

    def _from_geodataframe(self, hazard_overlay: GeoDataFrame) -> GeoDataFrame:
        """Overlays the hazard shapefile(s) over the road segments GeoDataFrame. The gdf is reprojected to the hazard shapefile if necessary.

        Args:
            hazard_overlay (GeoDataFrame): the network geodataframe that should be overlayed with the hazard shapefile(s)

        Returns:
            hazard_overlay (GeoDataFrame): the network geodataframe with hazard shapefile(s) data joined, in its original CRS
        """
        gdf_crs_original = hazard_overlay.crs
        _column_suffix = "_" + self.hazard_aggregate_wl[:2]

        def read_hazard(
            hazard_shp_file: Path, ra2ce_name: str
        ) -> tuple[str, GeoDataFrame]:
            return ra2ce_name, read_file(str(hazard_shp_file))

        # Only reading the files is done in parallel. The joins rebind
        # `hazard_overlay`, so they have to run one after another: rebinding it
        # from within a nested function raised an UnboundLocalError, and doing
        # so from several threads at once would drop joined columns.
        for ra2ce_name, gdf_hazard in self._overlay_in_parallel(read_hazard):
            if hazard_overlay.crs != gdf_hazard.crs:
                hazard_overlay = hazard_overlay.to_crs(gdf_hazard.crs)

            # NOTE(review): a left sjoin adds an `index_right` column; geopandas
            # may refuse a following sjoin on a frame that already has one -
            # confirm the behaviour with more than one hazard file.
            hazard_overlay = sjoin(
                hazard_overlay,
                gdf_hazard[[self.hazard_field_name, "geometry"]],
                how="left",
            )
            hazard_overlay.rename(
                columns={self.hazard_field_name: ra2ce_name + _column_suffix},
                inplace=True,
            )

        if hazard_overlay.crs != gdf_crs_original:
            hazard_overlay = hazard_overlay.to_crs(gdf_crs_original)

        return hazard_overlay

    def _overlay_in_parallel(self, overlay_func: Callable) -> list:
        """Calls `overlay_func(hazard_shp_file, ra2ce_name)` for every entry of `ra2ce_names`.

        The calls are dispatched through joblib with two jobs and
        `require="sharedmem"`, so that `overlay_func` can modify objects of the
        caller.

        Args:
            overlay_func (Callable): Function receiving a hazard file and its ra2ce name.

        Returns:
            list: The values returned by `overlay_func`, one per entry of `ra2ce_names`.
        """
        return Parallel(n_jobs=2, require="sharedmem")(
            delayed(overlay_func)(self.hazard_shp_files[i], _ra2ce_name)
            for i, _ra2ce_name in enumerate(self.ra2ce_names)
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Risk Assessment and Adaptation for Critical Infrastructure (RA2CE).
Copyright (C) 2023 Stichting Deltares
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from dataclasses import dataclass
from ra2ce.graph.hazard.hazard_intersect.hazard_intersect_builder_base import (
HazardIntersectBuilderBase,
)

from networkx import Graph
from geopandas import GeoDataFrame
from ra2ce.graph.networks_utils import (
graph_from_gdf,
graph_to_gdf,
)
from pandas import read_csv


@dataclass
class HazardIntersectBuilderForTable(HazardIntersectBuilderBase):
    """
    Joins hazard information stored in table (*.csv) files to a network
    (graph or geodataframe) by matching identifiers.
    """

    # Column of the hazard table holding the hazard value.
    hazard_field_name: str = ""
    # Identifier column of the network, used as left key of the join.
    network_file_id: str = ""
    # Identifier column of the hazard table, used as right key of the join.
    hazard_id: str = ""
    ra2ce_name_key: str = "RA2CE name"
    # Read as `hazard_files["table"]`, whose entries expose a `.suffix`.
    # NOTE(review): presumably a list of `Path` - confirm against the caller.
    hazard_files: dict | None = None
    # Read as `hazard_name_table[ra2ce_name_key]`, an iterable of names.
    # NOTE(review): presumably a pandas DataFrame - confirm against the caller.
    hazard_name_table: object | None = None

    def _from_networkx(self, hazard_overlay: Graph) -> Graph:
        """Joins a table with IDs and hazard information with the road segments with corresponding IDs.

        The graph is converted to geodataframes, joined as a geodataframe and
        converted back to a graph.
        """
        # Named after the abstract method of the base class; under its former
        # name `_from_network_x` the abstract method stayed unimplemented.
        gdf, gdf_nodes = graph_to_gdf(hazard_overlay, save_nodes=True)
        gdf = self._from_geodataframe(gdf)

        # TODO: Check if the graph is created again correctly.
        return graph_from_gdf(gdf, gdf_nodes)

    def _from_geodataframe(self, hazard_overlay: GeoDataFrame) -> GeoDataFrame:
        """Joins a table with IDs and hazard information with the road segments with corresponding IDs.

        Raises:
            ValueError: When no `hazard_files` were provided.
        """
        if self.hazard_files is None:
            raise ValueError("No hazard files were provided to join with the network.")

        # Only csv tables are joined; other entries are skipped.
        for haz in self.hazard_files["table"]:
            if haz.suffix == ".csv":
                hazard_overlay = self._join_table(hazard_overlay, haz)
        return hazard_overlay

    def _join_table(self, graph: GeoDataFrame, hazard: str) -> GeoDataFrame:
        """Left-joins the hazard column of a csv file to the network dataframe.

        Args:
            graph (GeoDataFrame): Network dataframe containing the `network_file_id` column.
            hazard (str): Location of the csv file, as accepted by `pandas.read_csv`.

        Raises:
            ValueError: When no `hazard_name_table` was provided.

        Returns:
            GeoDataFrame: The network dataframe with the hazard column joined and renamed.
        """
        if self.hazard_name_table is None:
            raise ValueError("No hazard name table was provided to name the hazard column.")

        df = read_csv(hazard)
        # Keep the join key next to the hazard value: selecting only the hazard
        # column leaves nothing for `right_on` to match. `dict.fromkeys` avoids
        # selecting the same column twice when both names are equal.
        _columns = list(dict.fromkeys([self.hazard_id, self.hazard_field_name]))
        graph = graph.merge(
            df[_columns],
            how="left",
            left_on=self.network_file_id,
            right_on=self.hazard_id,
        )

        # The first ra2ce name, without its last three characters, becomes the
        # name of the joined hazard column.
        _ra2ce_names = list(self.hazard_name_table[self.ra2ce_name_key])
        graph.rename(
            columns={self.hazard_field_name: _ra2ce_names[0][:-3]},
            inplace=True,
        )  # Check if this is the right name
        return graph
Loading

0 comments on commit 98bcdd2

Please sign in to comment.