Skip to content

Commit

Permalink
chore: Removed intermediate export logic (#649)
Browse files Browse the repository at this point in the history
* chore: Removed intermediate export logic

* feat: Refactored both analyses to generate an `AnalysisResultWrapper`

* chore: Corrected network option to get back the nodes and edges whilst not exporting

* chore: Small correction to output file

* chore: small refactor to reduce code duplication

* chore: removed unnecessary references

* chore: Refactored code to remove unnecessary methods from `network_utils`

* chore: Removed unnecessary method from `network_utils`

* chore: Minor corrections after refactoring

* test: Corrected test assertion so the error message is clearer

* chore: Corrected export statement, now it exports to `gpkg`

* chore: Wrong basename given during `MultiGraphNetworkExporter.export_to_gpkg`

* chore: Corrected generation of optimal routes files

* chore: corrected wrong suffix name `_node` instead of `_nodes`

* chore: Reordered appending of analysis result as it was done earlier

* chore: Processed review remarks

* chore: Corrected logic that was preventing us from exporting the index in the geodataframes
  • Loading branch information
Carsopre authored Dec 11, 2024
2 parents 4a97682 + 4c06436 commit 9eb97c3
Show file tree
Hide file tree
Showing 13 changed files with 149 additions and 228 deletions.
23 changes: 15 additions & 8 deletions ra2ce/analysis/analysis_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ class AnalysisBase(ABC, AnalysisProtocol):
the `AnalysisProtocol`.
"""

def _get_analysis_result(
    self, gdf_result: GeoDataFrame, custom_name: str
) -> AnalysisResult:
    """
    Wraps a single `GeoDataFrame` into an `AnalysisResult` that carries
    this analysis' configuration and output path.

    Args:
        gdf_result (GeoDataFrame): Result data to wrap.
        custom_name (str): Value to assign to `analysis_name`. When empty,
            the name set by `AnalysisResult` itself is left untouched.

    Returns:
        AnalysisResult: The wrapped result.
    """
    _wrapped_result = AnalysisResult(
        analysis_result=gdf_result,
        analysis_config=self.analysis,
        output_path=self.output_path,
    )
    # Guard clause: nothing to override when no custom name was given.
    if not custom_name:
        return _wrapped_result
    _wrapped_result.analysis_name = custom_name
    return _wrapped_result

def generate_result_wrapper(
self, *analysis_result: GeoDataFrame
) -> AnalysisResultWrapper:
Expand All @@ -48,13 +60,8 @@ def generate_result_wrapper(
AnalysisResultWrapper: Wrapping result with configuration details.
"""

def get_analysis_result(gdf_result: GeoDataFrame) -> AnalysisResult:
return AnalysisResult(
analysis_result=gdf_result,
analysis_config=self.analysis,
output_path=self.output_path,
)

return AnalysisResultWrapper(
results_collection=list(map(get_analysis_result, analysis_result))
results_collection=[
self._get_analysis_result(_ar, "") for _ar in analysis_result
]
)
111 changes: 30 additions & 81 deletions ra2ce/analysis/losses/multi_link_origin_closest_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from ra2ce.network.network_config_data.network_config_data import (
OriginsDestinationsSection,
)
from ra2ce.network.networks_utils import graph_to_gpkg
from ra2ce.ra2ce_logger import logging
from ra2ce.network.networks_utils import get_nodes_and_edges_from_origin_graph


class MultiLinkOriginClosestDestination(AnalysisBase, AnalysisLossesProtocol):
Expand Down Expand Up @@ -46,56 +45,13 @@ def __init__(
self.file_id = analysis_input.file_id
self._analysis_input = analysis_input

def _save_gdf(self, gdf: GeoDataFrame, save_path: Path) -> None:
"""Takes in a geodataframe object and outputs shapefiles at the paths indicated by edge_shp and node_shp
Arguments:
gdf [geodataframe]: geodataframe object to be converted
save_path [str]: output path including extension for edges shapefile
Returns:
None
"""
# save to shapefile
gdf.crs = "epsg:4326" # TODO: decide if this should be variable with e.g. an output_crs configured

for col in gdf.columns:
if gdf[col].dtype == object and col != gdf.geometry.name:
gdf[col] = gdf[col].astype(str)

if save_path.exists():
save_path.unlink()
gdf.to_file(save_path, driver="GPKG")
logging.info("Results saved to: {}".format(save_path))

def execute(self) -> AnalysisResultWrapper:
def _save_gpkg_analysis(
base_graph,
to_save_gdf: list[GeoDataFrame],
to_save_gdf_names: list[str],
):
for to_save, save_name in zip(to_save_gdf, to_save_gdf_names):
if not to_save.empty:
gpkg_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + f"_{save_name}.gpkg"
)
self._save_gdf(to_save, gpkg_path)

# Save the Graph
gpkg_path_nodes = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_results_nodes.gpkg"
)
gpkg_path_edges = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_results_edges.gpkg"
)
graph_to_gpkg(base_graph, gpkg_path_edges, gpkg_path_nodes)

_output_path = self.output_path.joinpath(self.analysis.analysis.config_value)

analyzer = OriginClosestDestination(self._analysis_input)

if self.analysis.calculate_route_without_disruption:
(
base_graph,
_base_graph,
opt_routes_without_hazard,
destinations,
) = analyzer.optimal_route_origin_closest_destination()
Expand All @@ -105,7 +61,7 @@ def _save_gpkg_analysis(
opt_routes_with_hazard = GeoDataFrame(data=None)
else:
(
base_graph,
_base_graph,
origins,
destinations,
agg_results,
Expand All @@ -119,48 +75,42 @@ def _save_gpkg_analysis(
)
else:
(
base_graph,
_base_graph,
origins,
destinations,
agg_results,
opt_routes_with_hazard,
) = analyzer.multi_link_origin_closest_destination()
opt_routes_without_hazard = GeoDataFrame()

if self.analysis.save_gpkg:
# Save the GeoDataFrames
to_save_gdf = [
origins,
destinations,
opt_routes_without_hazard,
opt_routes_with_hazard,
]
to_save_gdf_names = [
"origins",
"destinations",
"optimal_routes_without_hazard",
"optimal_routes_with_hazard",
_nodes_graph, _edges_graph = get_nodes_and_edges_from_origin_graph(_base_graph)
_base_name = self.analysis.name.replace(" ", "_")
_analysis_result_wrapper = AnalysisResultWrapper(
results_collection=[
self._get_analysis_result(origins, _base_name + "_origins"),
self._get_analysis_result(destinations, _base_name + "_destinations"),
self._get_analysis_result(_nodes_graph, _base_name + "_results_nodes"),
self._get_analysis_result(_edges_graph, _base_name + "_results_edges"),
self._get_analysis_result(
opt_routes_without_hazard,
_base_name + "_optimal_routes_without_hazard",
),
self._get_analysis_result(
opt_routes_with_hazard, _base_name + "_optimal_routes_with_hazard"
),
]
_save_gpkg_analysis(base_graph, to_save_gdf, to_save_gdf_names)
if self.analysis.save_csv:
csv_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_destinations.csv"
)
if "geometry" in destinations.columns:
del destinations["geometry"]
if not csv_path.parent.exists():
csv_path.parent.mkdir(parents=True)
destinations.to_csv(csv_path, index=False)
)

csv_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_optimal_routes.csv"
# Legacy code, previously only done to export to CSV.
# Both route sets are registered under the same `_optimal_routes` name.
# Each emptiness check must guard the very GeoDataFrame that gets wrapped:
# checking one set and appending the other would drop a non-empty result
# and could append an empty one instead.
_opt_routes_name = _base_name + "_optimal_routes"
for _opt_routes in (opt_routes_without_hazard, opt_routes_with_hazard):
    if not _opt_routes.empty:
        _analysis_result_wrapper.results_collection.append(
            self._get_analysis_result(_opt_routes, _opt_routes_name)
        )
if not opt_routes_without_hazard.empty:
del opt_routes_without_hazard["geometry"]
opt_routes_without_hazard.to_csv(csv_path, index=False)
if not opt_routes_with_hazard.empty:
del opt_routes_with_hazard["geometry"]
opt_routes_with_hazard.to_csv(csv_path, index=False)

if self.graph_file_hazard.file is not None:
agg_results.to_excel(
Expand All @@ -170,5 +120,4 @@ def _save_gpkg_analysis(
index=False,
)

# TODO: This does not seem correct, why were we returning None?
return self.generate_result_wrapper(None)
return _analysis_result_wrapper
78 changes: 10 additions & 68 deletions ra2ce/analysis/losses/optimal_route_origin_closest_destination.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import logging
from pathlib import Path

from geopandas import GeoDataFrame

from ra2ce.analysis.analysis_base import AnalysisBase
from ra2ce.analysis.analysis_config_data.analysis_config_data import (
AnalysisSectionLosses,
Expand All @@ -16,7 +13,6 @@
from ra2ce.network.network_config_data.network_config_data import (
OriginsDestinationsSection,
)
from ra2ce.network.networks_utils import graph_to_gpkg


class OptimalRouteOriginClosestDestination(AnalysisBase, AnalysisLossesProtocol):
Expand Down Expand Up @@ -44,74 +40,20 @@ def __init__(
self.file_id = analysis_input.file_id
self._analysis_input = analysis_input

def _save_gdf(self, gdf: GeoDataFrame, save_path: Path):
"""Takes in a geodataframe object and outputs shapefiles at the paths indicated by edge_shp and node_shp
Arguments:
gdf [geodataframe]: geodataframe object to be converted
save_path [str]: output path including extension for edges shapefile
Returns:
None
"""
# save to shapefile
gdf.crs = "epsg:4326" # TODO: decide if this should be variable with e.g. an output_crs configured

for col in gdf.columns:
if gdf[col].dtype == object and col != gdf.geometry.name:
gdf[col] = gdf[col].astype(str)

if save_path.exists():
save_path.unlink()
gdf.to_file(save_path, driver="GPKG")
logging.info("Results saved to: {}".format(save_path))

def execute(self) -> AnalysisResultWrapper:
def _save_gpkg_analysis(
base_graph,
to_save_gdf: list[GeoDataFrame],
to_save_gdf_names: list[str],
):
for to_save, save_name in zip(to_save_gdf, to_save_gdf_names):
if not to_save.empty:
gpkg_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + f"_{save_name}.gpkg"
)
self._save_gdf(to_save, gpkg_path)

# Save the Graph
gpkg_path_nodes = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_results_nodes.gpkg"
)
gpkg_path_edges = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_results_edges.gpkg"
)
graph_to_gpkg(base_graph, gpkg_path_edges, gpkg_path_nodes)

_output_path = self.output_path.joinpath(self.analysis.analysis.config_value)

analyzer = OriginClosestDestination(self._analysis_input)

# Get gdfs
(
base_graph,
opt_routes,
destinations,
) = analyzer.optimal_route_origin_closest_destination()
if self.analysis.save_gpkg:
# Save the GeoDataFrames
to_save_gdf = [destinations, opt_routes]
to_save_gdf_names = ["destinations", "optimal_routes"]
_save_gpkg_analysis(base_graph, to_save_gdf, to_save_gdf_names)

if self.analysis.save_csv:
csv_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_destinations.csv"
)
del destinations["geometry"]
destinations.to_csv(csv_path, index=False)

csv_path = _output_path.joinpath(
self.analysis.name.replace(" ", "_") + "_optimal_routes.csv"
)
del opt_routes["geometry"]
opt_routes.to_csv(csv_path, index=False)
# TODO: This does not seem correct, why were we returning None?
return self.generate_result_wrapper(None)
# Result names are derived from the analysis name, with spaces replaced by underscores.
_base_name = self.analysis.name.replace(" ", "_")
# NOTE(review): `base_graph` is the first value returned by
# `analyzer.optimal_route_origin_closest_destination()`; it is wrapped here
# under the `_origins` name although `_get_analysis_result` annotates its
# first argument as a `GeoDataFrame`. Presumably the graph should first be
# converted into nodes / edges geodataframes - TODO confirm.
return AnalysisResultWrapper(
results_collection=[
self._get_analysis_result(base_graph, _base_name + "_origins"),
self._get_analysis_result(destinations, _base_name + "_destinations"),
self._get_analysis_result(opt_routes, _base_name + "_optimal_routes"),
]
)
17 changes: 11 additions & 6 deletions ra2ce/network/exporters/geodataframe_network_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,18 @@

class GeoDataFrameNetworkExporter(NetworkExporterBase):
def export_to_gpkg(self, output_dir: Path, export_data: gpd.GeoDataFrame) -> None:
_output_shp_path = output_dir / (self._basename + ".gpkg")
_output_gpkg_path = output_dir.joinpath(self.basename + ".gpkg")

if _output_gpkg_path.exists():
logging.info("Removing previous gpkg file %s.", _output_gpkg_path)
_output_gpkg_path.unlink()

export_data.to_file(
_output_shp_path, index=False
) # , encoding='utf-8' -Removed the encoding type because this causes some shapefiles not to save.
logging.info(f"Saved {_output_shp_path.stem} in {output_dir}.")
_output_gpkg_path, index=False, driver="GPKG", encoding="utf-8"
)
logging.info("Saved %s in %s.", _output_gpkg_path.stem, output_dir)

def export_to_pickle(self, output_dir: Path, export_data: gpd.GeoDataFrame) -> None:
self.pickle_path = output_dir / (self._basename + ".feather")
self.pickle_path = output_dir.joinpath(self.basename + ".feather")
export_data.to_feather(self.pickle_path, index=False)
logging.info(f"Saved {self.pickle_path.stem} in {output_dir}.")
logging.info("Saved %s in %s.", self.pickle_path.stem, output_dir)
32 changes: 20 additions & 12 deletions ra2ce/network/exporters/multi_graph_network_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@
from pathlib import Path
from typing import Optional

from geopandas import GeoDataFrame

from ra2ce.network.exporters.geodataframe_network_exporter import (
GeoDataFrameNetworkExporter,
)
from ra2ce.network.exporters.network_exporter_base import (
MULTIGRAPH_TYPE,
NetworkExporterBase,
)
from ra2ce.network.networks_utils import graph_to_gpkg
from ra2ce.network.networks_utils import get_nodes_and_edges_from_origin_graph


class MultiGraphNetworkExporter(NetworkExporterBase):
Expand All @@ -38,20 +43,23 @@ def export_to_gpkg(self, output_dir: Path, export_data: MULTIGRAPH_TYPE) -> None
if not output_dir.is_dir():
output_dir.mkdir(parents=True)

# TODO: This method should be a writer itself.
graph_to_gpkg(
export_data,
output_dir / (self._basename + "_edges.gpkg"),
output_dir / (self._basename + "_nodes.gpkg"),
)
logging.info(
f"Saved {self._basename + '_edges.gpkg'} and {self._basename + '_nodes.gpkg'} in {output_dir}."
)
_nodes_graph, _edges_graph = get_nodes_and_edges_from_origin_graph(export_data)

def export_gdf(gdf_data: GeoDataFrame, suffix: str):
    """
    Writes `gdf_data` as a GeoPackage named `<basename><suffix>.gpkg`
    inside `output_dir`.
    Different from `GeoDataFrameNetworkExporter` at `index=True`.
    """
    _file_name = self.basename + suffix + ".gpkg"
    _export_file = output_dir / _file_name
    gdf_data.to_file(
        _export_file,
        index=True,
        driver="GPKG",
        encoding="utf-8",
    )
    logging.info("Saved %s in %s.", _export_file.stem, output_dir)

export_gdf(_edges_graph, "_edges")
export_gdf(_nodes_graph, "_nodes")

def export_to_pickle(self, output_dir: Path, export_data: MULTIGRAPH_TYPE) -> None:
self.pickle_path = output_dir / (self._basename + ".p")
self.pickle_path = output_dir.joinpath(self.basename + ".p")
with open(self.pickle_path, "wb") as f:
pickle.dump(export_data, f, protocol=4)
logging.info(
f"Saved {self.pickle_path.stem} in {self.pickle_path.resolve().parent}."
"Saved %s in %s.", self.pickle_path.stem, self.pickle_path.resolve().parent
)
Loading

0 comments on commit 9eb97c3

Please sign in to comment.