Skip to content

Commit

Permalink
fix: 435 fix exception in case no lanes in any of the road types
Browse files Browse the repository at this point in the history
  • Loading branch information
ArdtK authored Jun 27, 2024
1 parent d71d621 commit eede49c
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 18 deletions.
32 changes: 18 additions & 14 deletions ra2ce/analysis/damages/damages_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,20 +121,24 @@ def create_summary_statistics(gdf: GeoDataFrame) -> dict:
"""
# TODO: in the future, make this more generic so that the aggregation (mode, mean, etc.) can be chosen easily

dictionary = dict(gdf.groupby("road_type")["lanes"].agg(pd.Series.mode))

# get a default value if any key of the dictionary became empty (because the mode operation on the 'lanes' column
# for a road type results in an empty array)
non_empty_modes = [
value for value in dictionary.values() if isinstance(value, float) and value > 0
]
default_value = np.mean(non_empty_modes)

# Replace empty arrays with the calculated average
for key, value in dictionary.items():
if isinstance(value, np.ndarray) and len(value) == 0:
dictionary[key] = default_value
return dictionary
_grouped_lanes = gdf.groupby("road_type")["lanes"]
_road_types, _lanes = list(zip(*((x[0], x[1].mode()) for x in _grouped_lanes)))
_lanes_dict = {
_road_type: _lanes[0] if not _lanes.empty else np.nan
for _road_type, _lanes in zip(_road_types, _lanes)
}

# get a default value if any key of the dictionary is nan
# (because the mode operation on the 'lanes' column for a road type results in an empty array)
default_value = np.mean(
list(_val for _val in _lanes_dict.values() if not np.isnan(_val))
)

# Replace nan with the calculated average
return {
_road_type: _lanes if not np.isnan(_lanes) else default_value
for _road_type, _lanes in _lanes_dict.items()
}


def scale_damage_using_lanes(lane_scale_factors, df, cols_to_scale) -> pd.DataFrame:
Expand Down
29 changes: 25 additions & 4 deletions tests/analysis/damages/test_damages_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,40 @@ def test_clean_lane_data(self):
assert _result_data[0] == 42
assert _result_data[1] == 24

def test_create_summary_statistics(self):
@pytest.mark.parametrize(
"lanes, expected",
[
pytest.param([0, 1, 3], [0, 1, 3], id="Valid lanes"),
pytest.param([np.nan, 1, 3], [2, 1, 3], id="First lane invalid"),
pytest.param([0, np.nan, 3], [0, 1.5, 3], id="Middle lane invalid"),
pytest.param([0, 1, np.nan], [0, 1, 0.5], id="Last lane invalid"),
pytest.param(
[np.nan, np.nan, np.nan],
[np.nan, np.nan, np.nan],
id="All lanes invalid",
),
],
)
def test_create_summary_statistics(self, lanes, expected):
# 1. Define test data.
_left_line = LineString([[0, 0], [1, 0], [2, 0]])
_middle_line = LineString([[1, 0], [1, 1], [2, 2]])
_right_line = LineString([[3, 0], [2, 1], [2, 2]])
_data = {
"road_type": ["name1", "name2"],
"geometry": [_left_line, _right_line],
"lanes": [0, 1],
"road_type": ["name1", "name2", "name3"],
"geometry": [_left_line, _middle_line, _right_line],
"lanes": lanes,
}
_test_gdf = gpd.GeoDataFrame(_data, crs="EPSG:4326")

# 2. Run test.
result_dict = create_summary_statistics(_test_gdf)

# 3. Verify final expectations
def valid_lanes(lane: float, expected: float) -> bool:
if np.isnan(lane):
return np.isnan(expected)
return lane == pytest.approx(expected)

assert isinstance(result_dict, dict)
assert all(map(valid_lanes, result_dict.values(), expected))

0 comments on commit eede49c

Please sign in to comment.