fix: 435 fix exception raised when none of the road types has any lanes

Deltares · Jun 27, 2024 · eede49c · eede49c
1 parent d71d621
commit eede49c
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 18 deletions.
diff --git a/ra2ce/analysis/damages/damages_utils.py b/ra2ce/analysis/damages/damages_utils.py
@@ -121,20 +121,24 @@ def create_summary_statistics(gdf: GeoDataFrame) -> dict:
     """
     # Todo: in the future we can make it more generic, so that we can easily get the mode/mean/whatever
 
-    dictionary = dict(gdf.groupby("road_type")["lanes"].agg(pd.Series.mode))
-
-    # get a default value if any key of the dictionary became empty (because the mode operation on the 'lanes' column
-    # for a road type results in an empty array
-    non_empty_modes = [
-        value for value in dictionary.values() if isinstance(value, float) and value > 0
-    ]
-    default_value = np.mean(non_empty_modes)
-
-    # Replace empty arrays with the calculated average
-    for key, value in dictionary.items():
-        if isinstance(value, np.ndarray) and len(value) == 0:
-            dictionary[key] = default_value
-    return dictionary
+    _grouped_lanes = gdf.groupby("road_type")["lanes"]
+    _road_types, _lanes = list(zip(*((x[0], x[1].mode()) for x in _grouped_lanes)))
+    _lanes_dict = {
+        _road_type: _lanes[0] if not _lanes.empty else np.nan
+        for _road_type, _lanes in zip(_road_types, _lanes)
+    }
+
+    # get a default value if any key of the dictionary is nan
+    # (because the mode operation on the 'lanes' column for a road type results in an empty array)
+    default_value = np.mean(
+        list(_val for _val in _lanes_dict.values() if not np.isnan(_val))
+    )
+
+    # Replace nan with the calculated average
+    return {
+        _road_type: _lanes if not np.isnan(_lanes) else default_value
+        for _road_type, _lanes in _lanes_dict.items()
+    }
 
 
 def scale_damage_using_lanes(lane_scale_factors, df, cols_to_scale) -> pd.DataFrame:

diff --git a/tests/analysis/damages/test_damages_utils.py b/tests/analysis/damages/test_damages_utils.py
@@ -64,19 +64,40 @@ def test_clean_lane_data(self):
         assert _result_data[0] == 42
         assert _result_data[1] == 24
 
-    def test_create_summary_statistics(self):
+    @pytest.mark.parametrize(
+        "lanes, expected",
+        [
+            pytest.param([0, 1, 3], [0, 1, 3], id="Valid lanes"),
+            pytest.param([np.nan, 1, 3], [2, 1, 3], id="First lane invalid"),
+            pytest.param([0, np.nan, 3], [0, 1.5, 3], id="Middle lane invalid"),
+            pytest.param([0, 1, np.nan], [0, 1, 0.5], id="Last lane invalid"),
+            pytest.param(
+                [np.nan, np.nan, np.nan],
+                [np.nan, np.nan, np.nan],
+                id="All lanes invalid",
+            ),
+        ],
+    )
+    def test_create_summary_statistics(self, lanes, expected):
         # 1. Define test data.
         _left_line = LineString([[0, 0], [1, 0], [2, 0]])
+        _middle_line = LineString([[1, 0], [1, 1], [2, 2]])
         _right_line = LineString([[3, 0], [2, 1], [2, 2]])
         _data = {
-            "road_type": ["name1", "name2"],
-            "geometry": [_left_line, _right_line],
-            "lanes": [0, 1],
+            "road_type": ["name1", "name2", "name3"],
+            "geometry": [_left_line, _middle_line, _right_line],
+            "lanes": lanes,
         }
         _test_gdf = gpd.GeoDataFrame(_data, crs="EPSG:4326")
 
         # 2. Run test.
         result_dict = create_summary_statistics(_test_gdf)
 
         # 3. Verify final expectations
+        def valid_lanes(lane: float, expected: float) -> bool:
+            if np.isnan(lane):
+                return np.isnan(expected)
+            return lane == pytest.approx(expected)
+
         assert isinstance(result_dict, dict)
+        assert all(map(valid_lanes, result_dict.values(), expected))