diff --git a/pandas/_libs/window/indexers.pyi b/pandas/_libs/window/indexers.pyi index bbb5e6988d0b5..c9bc64be34ac9 100644 --- a/pandas/_libs/window/indexers.pyi +++ b/pandas/_libs/window/indexers.pyi @@ -8,6 +8,5 @@ def calculate_variable_window_bounds( min_periods, center: bool, closed: str | None, - step: int | None, index: np.ndarray, # const int64_t[:] ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 992212a872035..465865dec23c4 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -16,7 +16,6 @@ def calculate_variable_window_bounds( object min_periods, # unused but here to match get_window_bounds signature bint center, str closed, - int64_t step, const int64_t[:] index ): """ @@ -39,9 +38,6 @@ def calculate_variable_window_bounds( closed : str string of side of the window that should be closed - step : int64 - Spacing between windows - index : ndarray[int64] time series index to roll over @@ -150,4 +146,4 @@ def calculate_variable_window_bounds( # right endpoint is open if not right_closed and not center: end[i] -= 1 - return start[::step], end[::step] + return start, end diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 54bdee0bb0208..636ea7897606a 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -115,9 +115,6 @@ def get_window_bounds( step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - if step is not None: - raise NotImplementedError("step not implemented for variable window") - # error: Argument 4 to "calculate_variable_window_bounds" has incompatible # type "Optional[bool]"; expected "bool" # error: Argument 6 to "calculate_variable_window_bounds" has incompatible @@ -128,7 +125,6 @@ def get_window_bounds( min_periods, center, # type: ignore[arg-type] closed, - 1, self.index_array, # type: ignore[arg-type] ) @@ -234,12 +230,10 @@ def 
get_window_bounds( step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - if step is not None: - raise NotImplementedError("step not implemented for expanding window") - - end = np.arange(1, num_values + 1, dtype=np.int64) - start = np.zeros(len(end), dtype=np.int64) - return start, end + return ( + np.zeros(num_values, dtype=np.int64), + np.arange(1, num_values + 1, dtype=np.int64), + ) class FixedForwardWindowIndexer(BaseIndexer): @@ -343,8 +337,6 @@ def get_window_bounds( closed: str | None = None, step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - if step is not None: - raise NotImplementedError("step not implemented for groupby window") # 1) For each group, get the indices that belong to the group # 2) Use the indices to calculate the start & end bounds of the window @@ -404,11 +396,4 @@ def get_window_bounds( step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - if step is not None: - raise NotImplementedError( - "step not implemented for exponentail moving window" - ) - return ( - np.array([0], dtype=np.int64), - np.array([num_values], dtype=np.int64), - ) + return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 269bef32a0f39..7961b954d3a2a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -229,6 +229,11 @@ def _validate(self) -> None: ) if self.method not in ["table", "single"]: raise ValueError("method must be 'table' or 'single") + if self.step is not None: + if not is_integer(self.step): + raise ValueError("step must be an integer") + elif self.step < 0: + raise ValueError("step must be >= 0") def _check_window_bounds( self, start: np.ndarray, end: np.ndarray, num_vals: int @@ -238,16 +243,14 @@ def _check_window_bounds( f"start ({len(start)}) and end ({len(end)}) bounds must be the " f"same length" ) - elif not isinstance(self._get_window_indexer(), GroupbyIndexer) and len( - start - ) != 
(num_vals + (self.step or 1) - 1) // (self.step or 1): + elif len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1): raise ValueError( f"start and end bounds ({len(start)}) must be the same length " f"as the object ({num_vals}) divided by the step ({self.step}) " f"if given and rounded up" ) - def _slice_index(self, index: Index, result: Sized | None = None) -> Index: + def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index: """ Slices the index for a given result and the preset step. """ @@ -446,7 +449,7 @@ def _apply_series( raise DataError("No numeric types to aggregate") from err result = homogeneous_func(values) - index = self._slice_index(obj.index, result) + index = self._slice_axis_for_step(obj.index, result) return obj._constructor(result, index=index, name=obj.name) def _apply_blockwise( @@ -484,7 +487,7 @@ def hfunc(values: ArrayLike) -> ArrayLike: res_values.append(res) taker.append(i) - index = self._slice_index( + index = self._slice_axis_for_step( obj.index, res_values[0] if len(res_values) > 0 else None ) df = type(obj)._from_arrays( @@ -524,7 +527,7 @@ def _apply_tablewise( values = values.T if self.axis == 1 else values result = homogeneous_func(values) result = result.T if self.axis == 1 else result - index = self._slice_index(obj.index, result) + index = self._slice_axis_for_step(obj.index, result) columns = ( obj.columns if result.shape[1] == len(obj.columns) @@ -644,13 +647,13 @@ def _numba_apply( ) result = aggregator(values, start, end, min_periods, *func_args) result = result.T if self.axis == 1 else result - index = self._slice_index(obj.index, result) + index = self._slice_axis_for_step(obj.index, result) if obj.ndim == 1: result = result.squeeze() out = obj._constructor(result, index=index, name=obj.name) return out else: - columns = self._slice_index(obj.columns, result.T) + columns = self._slice_axis_for_step(obj.columns, result.T) out = obj._constructor(result, index=index, columns=columns) 
return self._resolve_output(out, obj) @@ -692,7 +695,7 @@ def __init__( obj = obj.drop(columns=self._grouper.names, errors="ignore") # GH 15354 if kwargs.get("step") is not None: - raise NotImplementedError("step not implemented for rolling groupby") + raise NotImplementedError("step not implemented for groupby") super().__init__(obj, *args, **kwargs) def _apply( @@ -938,14 +941,12 @@ class Window(BaseWindow): The closed parameter with fixed windows is now supported. step : int, default None - When supported, applies ``[::step]`` to the resulting sequence of windows, in a - computationally efficient manner. Currently supported only with fixed-length - window indexers. Note that using a step argument other than None or 1 will - produce a result with a different shape than the input. - ..versionadded:: 1.5 + .. versionadded:: 1.5.0 - The step parameter is only supported with fixed windows. + Evaluate the window at every ``step`` result, equivalent to slicing as + ``[::step]``. ``window`` must be an integer. Using a step argument other + than None or 1 will produce a result with a different shape than the input. 
method : str {'single', 'table'}, default 'single' @@ -1605,9 +1606,7 @@ def cov( **kwargs, ): if self.step is not None: - raise NotImplementedError( - "step not implemented for rolling and expanding cov" - ) + raise NotImplementedError("step not implemented for cov") from pandas import Series @@ -1650,11 +1649,8 @@ def corr( ddof: int = 1, **kwargs, ): - if self.step is not None: - raise NotImplementedError( - "step not implemented for rolling and expanding corr" - ) + raise NotImplementedError("step not implemented for corr") from pandas import Series @@ -1749,24 +1745,16 @@ def _validate(self): if self.min_periods is None: self.min_periods = 1 + if self.step is not None: + raise NotImplementedError( + "step is not supported with frequency windows" + ) + elif isinstance(self.window, BaseIndexer): # Passed BaseIndexer subclass should handle all other rolling kwargs pass elif not is_integer(self.window) or self.window < 0: raise ValueError("window must be an integer 0 or greater") - # GH 15354: - # validate window indexer parameters do not raise in get_window_bounds - # this cannot be done in BaseWindow._validate because there _get_window_indexer - # would erroneously create a fixed window given a window argument like "1s" due - # to _win_freq_i8 not being set - indexer = self._get_window_indexer() - indexer.get_window_bounds( - num_values=0, - min_periods=self.min_periods, - center=self.center, - closed=self.closed, - step=self.step, - ) def _validate_datetimelike_monotonic(self): """ diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index bbf9f3d886794..12f9cb27e8cbe 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -52,8 +52,7 @@ def test_rolling_apply_out_of_bounds(engine_and_raw): @pytest.mark.parametrize("window", [2, "2s"]) -@pytest.mark.parametrize("step", [None]) -def test_rolling_apply_with_pandas_objects(window, step): +def test_rolling_apply_with_pandas_objects(window): # 5071 df = 
DataFrame( {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)}, @@ -67,8 +66,8 @@ def f(x): return np.nan return x.iloc[-1] - result = df.rolling(window, step=step).apply(f, raw=False) - expected = df.iloc[2:].reindex_like(df)[::step] + result = df.rolling(window).apply(f, raw=False) + expected = df.iloc[2:].reindex_like(df) tm.assert_frame_equal(result, expected) with tm.external_error_raised(AttributeError): @@ -96,8 +95,7 @@ def test_rolling_apply(engine_and_raw, step): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("step", [None]) -def test_all_apply(engine_and_raw, step): +def test_all_apply(engine_and_raw): engine, raw = engine_and_raw df = ( @@ -106,16 +104,15 @@ def test_all_apply(engine_and_raw, step): ).set_index("A") * 2 ) - er = df.rolling(window=1, step=step) - r = df.rolling(window="1s", step=step) + er = df.rolling(window=1) + r = df.rolling(window="1s") result = r.apply(lambda x: 1, engine=engine, raw=raw) expected = er.apply(lambda x: 1, engine=engine, raw=raw) tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("step", [None]) -def test_ragged_apply(engine_and_raw, step): +def test_ragged_apply(engine_and_raw): engine, raw = engine_and_raw df = DataFrame({"B": range(5)}) @@ -128,24 +125,18 @@ def test_ragged_apply(engine_and_raw, step): ] f = lambda x: 1 - result = df.rolling(window="1s", min_periods=1, step=step).apply( - f, engine=engine, raw=raw - ) - expected = df.copy()[::step] + result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() expected["B"] = 1.0 tm.assert_frame_equal(result, expected) - result = df.rolling(window="2s", min_periods=1, step=step).apply( - f, engine=engine, raw=raw - ) - expected = df.copy()[::step] + result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() expected["B"] = 1.0 tm.assert_frame_equal(result, expected) - result = df.rolling(window="5s", min_periods=1, 
step=step).apply( - f, engine=engine, raw=raw - ) - expected = df.copy()[::step] + result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() expected["B"] = 1.0 tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index aef79f97bf93d..eb5278c2b33ea 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -259,14 +259,13 @@ def test_rolling_forward_cov_corr(func, expected): ["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]], ], ) -@pytest.mark.parametrize("step", [None]) -def test_non_fixed_variable_window_indexer(closed, expected_data, step): +def test_non_fixed_variable_window_indexer(closed, expected_data): index = date_range("2020", periods=10) df = DataFrame(range(10), index=index) offset = BusinessDay(1) indexer = VariableOffsetWindowIndexer(index=index, offset=offset) - result = df.rolling(indexer, closed=closed, step=step).sum() - expected = DataFrame(expected_data, index=index)[::step] + result = df.rolling(indexer, closed=closed).sum() + expected = DataFrame(expected_data, index=index) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 53e1d442d60a4..7e9bc121f06ff 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -26,13 +26,7 @@ ) import pandas._testing as tm from pandas.api.indexers import BaseIndexer -from pandas.core.indexers.objects import ( - ExpandingIndexer, - ExponentialMovingWindowIndexer, - GroupbyIndexer, - VariableOffsetWindowIndexer, - VariableWindowIndexer, -) +from pandas.core.indexers.objects import VariableOffsetWindowIndexer from pandas.core.window import Rolling from pandas.tseries.offsets import BusinessDay @@ -96,71 +90,33 @@ def test_invalid_constructor(frame_or_series, w): timedelta(days=3), Timedelta(days=3), "3D", - 
ExpandingIndexer(window_size=3), - ExponentialMovingWindowIndexer(window_size=3), - GroupbyIndexer(window_size=3), VariableOffsetWindowIndexer( index=date_range("2015-12-25", periods=5), offset=BusinessDay(1) ), - VariableWindowIndexer(window_size=3), - ], -) -@pytest.mark.parametrize( - "func", - [ - lambda df: df.rolling, - lambda df: df.groupby("key").rolling, ], ) -def test_constructor_step_not_implemented(window, func, step): +def test_freq_window_not_implemented(window): # GH 15354 df = DataFrame( - {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, + np.arange(10), index=date_range("2015-12-24", periods=10, freq="D"), ) - f = lambda: func(df)(window=window, step=step) - if step is None: - f() - else: - with pytest.raises(NotImplementedError, match="step not implemented"): - f() + with pytest.raises( + NotImplementedError, match="step is not supported with frequency windows" + ): + df.rolling("3D", step=3) @pytest.mark.parametrize("agg", ["cov", "corr"]) -def test_constructor_step_not_implemented_for_cov_corr(agg, step): - # GH 15354 - df = DataFrame( - {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, - index=date_range("2015-12-24", periods=10, freq="D"), - ) - f = lambda: getattr(df.rolling(window=2, step=step), agg)(df) - if step is None: - f() - else: - with pytest.raises(NotImplementedError, match="step not implemented"): - f() - - -@pytest.mark.parametrize( - "func", - [ - lambda df: df.expanding, - lambda df: df.ewm, - ], -) -def test_constructor_step_unsupported(func, step): +def test_step_not_implemented_for_cov_corr(agg): # GH 15354 - df = DataFrame( - {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, - index=date_range("2015-12-24", periods=10, freq="D"), - ) - with pytest.raises(TypeError, match="got an unexpected keyword argument 'step'"): - func(df)(step=step) + roll = DataFrame(range(2)).rolling(1, step=2) + with pytest.raises(NotImplementedError, match="step not implemented"): + getattr(roll, agg)() 
@pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)]) -@pytest.mark.parametrize("step", [None]) -def test_constructor_with_timedelta_window(window, step): +def test_constructor_with_timedelta_window(window): # GH 15440 n = 10 df = DataFrame( @@ -169,19 +125,18 @@ def test_constructor_with_timedelta_window(window, step): ) expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - result = df.rolling(window=window, step=step).sum() + result = df.rolling(window=window).sum() expected = DataFrame( {"value": expected_data}, index=date_range("2015-12-24", periods=n, freq="D"), - )[::step] + ) tm.assert_frame_equal(result, expected) - expected = df.rolling("3D", step=step).sum() + expected = df.rolling("3D").sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"]) -@pytest.mark.parametrize("step", [None]) -def test_constructor_timedelta_window_and_minperiods(window, step, raw): +def test_constructor_timedelta_window_and_minperiods(window, raw): # GH 15305 n = 10 df = DataFrame( @@ -191,11 +146,9 @@ def test_constructor_timedelta_window_and_minperiods(window, step, raw): expected = DataFrame( {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, index=date_range("2017-08-08", periods=n, freq="D"), - )[::step] - result_roll_sum = df.rolling(window=window, min_periods=2, step=step).sum() - result_roll_generic = df.rolling(window=window, min_periods=2, step=step).apply( - sum, raw=raw ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) @@ -214,21 +167,18 @@ def test_numpy_compat(method): @pytest.mark.parametrize("closed", ["right", "left", "both", "neither"]) -@pytest.mark.parametrize("step", [None]) -def test_closed_fixed(closed, arithmetic_win_operators, step): +def 
test_closed_fixed(closed, arithmetic_win_operators): # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr( - df_fixed.rolling(2, closed=closed, min_periods=1, step=step), + df_fixed.rolling(2, closed=closed, min_periods=1), func_name, )() - if step is not None: - result = result.reset_index(drop=True) expected = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, step=step), + df_time.rolling("2D", closed=closed, min_periods=1), func_name, )().reset_index(drop=True) @@ -280,9 +230,8 @@ def test_closed_fixed(closed, arithmetic_win_operators, step): ), ], ) -@pytest.mark.parametrize("step", [None]) def test_datetimelike_centered_selections( - closed, window_selections, step, arithmetic_win_operators + closed, window_selections, arithmetic_win_operators ): # GH 34315 func_name = arithmetic_win_operators @@ -293,7 +242,7 @@ def test_datetimelike_centered_selections( expected = DataFrame( {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, index=date_range("2020", periods=5), - )[::step] + ) if func_name == "sem": kwargs = {"ddof": 0} @@ -301,7 +250,7 @@ def test_datetimelike_centered_selections( kwargs = {} result = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, center=True, step=step), + df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name, )(**kwargs) @@ -321,9 +270,8 @@ def test_datetimelike_centered_selections( ("2s", "neither", [1.0, 2.0, 2.0]), ], ) -@pytest.mark.parametrize("step", [None]) def test_datetimelike_centered_offset_covers_all( - window, closed, expected, step, frame_or_series + window, closed, expected, frame_or_series ): # GH 42753 @@ -334,8 +282,8 @@ def test_datetimelike_centered_offset_covers_all( ] df = frame_or_series([1, 1, 1], index=index) - result = df.rolling(window, closed=closed, center=True, step=step).sum() - expected = 
frame_or_series(expected, index=index)[::step] + result = df.rolling(window, closed=closed, center=True).sum() + expected = frame_or_series(expected, index=index) tm.assert_equal(result, expected) @@ -348,9 +296,8 @@ def test_datetimelike_centered_offset_covers_all( ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]), ], ) -@pytest.mark.parametrize("step", [None]) def test_datetimelike_nonunique_index_centering( - window, closed, expected, frame_or_series, step + window, closed, expected, frame_or_series ): index = DatetimeIndex( [ @@ -366,24 +313,23 @@ def test_datetimelike_nonunique_index_centering( ) df = frame_or_series([1] * 8, index=index, dtype=float) - expected = frame_or_series(expected, index=index, dtype=float)[::step] + expected = frame_or_series(expected, index=index, dtype=float) - result = df.rolling(window, center=True, closed=closed, step=step).sum() + result = df.rolling(window, center=True, closed=closed).sum() tm.assert_equal(result, expected) -@pytest.mark.parametrize("step", [None]) -def test_even_number_window_alignment(step): +def test_even_number_window_alignment(): # see discussion in GH 38780 s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3)) # behavior of index- and datetime-based windows differs here! 
# s.rolling(window=2, min_periods=1, center=True).mean() - result = s.rolling(window="2D", min_periods=1, center=True, step=step).mean() + result = s.rolling(window="2D", min_periods=1, center=True).mean() - expected = Series([0.5, 1.5, 2], index=s.index)[::step] + expected = Series([0.5, 1.5, 2], index=s.index) tm.assert_series_equal(result, expected) @@ -415,27 +361,23 @@ def test_closed_fixed_binary_col(center, step): @pytest.mark.parametrize("closed", ["neither", "left"]) -@pytest.mark.parametrize("step", [None]) -def test_closed_empty(closed, arithmetic_win_operators, step): +def test_closed_empty(closed, arithmetic_win_operators): # GH 26005 func_name = arithmetic_win_operators ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D")) - roll = ser.rolling("1D", closed=closed, step=step) + roll = ser.rolling("1D", closed=closed) result = getattr(roll, func_name)() - expected = Series([np.nan] * 5, index=ser.index)[::step] + expected = Series([np.nan] * 5, index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("func", ["min", "max"]) -@pytest.mark.parametrize("step", [None]) -def test_closed_one_entry(func, step): +def test_closed_one_entry(func): # GH24718 ser = Series(data=[2], index=date_range("2000", periods=1)) - result = getattr(ser.rolling("10D", closed="left", step=step), func)() - index = ser.index.copy() - index.freq = index.freq * (step or 1) - tm.assert_series_equal(result, Series([np.nan], index=index)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, Series([np.nan], index=ser.index)) @pytest.mark.parametrize("func", ["min", "max"]) @@ -1829,3 +1771,13 @@ def test_rolling_std_neg_sqrt(): b = a.ewm(span=3).std() assert np.isfinite(b[2:]).all() + + +def test_step_not_integer_raises(): + with pytest.raises(ValueError, match="step must be an integer"): + DataFrame(range(2)).rolling(1, step="foo") + + +def test_step_not_positive_raises(): + with 
pytest.raises(ValueError, match="step must be >= 0"): + DataFrame(range(2)).rolling(1, step=-1)