From 6caefb19f4d7c05451fafca182c6eb39fe9901ed Mon Sep 17 00:00:00 2001 From: rtpsw Date: Tue, 1 Mar 2022 01:26:08 +0200 Subject: [PATCH] ENH: Rolling window with step size (GH-15354) (#45765) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/_libs/window/indexers.pyi | 1 + pandas/_libs/window/indexers.pyx | 9 +- pandas/core/generic.py | 3 + pandas/core/indexers/objects.py | 54 +++++- pandas/core/window/ewm.py | 2 + pandas/core/window/numba_.py | 4 +- pandas/core/window/rolling.py | 104 ++++++++++-- pandas/tests/window/conftest.py | 6 + pandas/tests/window/test_api.py | 89 +++++----- pandas/tests/window/test_apply.py | 59 ++++--- pandas/tests/window/test_base_indexer.py | 59 ++++--- pandas/tests/window/test_dtypes.py | 20 ++- pandas/tests/window/test_groupby.py | 7 +- pandas/tests/window/test_numba.py | 54 +++--- pandas/tests/window/test_rolling.py | 158 ++++++++++++++---- pandas/tests/window/test_rolling_functions.py | 60 +++---- pandas/tests/window/test_rolling_quantile.py | 20 ++- pandas/tests/window/test_rolling_skew_kurt.py | 42 ++--- pandas/tests/window/test_win_type.py | 72 ++++---- 20 files changed, 562 insertions(+), 262 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d2b34f587b5e2..59d4ef1d9b39d 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -38,6 +38,7 @@ Other enhancements - :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) - :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) - :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`45428`) +- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`) - Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to 
``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`) - Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`) - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) diff --git a/pandas/_libs/window/indexers.pyi b/pandas/_libs/window/indexers.pyi index c9bc64be34ac9..bbb5e6988d0b5 100644 --- a/pandas/_libs/window/indexers.pyi +++ b/pandas/_libs/window/indexers.pyi @@ -8,5 +8,6 @@ def calculate_variable_window_bounds( min_periods, center: bool, closed: str | None, + step: int | None, index: np.ndarray, # const int64_t[:] ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ... diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 4b3a858ade773..992212a872035 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -16,6 +16,7 @@ def calculate_variable_window_bounds( object min_periods, # unused but here to match get_window_bounds signature bint center, str closed, + int64_t step, const int64_t[:] index ): """ @@ -38,6 +39,9 @@ def calculate_variable_window_bounds( closed : str string of side of the window that should be closed + step : int64 + Spacing between windows + index : ndarray[int64] time series index to roll over @@ -52,6 +56,9 @@ def calculate_variable_window_bounds( int64_t start_bound, end_bound, index_growth_sign = 1 Py_ssize_t i, j + if num_values <= 0: + return np.empty(0, dtype='int64'), np.empty(0, dtype='int64') + # default is 'right' if closed is None: closed = 'right' @@ -143,4 +150,4 @@ def calculate_variable_window_bounds( # right endpoint is open if not right_closed and not center: end[i] -= 1 - return start, end + return start[::step], end[::step] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 
fc682b848b054..83d0a95b8adb2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11261,6 +11261,7 @@ def rolling( on: str | None = None, axis: Axis = 0, closed: str | None = None, + step: int | None = None, method: str = "single", ): axis = self._get_axis_number(axis) @@ -11275,6 +11276,7 @@ def rolling( on=on, axis=axis, closed=closed, + step=step, method=method, ) @@ -11287,6 +11289,7 @@ def rolling( on=on, axis=axis, closed=closed, + step=step, method=method, ) diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 4d5e4bbe6bd36..54bdee0bb0208 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -27,6 +27,9 @@ center passed from the top level rolling API closed : str, default None closed passed from the top level rolling API +step : int, default None + step passed from the top level rolling API + .. versionadded:: 1.5 win_type : str, default None win_type passed from the top level rolling API @@ -62,6 +65,7 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: raise NotImplementedError @@ -77,6 +81,7 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: if center: @@ -84,7 +89,7 @@ def get_window_bounds( else: offset = 0 - end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") + end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64") start = end - self.window_size if closed in ["left", "both"]: start -= 1 @@ -107,8 +112,12 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: + if step is not None: + raise NotImplementedError("step not implemented for variable window") + # error: Argument 4 to 
"calculate_variable_window_bounds" has incompatible # type "Optional[bool]"; expected "bool" # error: Argument 6 to "calculate_variable_window_bounds" has incompatible @@ -119,6 +128,7 @@ def get_window_bounds( min_periods, center, # type: ignore[arg-type] closed, + 1, self.index_array, # type: ignore[arg-type] ) @@ -145,8 +155,14 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: + if step is not None: + raise NotImplementedError("step not implemented for variable offset window") + if num_values <= 0: + return np.empty(0, dtype="int64"), np.empty(0, dtype="int64") + # if windows is variable, default is 'right', otherwise default is 'both' if closed is None: closed = "right" if self.index is not None else "both" @@ -215,12 +231,15 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - return ( - np.zeros(num_values, dtype=np.int64), - np.arange(1, num_values + 1, dtype=np.int64), - ) + if step is not None: + raise NotImplementedError("step not implemented for expanding window") + + end = np.arange(1, num_values + 1, dtype=np.int64) + start = np.zeros(len(end), dtype=np.int64) + return start, end class FixedForwardWindowIndexer(BaseIndexer): @@ -256,6 +275,7 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: if center: @@ -264,11 +284,13 @@ def get_window_bounds( raise ValueError( "Forward-looking windows don't support setting the closed argument" ) + if step is None: + step = 1 - start = np.arange(num_values, dtype="int64") + start = np.arange(0, num_values, step, dtype="int64") end = start + self.window_size if self.window_size: - end[-self.window_size :] = num_values + end = np.clip(end, 0, 
num_values) return start, end @@ -319,7 +341,11 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: + if step is not None: + raise NotImplementedError("step not implemented for groupby window") + # 1) For each group, get the indices that belong to the group # 2) Use the indices to calculate the start & end bounds of the window # 3) Append the window bounds in group order @@ -339,7 +365,7 @@ def get_window_bounds( **self.indexer_kwargs, ) start, end = indexer.get_window_bounds( - len(indices), min_periods, center, closed + len(indices), min_periods, center, closed, step ) start = start.astype(np.int64) end = end.astype(np.int64) @@ -358,6 +384,8 @@ def get_window_bounds( ) start_arrays.append(window_indices.take(ensure_platform_int(start))) end_arrays.append(window_indices.take(ensure_platform_int(end))) + if len(start_arrays) == 0: + return np.array([], dtype=np.int64), np.array([], dtype=np.int64) start = np.concatenate(start_arrays) end = np.concatenate(end_arrays) return start, end @@ -373,6 +401,14 @@ def get_window_bounds( min_periods: int | None = None, center: bool | None = None, closed: str | None = None, + step: int | None = None, ) -> tuple[np.ndarray, np.ndarray]: - return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64) + if step is not None: + raise NotImplementedError( + "step not implemented for exponentail moving window" + ) + return ( + np.array([0], dtype=np.int64), + np.array([num_values], dtype=np.int64), + ) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 2d633ba1a2bcd..4c2b99762b812 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -732,6 +732,7 @@ def cov_func(x, y): min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) result = window_aggregations.ewmcov( x_array, @@ -798,6 +799,7 @@ def cov_func(x, y): 
min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) def _cov(X, Y): diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 3b14f0d14ecab..0f9f01e93a477 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -220,8 +220,8 @@ def roll_table( minimum_periods: int, *args: Any, ): - result = np.empty(values.shape) - min_periods_mask = np.empty(values.shape) + result = np.empty((len(begin), values.shape[1])) + min_periods_mask = np.empty(result.shape) for i in numba.prange(len(result)): start = begin[i] stop = end[i] diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5eca817d6bb62..269bef32a0f39 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -14,6 +14,7 @@ Any, Callable, Hashable, + Sized, ) import warnings @@ -126,6 +127,7 @@ def __init__( axis: Axis = 0, on: str | Index | None = None, closed: str | None = None, + step: int | None = None, method: str = "single", *, selection=None, @@ -133,6 +135,7 @@ def __init__( self.obj = obj self.on = on self.closed = closed + self.step = step self.window = window self.min_periods = min_periods self.center = center @@ -235,12 +238,25 @@ def _check_window_bounds( f"start ({len(start)}) and end ({len(end)}) bounds must be the " f"same length" ) - elif len(start) != num_vals: + elif not isinstance(self._get_window_indexer(), GroupbyIndexer) and len( + start + ) != (num_vals + (self.step or 1) - 1) // (self.step or 1): raise ValueError( f"start and end bounds ({len(start)}) must be the same length " - f"as the object ({num_vals})" + f"as the object ({num_vals}) divided by the step ({self.step}) " + f"if given and rounded up" ) + def _slice_index(self, index: Index, result: Sized | None = None) -> Index: + """ + Slices the index for a given result and the preset step. 
+ """ + return ( + index + if result is None or len(result) == len(index) + else index[:: self.step] + ) + def _create_data(self, obj: NDFrameT) -> NDFrameT: """ Split data into blocks & return conformed data. @@ -324,6 +340,7 @@ def __iter__(self): min_periods=self.min_periods, center=self.center, closed=self.closed, + step=self.step, ) self._check_window_bounds(start, end, len(obj)) @@ -429,7 +446,8 @@ def _apply_series( raise DataError("No numeric types to aggregate") from err result = homogeneous_func(values) - return obj._constructor(result, index=obj.index, name=obj.name) + index = self._slice_index(obj.index, result) + return obj._constructor(result, index=index, name=obj.name) def _apply_blockwise( self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None @@ -466,9 +484,12 @@ def hfunc(values: ArrayLike) -> ArrayLike: res_values.append(res) taker.append(i) + index = self._slice_index( + obj.index, res_values[0] if len(res_values) > 0 else None + ) df = type(obj)._from_arrays( res_values, - index=obj.index, + index=index, columns=obj.columns.take(taker), verify_integrity=False, ) @@ -503,7 +524,13 @@ def _apply_tablewise( values = values.T if self.axis == 1 else values result = homogeneous_func(values) result = result.T if self.axis == 1 else result - out = obj._constructor(result, index=obj.index, columns=obj.columns) + index = self._slice_index(obj.index, result) + columns = ( + obj.columns + if result.shape[1] == len(obj.columns) + else obj.columns[:: self.step] + ) + out = obj._constructor(result, index=index, columns=columns) return self._resolve_output(out, obj) @@ -570,6 +597,7 @@ def calc(x): min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) self._check_window_bounds(start, end, len(x)) @@ -608,6 +636,7 @@ def _numba_apply( min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) self._check_window_bounds(start, end, len(values)) aggregator = 
executor.generate_shared_aggregator( @@ -615,12 +644,14 @@ def _numba_apply( ) result = aggregator(values, start, end, min_periods, *func_args) result = result.T if self.axis == 1 else result + index = self._slice_index(obj.index, result) if obj.ndim == 1: result = result.squeeze() - out = obj._constructor(result, index=obj.index, name=obj.name) + out = obj._constructor(result, index=index, name=obj.name) return out else: - out = obj._constructor(result, index=obj.index, columns=obj.columns) + columns = self._slice_index(obj.columns, result.T) + out = obj._constructor(result, index=index, columns=columns) return self._resolve_output(out, obj) def aggregate(self, func, *args, **kwargs): @@ -659,6 +690,9 @@ def __init__( # groupby., but unexpected to users in # groupby.rolling. obj = obj.drop(columns=self._grouper.names, errors="ignore") + # GH 15354 + if kwargs.get("step") is not None: + raise NotImplementedError("step not implemented for rolling groupby") super().__init__(obj, *args, **kwargs) def _apply( @@ -848,8 +882,8 @@ class Window(BaseWindow): If a BaseIndexer subclass, the window boundaries based on the defined ``get_window_bounds`` method. Additional rolling - keyword arguments, namely ``min_periods``, ``center``, and - ``closed`` will be passed to ``get_window_bounds``. + keyword arguments, namely ``min_periods``, ``center``, ``closed`` and + ``step`` will be passed to ``get_window_bounds``. min_periods : int, default None Minimum number of observations in window required to have a value; @@ -903,6 +937,16 @@ class Window(BaseWindow): The closed parameter with fixed windows is now supported. + step : int, default None + When supported, applies ``[::step]`` to the resulting sequence of windows, in a + computationally efficient manner. Currently supported only with fixed-length + window indexers. Note that using a step argument other than None or 1 will + produce a result with a different shape than the input. 
+ + .. versionadded:: 1.5 + + The step parameter is only supported with fixed windows. + method : str {'single', 'table'}, default 'single' .. versionadded:: 1.3.0 @@ -1021,6 +1065,17 @@ class Window(BaseWindow): 3 3.0 4 6.0 + **step** + + Rolling sum with a window length of 2 observations, minimum of 1 observation to + calculate a value, and a step of 2. + + >>> df.rolling(2, min_periods=1, step=2).sum() + B + 0 0.0 + 2 3.0 + 4 4.0 + **win_type** Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` @@ -1043,6 +1098,7 @@ class Window(BaseWindow): "axis", "on", "closed", + "step", "method", ] @@ -1132,7 +1188,7 @@ def calc(x): return result - return self._apply_blockwise(homogeneous_func, name) + return self._apply_blockwise(homogeneous_func, name)[:: self.step] @doc( _shared_docs["aggregate"], @@ -1548,6 +1604,11 @@ def cov( ddof: int = 1, **kwargs, ): + if self.step is not None: + raise NotImplementedError( + "step not implemented for rolling and expanding cov" + ) + from pandas import Series def cov_func(x, y): @@ -1564,6 +1625,7 @@ def cov_func(x, y): min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) self._check_window_bounds(start, end, len(x_array)) @@ -1589,6 +1651,11 @@ def corr( **kwargs, ): + if self.step is not None: + raise NotImplementedError( + "step not implemented for rolling and expanding corr" + ) + from pandas import Series def corr_func(x, y): @@ -1605,6 +1672,7 @@ def corr_func(x, y): min_periods=min_periods, center=self.center, closed=self.closed, + step=self.step, ) self._check_window_bounds(start, end, len(x_array)) @@ -1643,6 +1711,7 @@ class Rolling(RollingAndExpandingMixin): "axis", "on", "closed", + "step", "method", ] @@ -1682,9 +1751,22 @@ def _validate(self): elif isinstance(self.window, BaseIndexer): # Passed BaseIndexer subclass should handle all other rolling kwargs - return + pass elif not is_integer(self.window) or self.window < 0: raise ValueError("window must be an integer 0 or
greater") + # GH 15354: + # validate window indexer parameters do not raise in get_window_bounds + # this cannot be done in BaseWindow._validate because there _get_window_indexer + # would erroneously create a fixed window given a window argument like "1s" due + # to _win_freq_i8 not being set + indexer = self._get_window_indexer() + indexer.get_window_bounds( + num_values=0, + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + step=self.step, + ) def _validate_datetimelike_monotonic(self): """ diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index f2832652ed58f..f42a1a5449c5c 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -126,3 +126,9 @@ def frame(): index=bdate_range(datetime(2009, 1, 1), periods=100), columns=np.arange(10), ) + + +@pytest.fixture(params=[None, 1, 2, 5, 10]) +def step(request): + """step keyword argument for rolling window operations.""" + return request.param diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index f84a579247630..6dbcc8dfd00c0 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -16,20 +16,20 @@ from pandas.core.base import SpecificationError -def test_getitem(): +def test_getitem(step): frame = DataFrame(np.random.randn(5, 5)) - r = frame.rolling(window=5) - tm.assert_index_equal(r._selected_obj.columns, frame.columns) + r = frame.rolling(window=5, step=step) + tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns) - r = frame.rolling(window=5)[1] - assert r._selected_obj.name == frame.columns[1] + r = frame.rolling(window=5, step=step)[1] + assert r._selected_obj.name == frame[::step].columns[1] # technically this is allowed - r = frame.rolling(window=5)[1, 3] - tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + r = frame.rolling(window=5, step=step)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]]) - r 
= frame.rolling(window=5)[[1, 3]] - tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]]) + r = frame.rolling(window=5, step=step)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]]) def test_select_bad_cols(): @@ -53,21 +53,21 @@ def test_attribute_access(): r.F -def tests_skip_nuisance(): +def tests_skip_nuisance(step): df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) + r = df.rolling(window=3, step=step) result = r[["A", "B"]].sum() expected = DataFrame( {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, columns=list("AB"), - ) + )[::step] tm.assert_frame_equal(result, expected) -def test_skip_sum_object_raises(): +def test_skip_sum_object_raises(step): df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) - r = df.rolling(window=3) + r = df.rolling(window=3, step=step) msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)" with tm.assert_produces_warning(FutureWarning, match=msg): # GH#42738 @@ -75,14 +75,14 @@ def test_skip_sum_object_raises(): expected = DataFrame( {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, columns=list("AB"), - ) + )[::step] tm.assert_frame_equal(result, expected) -def test_agg(): +def test_agg(step): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) + r = df.rolling(window=3, step=step) a_mean = r["A"].mean() a_std = r["A"].std() a_sum = r["A"].sum() @@ -141,10 +141,10 @@ def test_agg_apply(raw): tm.assert_frame_equal(result, expected, check_like=True) -def test_agg_consistency(): +def test_agg_consistency(step): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) - r = df.rolling(window=3) + r = df.rolling(window=3, step=step) result = r.agg([np.sum, np.mean]).columns expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) @@ -182,7 +182,7 @@ def test_agg_nested_dicts(): r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", 
"std"]}}) -def test_count_nonnumeric_types(): +def test_count_nonnumeric_types(step): # GH12541 cols = [ "int", @@ -239,13 +239,13 @@ def test_count_nonnumeric_types(): "periods_nat": [1.0, 2.0, 1.0], }, columns=cols, - ) + )[::step] - result = df.rolling(window=2, min_periods=0).count() + result = df.rolling(window=2, min_periods=0, step=step).count() tm.assert_frame_equal(result, expected) - result = df.rolling(1, min_periods=0).count() - expected = df.notna().astype(float) + result = df.rolling(1, min_periods=0, step=step).count() + expected = df.notna().astype(float)[::step] tm.assert_frame_equal(result, expected) @@ -339,11 +339,11 @@ def test_validate_deprecated(): @pytest.mark.filterwarnings("ignore:min_periods:FutureWarning") def test_dont_modify_attributes_after_methods( - arithmetic_win_operators, closed, center, min_periods + arithmetic_win_operators, closed, center, min_periods, step ): # GH 39554 roll_obj = Series(range(1)).rolling( - 1, center=center, closed=closed, min_periods=min_periods + 1, center=center, closed=closed, min_periods=min_periods, step=step ) expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes} getattr(roll_obj, arithmetic_win_operators)() @@ -351,40 +351,49 @@ def test_dont_modify_attributes_after_methods( assert result == expected -def test_centered_axis_validation(): +def test_centered_axis_validation(step): # ok - Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean() # bad axis msg = "No axis named 1 for object type Series" with pytest.raises(ValueError, match=msg): - Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean() # ok ok - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() - DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + DataFrame(np.ones((10, 10))).rolling( + 
window=3, center=True, axis=0, step=step + ).mean() + DataFrame(np.ones((10, 10))).rolling( + window=3, center=True, axis=1, step=step + ).mean() # bad axis msg = "No axis named 2 for object type DataFrame" with pytest.raises(ValueError, match=msg): - (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + ( + DataFrame(np.ones((10, 10))) + .rolling(window=3, center=True, axis=2, step=step) + .mean() + ) -def test_rolling_min_min_periods(): +def test_rolling_min_min_periods(step): a = Series([1, 2, 3, 4, 5]) - result = a.rolling(window=100, min_periods=1).min() - expected = Series(np.ones(len(a))) + result = a.rolling(window=100, min_periods=1, step=step).min() + expected = Series(np.ones(len(a)))[::step] tm.assert_series_equal(result, expected) msg = "min_periods 5 must be <= window 3" with pytest.raises(ValueError, match=msg): - Series([1, 2, 3]).rolling(window=3, min_periods=5).min() + Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).min() -def test_rolling_max_min_periods(): +def test_rolling_max_min_periods(step): a = Series([1, 2, 3, 4, 5], dtype=np.float64) - b = a.rolling(window=100, min_periods=1).max() - tm.assert_almost_equal(a, b) + result = a.rolling(window=100, min_periods=1, step=step).max() + expected = a[::step] + tm.assert_almost_equal(result, expected) msg = "min_periods 5 must be <= window 3" with pytest.raises(ValueError, match=msg): - Series([1, 2, 3]).rolling(window=3, min_periods=5).max() + Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).max() diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 6683137960146..bbf9f3d886794 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -52,7 +52,8 @@ def test_rolling_apply_out_of_bounds(engine_and_raw): @pytest.mark.parametrize("window", [2, "2s"]) -def test_rolling_apply_with_pandas_objects(window): +@pytest.mark.parametrize("step", [None]) +def 
test_rolling_apply_with_pandas_objects(window, step): # 5071 df = DataFrame( {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)}, @@ -66,32 +67,37 @@ def f(x): return np.nan return x.iloc[-1] - result = df.rolling(window).apply(f, raw=False) - expected = df.iloc[2:].reindex_like(df) + result = df.rolling(window, step=step).apply(f, raw=False) + expected = df.iloc[2:].reindex_like(df)[::step] tm.assert_frame_equal(result, expected) with tm.external_error_raised(AttributeError): df.rolling(window).apply(f, raw=True) -def test_rolling_apply(engine_and_raw): +def test_rolling_apply(engine_and_raw, step): engine, raw = engine_and_raw expected = Series([], dtype="float64") - result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw) + result = expected.rolling(10, step=step).apply( + lambda x: x.mean(), engine=engine, raw=raw + ) tm.assert_series_equal(result, expected) # gh-8080 s = Series([None, None, None]) - result = s.rolling(2, min_periods=0).apply(lambda x: len(x), engine=engine, raw=raw) - expected = Series([1.0, 2.0, 2.0]) + result = s.rolling(2, min_periods=0, step=step).apply( + lambda x: len(x), engine=engine, raw=raw + ) + expected = Series([1.0, 2.0, 2.0])[::step] tm.assert_series_equal(result, expected) - result = s.rolling(2, min_periods=0).apply(len, engine=engine, raw=raw) + result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw) tm.assert_series_equal(result, expected) -def test_all_apply(engine_and_raw): +@pytest.mark.parametrize("step", [None]) +def test_all_apply(engine_and_raw, step): engine, raw = engine_and_raw df = ( @@ -100,15 +106,16 @@ def test_all_apply(engine_and_raw): ).set_index("A") * 2 ) - er = df.rolling(window=1) - r = df.rolling(window="1s") + er = df.rolling(window=1, step=step) + r = df.rolling(window="1s", step=step) result = r.apply(lambda x: 1, engine=engine, raw=raw) expected = er.apply(lambda x: 1, engine=engine, raw=raw) tm.assert_frame_equal(result, expected) -def 
test_ragged_apply(engine_and_raw): +@pytest.mark.parametrize("step", [None]) +def test_ragged_apply(engine_and_raw, step): engine, raw = engine_and_raw df = DataFrame({"B": range(5)}) @@ -121,18 +128,24 @@ def test_ragged_apply(engine_and_raw): ] f = lambda x: 1 - result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw) - expected = df.copy() + result = df.rolling(window="1s", min_periods=1, step=step).apply( + f, engine=engine, raw=raw + ) + expected = df.copy()[::step] expected["B"] = 1.0 tm.assert_frame_equal(result, expected) - result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw) - expected = df.copy() + result = df.rolling(window="2s", min_periods=1, step=step).apply( + f, engine=engine, raw=raw + ) + expected = df.copy()[::step] expected["B"] = 1.0 tm.assert_frame_equal(result, expected) - result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw) - expected = df.copy() + result = df.rolling(window="5s", min_periods=1, step=step).apply( + f, engine=engine, raw=raw + ) + expected = df.copy()[::step] expected["B"] = 1.0 tm.assert_frame_equal(result, expected) @@ -266,9 +279,13 @@ def test_time_rule_frame(raw, frame): @pytest.mark.parametrize("minp", [0, 99, 100]) -def test_min_periods(raw, series, minp): - result = series.rolling(len(series) + 1, min_periods=minp).apply(f, raw=raw) - expected = series.rolling(len(series), min_periods=minp).apply(f, raw=raw) +def test_min_periods(raw, series, minp, step): + result = series.rolling(len(series) + 1, min_periods=minp, step=step).apply( + f, raw=raw + ) + expected = series.rolling(len(series), min_periods=minp, step=step).apply( + f, raw=raw + ) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 5593aa8351c69..aef79f97bf93d 100644 --- a/pandas/tests/window/test_base_indexer.py +++ 
b/pandas/tests/window/test_base_indexer.py @@ -46,7 +46,7 @@ def test_indexer_constructor_arg(): df = DataFrame({"values": range(5)}) class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) for i in range(num_values): @@ -68,11 +68,17 @@ def test_indexer_accepts_rolling_args(): df = DataFrame({"values": range(5)}) class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) for i in range(num_values): - if center and min_periods == 1 and closed == "both" and i == 2: + if ( + center + and min_periods == 1 + and closed == "both" + and step == 1 + and i == 2 + ): start[i] = 0 end[i] = num_values else: @@ -81,7 +87,9 @@ def get_window_bounds(self, num_values, min_periods, center, closed): return start, end indexer = CustomIndexer(window_size=1) - result = df.rolling(indexer, center=True, min_periods=1, closed="both").sum() + result = df.rolling( + indexer, center=True, min_periods=1, closed="both", step=1 + ).sum() expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]}) tm.assert_frame_equal(result, expected) @@ -141,7 +149,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): ], ) @pytest.mark.filterwarnings("ignore:min_periods:FutureWarning") -def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs): +def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs, step): # GH 32865 values = np.arange(10.0) values[5] = 100.0 @@ -158,11 +166,11 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) rolling = constructor(values).rolling(window=indexer, 
closed="right") getattr(rolling, func)() - rolling = constructor(values).rolling(window=indexer, min_periods=2) + rolling = constructor(values).rolling(window=indexer, min_periods=2, step=step) result = getattr(rolling, func)() # Check that the function output matches the explicitly provided array - expected = constructor(expected) + expected = constructor(expected)[::step] tm.assert_equal(result, expected) # Check that the rolling function output matches applying an alternative @@ -182,12 +190,12 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs) @pytest.mark.parametrize("constructor", [Series, DataFrame]) -def test_rolling_forward_skewness(constructor): +def test_rolling_forward_skewness(constructor, step): values = np.arange(10.0) values[5] = 100.0 indexer = FixedForwardWindowIndexer(window_size=5) - rolling = constructor(values).rolling(window=indexer, min_periods=3) + rolling = constructor(values).rolling(window=indexer, min_periods=3, step=step) result = rolling.skew() expected = constructor( @@ -203,7 +211,7 @@ def test_rolling_forward_skewness(constructor): np.nan, np.nan, ] - ) + )[::step] tm.assert_equal(result, expected) @@ -239,7 +247,7 @@ def test_rolling_forward_cov_corr(func, expected): # We are interested in checking only pairwise covariance / correlation result = getattr(rolling, func)().loc[(slice(None), 1), 0] result = result.reset_index(drop=True) - expected = Series(expected) + expected = Series(expected).reset_index(drop=True) expected.name = result.name tm.assert_equal(result, expected) @@ -251,22 +259,23 @@ def test_rolling_forward_cov_corr(func, expected): ["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]], ], ) -def test_non_fixed_variable_window_indexer(closed, expected_data): +@pytest.mark.parametrize("step", [None]) +def test_non_fixed_variable_window_indexer(closed, expected_data, step): index = date_range("2020", periods=10) df = DataFrame(range(10), index=index) offset = BusinessDay(1) 
indexer = VariableOffsetWindowIndexer(index=index, offset=offset) - result = df.rolling(indexer, closed=closed).sum() - expected = DataFrame(expected_data, index=index) + result = df.rolling(indexer, closed=closed, step=step).sum() + expected = DataFrame(expected_data, index=index)[::step] tm.assert_frame_equal(result, expected) -def test_fixed_forward_indexer_count(): +def test_fixed_forward_indexer_count(step): # GH: 35579 df = DataFrame({"b": [None, None, None, 7]}) indexer = FixedForwardWindowIndexer(window_size=2) - result = df.rolling(window=indexer, min_periods=0).count() - expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]}) + result = df.rolling(window=indexer, min_periods=0, step=step).count() + expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step] tm.assert_frame_equal(result, expected) @@ -277,7 +286,7 @@ def test_fixed_forward_indexer_count(): def test_indexer_quantile_sum(end_value, values, func, args): # GH 37153 class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) for i in range(num_values): @@ -338,14 +347,16 @@ def test_indexers_are_reusable_after_groupby_rolling( ], ) def test_fixed_forward_indexer_bounds( - window_size, num_values, expected_start, expected_end + window_size, num_values, expected_start, expected_end, step ): # GH 43267 indexer = FixedForwardWindowIndexer(window_size=window_size) - start, end = indexer.get_window_bounds(num_values=num_values) + start, end = indexer.get_window_bounds(num_values=num_values, step=step) - tm.assert_numpy_array_equal(start, np.array(expected_start), check_dtype=False) - tm.assert_numpy_array_equal(end, np.array(expected_end), check_dtype=False) + tm.assert_numpy_array_equal( + start, np.array(expected_start[::step]), check_dtype=False + ) + tm.assert_numpy_array_equal(end, 
np.array(expected_end[::step]), check_dtype=False) assert len(start) == len(end) @@ -456,7 +467,7 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size): def test_unequal_start_end_bounds(): class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): return np.array([1]), np.array([1, 2]) indexer = CustomIndexer() @@ -478,7 +489,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): def test_unequal_bounds_to_object(): # GH 44470 class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): return np.array([1]), np.array([2]) indexer = CustomIndexer() diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 80a96c3a8cee9..161976a6112a5 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -90,9 +90,11 @@ def dtypes(request): ), ], ) -def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods): +def test_series_dtypes( + method, data, expected_data, coerce_int, dtypes, min_periods, step +): ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int)) - rolled = ser.rolling(2, min_periods=min_periods) + rolled = ser.rolling(2, min_periods=min_periods, step=step) if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": msg = "No numeric types to aggregate" @@ -100,15 +102,15 @@ def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_peri getattr(rolled, method)() else: result = getattr(rolled, method)() - expected = Series(expected_data, dtype="float64") + expected = Series(expected_data, dtype="float64")[::step] tm.assert_almost_equal(result, expected) -def test_series_nullable_int(any_signed_int_ea_dtype): +def 
test_series_nullable_int(any_signed_int_ea_dtype, step): # GH 43016 ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype) - result = ser.rolling(2).mean() - expected = Series([np.nan, 0.5, np.nan]) + result = ser.rolling(2, step=step).mean() + expected = Series([np.nan, 0.5, np.nan])[::step] tm.assert_series_equal(result, expected) @@ -156,10 +158,10 @@ def test_series_nullable_int(any_signed_int_ea_dtype): ), ], ) -def test_dataframe_dtypes(method, expected_data, dtypes, min_periods): +def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step): df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes)) - rolled = df.rolling(2, min_periods=min_periods) + rolled = df.rolling(2, min_periods=min_periods, step=step) if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": msg = "No numeric types to aggregate" @@ -167,5 +169,5 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods): getattr(rolled, method)() else: result = getattr(rolled, method)() - expected = DataFrame(expected_data, dtype="float64") + expected = DataFrame(expected_data, dtype="float64")[::step] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 25782d0c0617f..90b9288b77690 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -448,7 +448,12 @@ def test_groupby_rolling_custom_indexer(self): # GH 35557 class SimpleIndexer(BaseIndexer): def get_window_bounds( - self, num_values=0, min_periods=None, center=None, closed=None + self, + num_values=0, + min_periods=None, + center=None, + closed=None, + step=None, ): min_periods = self.window_size if min_periods is None else 0 end = np.arange(num_values, dtype=np.int64) + 1 diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 6fd45606ae98d..0e7fe24420171 100644 --- a/pandas/tests/window/test_numba.py +++ 
b/pandas/tests/window/test_numba.py @@ -54,7 +54,7 @@ def arithmetic_numba_supported_operators(request): # Filter warnings when parallel=True and the function can't be parallelized by Numba class TestEngine: @pytest.mark.parametrize("jit", [True, False]) - def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center): + def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step): def f(x, *args): arg_sum = 0 for arg in args: @@ -70,10 +70,10 @@ def f(x, *args): args = (2,) s = Series(range(10)) - result = s.rolling(2, center=center).apply( + result = s.rolling(2, center=center, step=step).apply( f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True ) - expected = s.rolling(2, center=center).apply( + expected = s.rolling(2, center=center, step=step).apply( f, engine="cython", args=args, raw=True ) tm.assert_series_equal(result, expected) @@ -82,14 +82,20 @@ def f(x, *args): "data", [DataFrame(np.eye(5)), Series(range(5), name="foo")] ) def test_numba_vs_cython_rolling_methods( - self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators + self, + data, + nogil, + parallel, + nopython, + arithmetic_numba_supported_operators, + step, ): method, kwargs = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - roll = data.rolling(2) + roll = data.rolling(2, step=step) result = getattr(roll, method)( engine="numba", engine_kwargs=engine_kwargs, **kwargs ) @@ -116,7 +122,7 @@ def test_numba_vs_cython_expanding_methods( tm.assert_equal(result, expected) @pytest.mark.parametrize("jit", [True, False]) - def test_cache_apply(self, jit, nogil, parallel, nopython): + def test_cache_apply(self, jit, nogil, parallel, nopython, step): # Test that the functions are cached correctly if we switch functions def func_1(x): return np.mean(x) + 4 @@ -132,7 +138,7 @@ def func_2(x): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - 
roll = Series(range(10)).rolling(2) + roll = Series(range(10)).rolling(2, step=step) result = roll.apply( func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True ) @@ -327,21 +333,29 @@ def f(x): ) def test_table_method_rolling_methods( - self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators + self, + axis, + nogil, + parallel, + nopython, + arithmetic_numba_supported_operators, + step, ): method, kwargs = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(np.eye(3)) - roll_table = df.rolling(2, method="table", axis=axis, min_periods=0) + roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step) if method in ("var", "std"): with pytest.raises(NotImplementedError, match=f"{method} not supported"): getattr(roll_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) else: - roll_single = df.rolling(2, method="single", axis=axis, min_periods=0) + roll_single = df.rolling( + 2, method="single", axis=axis, min_periods=0, step=step + ) result = getattr(roll_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) @@ -350,29 +364,29 @@ def test_table_method_rolling_methods( ) tm.assert_frame_equal(result, expected) - def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython): + def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.rolling(2, method="table", axis=axis, min_periods=0).apply( - f, raw=True, engine_kwargs=engine_kwargs, engine="numba" - ) - expected = df.rolling(2, method="single", axis=axis, min_periods=0).apply( - f, raw=True, engine_kwargs=engine_kwargs, engine="numba" - ) + result = df.rolling( + 2, method="table", axis=axis, min_periods=0, step=step + ).apply(f, raw=True, engine_kwargs=engine_kwargs, 
engine="numba") + expected = df.rolling( + 2, method="single", axis=axis, min_periods=0, step=step + ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") tm.assert_frame_equal(result, expected) - def test_table_method_rolling_weighted_mean(self): + def test_table_method_rolling_weighted_mean(self, step): def weighted_mean(x): arr = np.ones((1, x.shape[1])) arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum() return arr df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) - result = df.rolling(2, method="table", min_periods=0).apply( + result = df.rolling(2, method="table", min_periods=0, step=step).apply( weighted_mean, raw=True, engine="numba" ) expected = DataFrame( @@ -382,7 +396,7 @@ def weighted_mean(x): [3.333333, 2.333333, 1.0], [1.555556, 7, 1.0], ] - ) + )[::step] tm.assert_frame_equal(result, expected) def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index ced163178f73a..53e1d442d60a4 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -26,8 +26,17 @@ ) import pandas._testing as tm from pandas.api.indexers import BaseIndexer +from pandas.core.indexers.objects import ( + ExpandingIndexer, + ExponentialMovingWindowIndexer, + GroupbyIndexer, + VariableOffsetWindowIndexer, + VariableWindowIndexer, +) from pandas.core.window import Rolling +from pandas.tseries.offsets import BusinessDay + def test_doc_string(): @@ -81,8 +90,77 @@ def test_invalid_constructor(frame_or_series, w): c(window=2, min_periods=1, center=w) +@pytest.mark.parametrize( + "window", + [ + timedelta(days=3), + Timedelta(days=3), + "3D", + ExpandingIndexer(window_size=3), + ExponentialMovingWindowIndexer(window_size=3), + GroupbyIndexer(window_size=3), + VariableOffsetWindowIndexer( + index=date_range("2015-12-25", periods=5), offset=BusinessDay(1) + ), + VariableWindowIndexer(window_size=3), + ], 
+) +@pytest.mark.parametrize( + "func", + [ + lambda df: df.rolling, + lambda df: df.groupby("key").rolling, + ], +) +def test_constructor_step_not_implemented(window, func, step): + # GH 15354 + df = DataFrame( + {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, + index=date_range("2015-12-24", periods=10, freq="D"), + ) + f = lambda: func(df)(window=window, step=step) + if step is None: + f() + else: + with pytest.raises(NotImplementedError, match="step not implemented"): + f() + + +@pytest.mark.parametrize("agg", ["cov", "corr"]) +def test_constructor_step_not_implemented_for_cov_corr(agg, step): + # GH 15354 + df = DataFrame( + {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, + index=date_range("2015-12-24", periods=10, freq="D"), + ) + f = lambda: getattr(df.rolling(window=2, step=step), agg)(df) + if step is None: + f() + else: + with pytest.raises(NotImplementedError, match="step not implemented"): + f() + + +@pytest.mark.parametrize( + "func", + [ + lambda df: df.expanding, + lambda df: df.ewm, + ], +) +def test_constructor_step_unsupported(func, step): + # GH 15354 + df = DataFrame( + {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)}, + index=date_range("2015-12-24", periods=10, freq="D"), + ) + with pytest.raises(TypeError, match="got an unexpected keyword argument 'step'"): + func(df)(step=step) + + @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)]) -def test_constructor_with_timedelta_window(window): +@pytest.mark.parametrize("step", [None]) +def test_constructor_with_timedelta_window(window, step): # GH 15440 n = 10 df = DataFrame( @@ -91,18 +169,19 @@ def test_constructor_with_timedelta_window(window): ) expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) - result = df.rolling(window=window).sum() + result = df.rolling(window=window, step=step).sum() expected = DataFrame( {"value": expected_data}, index=date_range("2015-12-24", periods=n, freq="D"), - ) + )[::step] 
tm.assert_frame_equal(result, expected) - expected = df.rolling("3D").sum() + expected = df.rolling("3D", step=step).sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"]) -def test_constructor_timedelta_window_and_minperiods(window, raw): +@pytest.mark.parametrize("step", [None]) +def test_constructor_timedelta_window_and_minperiods(window, step, raw): # GH 15305 n = 10 df = DataFrame( @@ -112,9 +191,11 @@ def test_constructor_timedelta_window_and_minperiods(window, raw): expected = DataFrame( {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, index=date_range("2017-08-08", periods=n, freq="D"), + )[::step] + result_roll_sum = df.rolling(window=window, min_periods=2, step=step).sum() + result_roll_generic = df.rolling(window=window, min_periods=2, step=step).apply( + sum, raw=raw ) - result_roll_sum = df.rolling(window=window, min_periods=2).sum() - result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) @@ -133,18 +214,21 @@ def test_numpy_compat(method): @pytest.mark.parametrize("closed", ["right", "left", "both", "neither"]) -def test_closed_fixed(closed, arithmetic_win_operators): +@pytest.mark.parametrize("step", [None]) +def test_closed_fixed(closed, arithmetic_win_operators, step): # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr( - df_fixed.rolling(2, closed=closed, min_periods=1), + df_fixed.rolling(2, closed=closed, min_periods=1, step=step), func_name, )() + if step is not None: + result = result.reset_index(drop=True) expected = getattr( - df_time.rolling("2D", closed=closed, min_periods=1), + df_time.rolling("2D", closed=closed, min_periods=1, step=step), func_name, )().reset_index(drop=True) @@ -196,8 
+280,9 @@ def test_closed_fixed(closed, arithmetic_win_operators): ), ], ) +@pytest.mark.parametrize("step", [None]) def test_datetimelike_centered_selections( - closed, window_selections, arithmetic_win_operators + closed, window_selections, step, arithmetic_win_operators ): # GH 34315 func_name = arithmetic_win_operators @@ -208,7 +293,7 @@ def test_datetimelike_centered_selections( expected = DataFrame( {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, index=date_range("2020", periods=5), - ) + )[::step] if func_name == "sem": kwargs = {"ddof": 0} @@ -216,7 +301,7 @@ def test_datetimelike_centered_selections( kwargs = {} result = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, center=True), + df_time.rolling("2D", closed=closed, min_periods=1, center=True, step=step), func_name, )(**kwargs) @@ -236,8 +321,9 @@ def test_datetimelike_centered_selections( ("2s", "neither", [1.0, 2.0, 2.0]), ], ) +@pytest.mark.parametrize("step", [None]) def test_datetimelike_centered_offset_covers_all( - window, closed, expected, frame_or_series + window, closed, expected, step, frame_or_series ): # GH 42753 @@ -248,8 +334,8 @@ def test_datetimelike_centered_offset_covers_all( ] df = frame_or_series([1, 1, 1], index=index) - result = df.rolling(window, closed=closed, center=True).sum() - expected = frame_or_series(expected, index=index) + result = df.rolling(window, closed=closed, center=True, step=step).sum() + expected = frame_or_series(expected, index=index)[::step] tm.assert_equal(result, expected) @@ -262,8 +348,9 @@ def test_datetimelike_centered_offset_covers_all( ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]), ], ) +@pytest.mark.parametrize("step", [None]) def test_datetimelike_nonunique_index_centering( - window, closed, expected, frame_or_series + window, closed, expected, frame_or_series, step ): index = DatetimeIndex( [ @@ -279,28 +366,29 @@ def test_datetimelike_nonunique_index_centering( ) df = frame_or_series([1] * 8, 
index=index, dtype=float) - expected = frame_or_series(expected, index=index, dtype=float) + expected = frame_or_series(expected, index=index, dtype=float)[::step] - result = df.rolling(window, center=True, closed=closed).sum() + result = df.rolling(window, center=True, closed=closed, step=step).sum() tm.assert_equal(result, expected) -def test_even_number_window_alignment(): +@pytest.mark.parametrize("step", [None]) +def test_even_number_window_alignment(step): # see discussion in GH 38780 s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3)) # behavior of index- and datetime-based windows differs here! # s.rolling(window=2, min_periods=1, center=True).mean() - result = s.rolling(window="2D", min_periods=1, center=True).mean() + result = s.rolling(window="2D", min_periods=1, center=True, step=step).mean() - expected = Series([0.5, 1.5, 2], index=s.index) + expected = Series([0.5, 1.5, 2], index=s.index)[::step] tm.assert_series_equal(result, expected) -def test_closed_fixed_binary_col(center): +def test_closed_fixed_binary_col(center, step): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] df = DataFrame( @@ -317,31 +405,37 @@ def test_closed_fixed_binary_col(center): expected_data, columns=["binary_col"], index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)), - ) + )[::step] - rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) + rolling = df.rolling( + window=len(df), closed="left", min_periods=1, center=center, step=step + ) result = rolling.mean() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("closed", ["neither", "left"]) -def test_closed_empty(closed, arithmetic_win_operators): +@pytest.mark.parametrize("step", [None]) +def test_closed_empty(closed, arithmetic_win_operators, step): # GH 26005 func_name = arithmetic_win_operators ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D")) - roll = ser.rolling("1D", closed=closed) + roll = 
ser.rolling("1D", closed=closed, step=step) result = getattr(roll, func_name)() - expected = Series([np.nan] * 5, index=ser.index) + expected = Series([np.nan] * 5, index=ser.index)[::step] tm.assert_series_equal(result, expected) @pytest.mark.parametrize("func", ["min", "max"]) -def test_closed_one_entry(func): +@pytest.mark.parametrize("step", [None]) +def test_closed_one_entry(func, step): # GH24718 ser = Series(data=[2], index=date_range("2000", periods=1)) - result = getattr(ser.rolling("10D", closed="left"), func)() - tm.assert_series_equal(result, Series([np.nan], index=ser.index)) + result = getattr(ser.rolling("10D", closed="left", step=step), func)() + index = ser.index.copy() + index.freq = index.freq * (step or 1) + tm.assert_series_equal(result, Series([np.nan], index=index)) @pytest.mark.parametrize("func", ["min", "max"]) @@ -1362,7 +1456,7 @@ def test_rolling_non_monotonic(method, expected): df = DataFrame({"values": np.arange(len(use_expanding)) ** 2}) class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) for i in range(num_values): diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index 842c056806092..9ab4ff13796d6 100644 --- a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -38,10 +38,11 @@ [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}], ], ) -def test_series(series, compare_func, roll_func, kwargs): - result = getattr(series.rolling(50), roll_func)(**kwargs) +def test_series(series, compare_func, roll_func, kwargs, step): + result = getattr(series.rolling(50, step=step), roll_func)(**kwargs) assert isinstance(result, Series) - tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) + end = range(0, 
len(series), step or 1)[-1] + 1 + tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end])) @pytest.mark.parametrize( @@ -64,12 +65,13 @@ def test_series(series, compare_func, roll_func, kwargs): [lambda x: np.var(x, ddof=0), "var", {"ddof": 0}], ], ) -def test_frame(raw, frame, compare_func, roll_func, kwargs): - result = getattr(frame.rolling(50), roll_func)(**kwargs) +def test_frame(raw, frame, compare_func, roll_func, kwargs, step): + result = getattr(frame.rolling(50, step=step), roll_func)(**kwargs) assert isinstance(result, DataFrame) + end = range(0, len(frame), step or 1)[-1] + 1 tm.assert_series_equal( result.iloc[-1, :], - frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw), + frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw), check_names=False, ) @@ -200,13 +202,13 @@ def test_nans_count(): ], ) @pytest.mark.parametrize("minp", [0, 99, 100]) -def test_min_periods(series, minp, roll_func, kwargs): - result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)( - **kwargs - ) - expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)( - **kwargs - ) +def test_min_periods(series, minp, roll_func, kwargs, step): + result = getattr( + series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func + )(**kwargs) + expected = getattr( + series.rolling(len(series), min_periods=minp, step=step), roll_func + )(**kwargs) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) @@ -214,9 +216,9 @@ def test_min_periods(series, minp, roll_func, kwargs): tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) -def test_min_periods_count(series): - result = series.rolling(len(series) + 1, min_periods=0).count() - expected = series.rolling(len(series), min_periods=0).count() +def test_min_periods_count(series, step): + result = series.rolling(len(series) + 1, min_periods=0, step=step).count() + expected = series.rolling(len(series), min_periods=0, 
step=step).count() nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) @@ -364,7 +366,7 @@ def test_rolling_functions_window_non_shrinkage(f): tm.assert_frame_equal(df_result, df_expected) -def test_rolling_max_gh6297(): +def test_rolling_max_gh6297(step): """Replicate result expected in GH #6297""" indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 2 datapoints on one of the days @@ -378,12 +380,12 @@ def test_rolling_max_gh6297(): expected = Series( [1.0, 2.0, 6.0, 4.0, 5.0], index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), - ) - x = series.resample("D").max().rolling(window=1).max() + )[::step] + x = series.resample("D").max().rolling(window=1, step=step).max() tm.assert_series_equal(expected, x) -def test_rolling_max_resample(): +def test_rolling_max_resample(step): indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 3 datapoints on last day (4, 10, and 20) @@ -399,16 +401,16 @@ def test_rolling_max_resample(): expected = Series( [0.0, 1.0, 2.0, 3.0, 20.0], index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), - ) - x = series.resample("D").max().rolling(window=1).max() + )[::step] + x = series.resample("D").max().rolling(window=1, step=step).max() tm.assert_series_equal(expected, x) # Now specify median (10.0) expected = Series( [0.0, 1.0, 2.0, 3.0, 10.0], index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), - ) - x = series.resample("D").median().rolling(window=1).max() + )[::step] + x = series.resample("D").median().rolling(window=1, step=step).max() tm.assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 @@ -416,12 +418,12 @@ def test_rolling_max_resample(): expected = Series( [0.0, 1.0, 2.0, 3.0, v], index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), - ) - x = series.resample("D").mean().rolling(window=1).max() + )[::step] + x = 
series.resample("D").mean().rolling(window=1, step=step).max() tm.assert_series_equal(expected, x) -def test_rolling_min_resample(): +def test_rolling_min_resample(step): indices = [datetime(1975, 1, i) for i in range(1, 6)] # So that we can have 3 datapoints on last day (4, 10, and 20) @@ -437,8 +439,8 @@ def test_rolling_min_resample(): expected = Series( [0.0, 1.0, 2.0, 3.0, 4.0], index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"), - ) - r = series.resample("D").min().rolling(window=1) + )[::step] + r = series.resample("D").min().rolling(window=1, step=step) tm.assert_series_equal(expected, r.min()) diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py index 56b79097a1d05..815ee419590f7 100644 --- a/pandas/tests/window/test_rolling_quantile.py +++ b/pandas/tests/window/test_rolling_quantile.py @@ -34,21 +34,23 @@ def scoreatpercentile(a, per): @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) -def test_series(series, q): +def test_series(series, q, step): compare_func = partial(scoreatpercentile, per=q) - result = series.rolling(50).quantile(q) + result = series.rolling(50, step=step).quantile(q) assert isinstance(result, Series) - tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:])) + end = range(0, len(series), step or 1)[-1] + 1 + tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end])) @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) -def test_frame(raw, frame, q): +def test_frame(raw, frame, q, step): compare_func = partial(scoreatpercentile, per=q) - result = frame.rolling(50).quantile(q) + result = frame.rolling(50, step=step).quantile(q) assert isinstance(result, DataFrame) + end = range(0, len(frame), step or 1)[-1] + 1 tm.assert_series_equal( result.iloc[-1, :], - frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw), + frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw), check_names=False, ) @@ -113,9 
+115,9 @@ def test_nans(q): @pytest.mark.parametrize("minp", [0, 99, 100]) @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) -def test_min_periods(series, minp, q): - result = series.rolling(len(series) + 1, min_periods=minp).quantile(q) - expected = series.rolling(len(series), min_periods=minp).quantile(q) +def test_min_periods(series, minp, q, step): + result = series.rolling(len(series) + 1, min_periods=minp, step=step).quantile(q) + expected = series.rolling(len(series), min_periods=minp, step=step).quantile(q) nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 46b7eb6cbc285..152172d7b2266 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -112,9 +112,13 @@ def test_nans(sp_func, roll_func): @pytest.mark.parametrize("minp", [0, 99, 100]) @pytest.mark.parametrize("roll_func", ["kurt", "skew"]) -def test_min_periods(series, minp, roll_func): - result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)() - expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)() +def test_min_periods(series, minp, roll_func, step): + result = getattr( + series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func + )() + expected = getattr( + series.rolling(len(series), min_periods=minp, step=step), roll_func + )() nan_mask = isna(result) tm.assert_series_equal(nan_mask, isna(expected)) @@ -172,55 +176,55 @@ def test_center_reindex_frame(frame, roll_func): tm.assert_frame_equal(frame_xp, frame_rs) -def test_rolling_skew_edge_cases(): +def test_rolling_skew_edge_cases(step): - all_nan = Series([np.NaN] * 5) + all_nan = Series([np.NaN] * 5)[::step] # yields all NaN (0 variance) d = Series([1] * 5) - x = d.rolling(window=5).skew() + x = d.rolling(window=5, step=step).skew() tm.assert_series_equal(all_nan, x) # yields all NaN 
(window too small) d = Series(np.random.randn(5)) - x = d.rolling(window=2).skew() + x = d.rolling(window=2, step=step).skew() tm.assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 0.177994, 1.548824] d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) - expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) - x = d.rolling(window=4).skew() + expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])[::step] + x = d.rolling(window=4, step=step).skew() tm.assert_series_equal(expected, x) -def test_rolling_kurt_edge_cases(): +def test_rolling_kurt_edge_cases(step): - all_nan = Series([np.NaN] * 5) + all_nan = Series([np.NaN] * 5)[::step] # yields all NaN (0 variance) d = Series([1] * 5) - x = d.rolling(window=5).kurt() + x = d.rolling(window=5, step=step).kurt() tm.assert_series_equal(all_nan, x) # yields all NaN (window too small) d = Series(np.random.randn(5)) - x = d.rolling(window=3).kurt() + x = d.rolling(window=3, step=step).kurt() tm.assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 1.224307, 2.671499] d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) - expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) - x = d.rolling(window=4).kurt() + expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])[::step] + x = d.rolling(window=4, step=step).kurt() tm.assert_series_equal(expected, x) -def test_rolling_skew_eq_value_fperr(): +def test_rolling_skew_eq_value_fperr(step): # #18804 all rolling skew for all equal values should return Nan - a = Series([1.1] * 15).rolling(window=10).skew() + a = Series([1.1] * 15).rolling(window=10, step=step).skew() assert np.isnan(a).all() -def test_rolling_kurt_eq_value_fperr(): +def test_rolling_kurt_eq_value_fperr(step): # #18804 all rolling kurt for all equal values should return Nan - a = Series([1.1] * 15).rolling(window=10).kurt() + a = Series([1.1] * 15).rolling(window=10, step=step).kurt() assert np.isnan(a).all() diff --git 
a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 03ea745d9cb86..c356c9bdc7742 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -125,10 +125,10 @@ def test_constructor_with_win_type_invalid(frame_or_series): @td.skip_if_no_scipy @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") -def test_window_with_args(): +def test_window_with_args(step): # make sure that we are aggregating window functions correctly with arg r = Series(np.random.randn(100)).rolling( - window=10, min_periods=1, win_type="gaussian" + window=10, min_periods=1, win_type="gaussian", step=step ) expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) expected.columns = ["", ""] @@ -173,7 +173,7 @@ def test_win_type_freq_return_deprecation(): @td.skip_if_no_scipy def test_win_type_not_implemented(): class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): + def get_window_bounds(self, num_values, min_periods, center, closed, step): return np.array([0, 1]), np.array([1, 2]) df = DataFrame({"values": range(2)}) @@ -183,10 +183,10 @@ def get_window_bounds(self, num_values, min_periods, center, closed): @td.skip_if_no_scipy -def test_cmov_mean(): +def test_cmov_mean(step): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - result = Series(vals).rolling(5, center=True).mean() + result = Series(vals).rolling(5, center=True, step=step).mean() expected_values = [ np.nan, np.nan, @@ -199,15 +199,15 @@ def test_cmov_mean(): np.nan, np.nan, ] - expected = Series(expected_values) + expected = Series(expected_values)[::step] tm.assert_series_equal(expected, result) @td.skip_if_no_scipy -def test_cmov_window(): +def test_cmov_window(step): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) - result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + result = 
Series(vals).rolling(5, win_type="boxcar", center=True, step=step).mean() expected_values = [ np.nan, np.nan, @@ -220,28 +220,28 @@ def test_cmov_window(): np.nan, np.nan, ] - expected = Series(expected_values) + expected = Series(expected_values)[::step] tm.assert_series_equal(expected, result) @td.skip_if_no_scipy -def test_cmov_window_corner(): +def test_cmov_window_corner(step): # GH 8238 # all nan vals = Series([np.nan] * 10) - result = vals.rolling(5, center=True, win_type="boxcar").mean() + result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean() assert np.isnan(result).all() # empty vals = Series([], dtype=object) - result = vals.rolling(5, center=True, win_type="boxcar").mean() + result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean() assert len(result) == 0 # shorter than window vals = Series(np.random.randn(5)) - result = vals.rolling(10, win_type="boxcar").mean() + result = vals.rolling(10, win_type="boxcar", step=step).mean() assert np.isnan(result).all() - assert len(result) == 5 + assert len(result) == len(range(0, 5, step or 1)) @td.skip_if_no_scipy @@ -310,7 +310,7 @@ def test_cmov_window_corner(): ), ], ) -def test_cmov_window_frame(f, xp): +def test_cmov_window_frame(f, xp, step): # Gh 8238 df = DataFrame( np.array( @@ -328,28 +328,30 @@ def test_cmov_window_frame(f, xp): ] ) ) - xp = DataFrame(np.array(xp)) + xp = DataFrame(np.array(xp))[::step] - roll = df.rolling(5, win_type="boxcar", center=True) + roll = df.rolling(5, win_type="boxcar", center=True, step=step) rs = getattr(roll, f)() tm.assert_frame_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_na_min_periods(): +def test_cmov_window_na_min_periods(step): # min_periods vals = Series(np.random.randn(10)) vals[4] = np.nan vals[8] = np.nan - xp = vals.rolling(5, min_periods=4, center=True).mean() - rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + xp = vals.rolling(5, min_periods=4, center=True, step=step).mean() + rs = 
vals.rolling( + 5, win_type="boxcar", min_periods=4, center=True, step=step + ).mean() tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_regular(win_types): +def test_cmov_window_regular(win_types, step): # GH 8238 vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) xps = { @@ -451,26 +453,26 @@ def test_cmov_window_regular(win_types): ], } - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + xp = Series(xps[win_types])[::step] + rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean() tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_regular_linear_range(win_types): +def test_cmov_window_regular_linear_range(win_types, step): # GH 8238 vals = np.array(range(10), dtype=float) xp = vals.copy() xp[:2] = np.nan xp[-2:] = np.nan - xp = Series(xp) + xp = Series(xp)[::step] - rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean() tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_regular_missing_data(win_types): +def test_cmov_window_regular_missing_data(win_types, step): # GH 8238 vals = np.array( [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] @@ -574,13 +576,13 @@ def test_cmov_window_regular_missing_data(win_types): ], } - xp = Series(xps[win_types]) - rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + xp = Series(xps[win_types])[::step] + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3, step=step).mean() tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_special(win_types_special): +def test_cmov_window_special(win_types_special, step): # GH 8238 kwds = { "kaiser": {"beta": 1.0}, @@ -642,17 +644,17 @@ def test_cmov_window_special(win_types_special): ], } - xp = Series(xps[win_types_special]) + xp = 
Series(xps[win_types_special])[::step] rs = ( Series(vals) - .rolling(5, win_type=win_types_special, center=True) + .rolling(5, win_type=win_types_special, center=True, step=step) .mean(**kwds[win_types_special]) ) tm.assert_series_equal(xp, rs) @td.skip_if_no_scipy -def test_cmov_window_special_linear_range(win_types_special): +def test_cmov_window_special_linear_range(win_types_special, step): # GH 8238 kwds = { "kaiser": {"beta": 1.0}, @@ -666,11 +668,11 @@ def test_cmov_window_special_linear_range(win_types_special): xp = vals.copy() xp[:2] = np.nan xp[-2:] = np.nan - xp = Series(xp) + xp = Series(xp)[::step] rs = ( Series(vals) - .rolling(5, win_type=win_types_special, center=True) + .rolling(5, win_type=win_types_special, center=True, step=step) .mean(**kwds[win_types_special]) ) tm.assert_series_equal(xp, rs)