diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index d2b34f587b5e2..59d4ef1d9b39d 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -38,6 +38,7 @@ Other enhancements
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`)
- :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`)
- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`)
+- :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`)
- Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`)
- Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
- Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)
diff --git a/pandas/_libs/window/indexers.pyi b/pandas/_libs/window/indexers.pyi
index c9bc64be34ac9..bbb5e6988d0b5 100644
--- a/pandas/_libs/window/indexers.pyi
+++ b/pandas/_libs/window/indexers.pyi
@@ -8,5 +8,6 @@ def calculate_variable_window_bounds(
min_periods,
center: bool,
closed: str | None,
+ step: int | None,
index: np.ndarray, # const int64_t[:]
) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: ...
diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx
index 4b3a858ade773..992212a872035 100644
--- a/pandas/_libs/window/indexers.pyx
+++ b/pandas/_libs/window/indexers.pyx
@@ -16,6 +16,7 @@ def calculate_variable_window_bounds(
object min_periods, # unused but here to match get_window_bounds signature
bint center,
str closed,
+ int64_t step,
const int64_t[:] index
):
"""
@@ -38,6 +39,9 @@ def calculate_variable_window_bounds(
closed : str
string of side of the window that should be closed
+ step : int64
+ Spacing between windows
+
index : ndarray[int64]
time series index to roll over
@@ -52,6 +56,9 @@ def calculate_variable_window_bounds(
int64_t start_bound, end_bound, index_growth_sign = 1
Py_ssize_t i, j
+ if num_values <= 0:
+ return np.empty(0, dtype='int64'), np.empty(0, dtype='int64')
+
# default is 'right'
if closed is None:
closed = 'right'
@@ -143,4 +150,4 @@ def calculate_variable_window_bounds(
# right endpoint is open
if not right_closed and not center:
end[i] -= 1
- return start, end
+ return start[::step], end[::step]
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index fc682b848b054..83d0a95b8adb2 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -11261,6 +11261,7 @@ def rolling(
on: str | None = None,
axis: Axis = 0,
closed: str | None = None,
+ step: int | None = None,
method: str = "single",
):
axis = self._get_axis_number(axis)
@@ -11275,6 +11276,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
+ step=step,
method=method,
)
@@ -11287,6 +11289,7 @@ def rolling(
on=on,
axis=axis,
closed=closed,
+ step=step,
method=method,
)
diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py
index 4d5e4bbe6bd36..54bdee0bb0208 100644
--- a/pandas/core/indexers/objects.py
+++ b/pandas/core/indexers/objects.py
@@ -27,6 +27,9 @@
center passed from the top level rolling API
closed : str, default None
closed passed from the top level rolling API
+step : int, default None
+ step passed from the top level rolling API
+ .. versionadded:: 1.5
win_type : str, default None
win_type passed from the top level rolling API
@@ -62,6 +65,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
raise NotImplementedError
@@ -77,6 +81,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if center:
@@ -84,7 +89,7 @@ def get_window_bounds(
else:
offset = 0
- end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64")
+ end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
start = end - self.window_size
if closed in ["left", "both"]:
start -= 1
@@ -107,8 +112,12 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
+ if step is not None:
+ raise NotImplementedError("step not implemented for variable window")
+
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
# type "Optional[bool]"; expected "bool"
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
@@ -119,6 +128,7 @@ def get_window_bounds(
min_periods,
center, # type: ignore[arg-type]
closed,
+ 1,
self.index_array, # type: ignore[arg-type]
)
@@ -145,8 +155,14 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
+ if step is not None:
+ raise NotImplementedError("step not implemented for variable offset window")
+ if num_values <= 0:
+ return np.empty(0, dtype="int64"), np.empty(0, dtype="int64")
+
# if windows is variable, default is 'right', otherwise default is 'both'
if closed is None:
closed = "right" if self.index is not None else "both"
@@ -215,12 +231,15 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
- return (
- np.zeros(num_values, dtype=np.int64),
- np.arange(1, num_values + 1, dtype=np.int64),
- )
+ if step is not None:
+ raise NotImplementedError("step not implemented for expanding window")
+
+ end = np.arange(1, num_values + 1, dtype=np.int64)
+ start = np.zeros(len(end), dtype=np.int64)
+ return start, end
class FixedForwardWindowIndexer(BaseIndexer):
@@ -256,6 +275,7 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if center:
@@ -264,11 +284,13 @@ def get_window_bounds(
raise ValueError(
"Forward-looking windows don't support setting the closed argument"
)
+ if step is None:
+ step = 1
- start = np.arange(num_values, dtype="int64")
+ start = np.arange(0, num_values, step, dtype="int64")
end = start + self.window_size
if self.window_size:
- end[-self.window_size :] = num_values
+ end = np.clip(end, 0, num_values)
return start, end
@@ -319,7 +341,11 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
+ if step is not None:
+ raise NotImplementedError("step not implemented for groupby window")
+
# 1) For each group, get the indices that belong to the group
# 2) Use the indices to calculate the start & end bounds of the window
# 3) Append the window bounds in group order
@@ -339,7 +365,7 @@ def get_window_bounds(
**self.indexer_kwargs,
)
start, end = indexer.get_window_bounds(
- len(indices), min_periods, center, closed
+ len(indices), min_periods, center, closed, step
)
start = start.astype(np.int64)
end = end.astype(np.int64)
@@ -358,6 +384,8 @@ def get_window_bounds(
)
start_arrays.append(window_indices.take(ensure_platform_int(start)))
end_arrays.append(window_indices.take(ensure_platform_int(end)))
+ if len(start_arrays) == 0:
+ return np.array([], dtype=np.int64), np.array([], dtype=np.int64)
start = np.concatenate(start_arrays)
end = np.concatenate(end_arrays)
return start, end
@@ -373,6 +401,14 @@ def get_window_bounds(
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
+ step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
- return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
+ if step is not None:
+ raise NotImplementedError(
+ "step not implemented for exponential moving window"
+ )
+ return (
+ np.array([0], dtype=np.int64),
+ np.array([num_values], dtype=np.int64),
+ )
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
index 2d633ba1a2bcd..4c2b99762b812 100644
--- a/pandas/core/window/ewm.py
+++ b/pandas/core/window/ewm.py
@@ -732,6 +732,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
result = window_aggregations.ewmcov(
x_array,
@@ -798,6 +799,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
def _cov(X, Y):
diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py
index 3b14f0d14ecab..0f9f01e93a477 100644
--- a/pandas/core/window/numba_.py
+++ b/pandas/core/window/numba_.py
@@ -220,8 +220,8 @@ def roll_table(
minimum_periods: int,
*args: Any,
):
- result = np.empty(values.shape)
- min_periods_mask = np.empty(values.shape)
+ result = np.empty((len(begin), values.shape[1]))
+ min_periods_mask = np.empty(result.shape)
for i in numba.prange(len(result)):
start = begin[i]
stop = end[i]
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 5eca817d6bb62..269bef32a0f39 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -14,6 +14,7 @@
Any,
Callable,
Hashable,
+ Sized,
)
import warnings
@@ -126,6 +127,7 @@ def __init__(
axis: Axis = 0,
on: str | Index | None = None,
closed: str | None = None,
+ step: int | None = None,
method: str = "single",
*,
selection=None,
@@ -133,6 +135,7 @@ def __init__(
self.obj = obj
self.on = on
self.closed = closed
+ self.step = step
self.window = window
self.min_periods = min_periods
self.center = center
@@ -235,12 +238,25 @@ def _check_window_bounds(
f"start ({len(start)}) and end ({len(end)}) bounds must be the "
f"same length"
)
- elif len(start) != num_vals:
+ elif not isinstance(self._get_window_indexer(), GroupbyIndexer) and len(
+ start
+ ) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
raise ValueError(
f"start and end bounds ({len(start)}) must be the same length "
- f"as the object ({num_vals})"
+ f"as the object ({num_vals}) divided by the step ({self.step}) "
+ f"if given and rounded up"
)
+ def _slice_index(self, index: Index, result: Sized | None = None) -> Index:
+ """
+ Slices the index for a given result and the preset step.
+ """
+ return (
+ index
+ if result is None or len(result) == len(index)
+ else index[:: self.step]
+ )
+
def _create_data(self, obj: NDFrameT) -> NDFrameT:
"""
Split data into blocks & return conformed data.
@@ -324,6 +340,7 @@ def __iter__(self):
min_periods=self.min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
self._check_window_bounds(start, end, len(obj))
@@ -429,7 +446,8 @@ def _apply_series(
raise DataError("No numeric types to aggregate") from err
result = homogeneous_func(values)
- return obj._constructor(result, index=obj.index, name=obj.name)
+ index = self._slice_index(obj.index, result)
+ return obj._constructor(result, index=index, name=obj.name)
def _apply_blockwise(
self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
@@ -466,9 +484,12 @@ def hfunc(values: ArrayLike) -> ArrayLike:
res_values.append(res)
taker.append(i)
+ index = self._slice_index(
+ obj.index, res_values[0] if len(res_values) > 0 else None
+ )
df = type(obj)._from_arrays(
res_values,
- index=obj.index,
+ index=index,
columns=obj.columns.take(taker),
verify_integrity=False,
)
@@ -503,7 +524,13 @@ def _apply_tablewise(
values = values.T if self.axis == 1 else values
result = homogeneous_func(values)
result = result.T if self.axis == 1 else result
- out = obj._constructor(result, index=obj.index, columns=obj.columns)
+ index = self._slice_index(obj.index, result)
+ columns = (
+ obj.columns
+ if result.shape[1] == len(obj.columns)
+ else obj.columns[:: self.step]
+ )
+ out = obj._constructor(result, index=index, columns=columns)
return self._resolve_output(out, obj)
@@ -570,6 +597,7 @@ def calc(x):
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
self._check_window_bounds(start, end, len(x))
@@ -608,6 +636,7 @@ def _numba_apply(
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
self._check_window_bounds(start, end, len(values))
aggregator = executor.generate_shared_aggregator(
@@ -615,12 +644,14 @@ def _numba_apply(
)
result = aggregator(values, start, end, min_periods, *func_args)
result = result.T if self.axis == 1 else result
+ index = self._slice_index(obj.index, result)
if obj.ndim == 1:
result = result.squeeze()
- out = obj._constructor(result, index=obj.index, name=obj.name)
+ out = obj._constructor(result, index=index, name=obj.name)
return out
else:
- out = obj._constructor(result, index=obj.index, columns=obj.columns)
+ columns = self._slice_index(obj.columns, result.T)
+ out = obj._constructor(result, index=index, columns=columns)
return self._resolve_output(out, obj)
def aggregate(self, func, *args, **kwargs):
@@ -659,6 +690,9 @@ def __init__(
# groupby.<agg_func>, but unexpected to users in
# groupby.rolling.<agg_func>
obj = obj.drop(columns=self._grouper.names, errors="ignore")
+ # GH 15354
+ if kwargs.get("step") is not None:
+ raise NotImplementedError("step not implemented for rolling groupby")
super().__init__(obj, *args, **kwargs)
def _apply(
@@ -848,8 +882,8 @@ class Window(BaseWindow):
If a BaseIndexer subclass, the window boundaries
based on the defined ``get_window_bounds`` method. Additional rolling
- keyword arguments, namely ``min_periods``, ``center``, and
- ``closed`` will be passed to ``get_window_bounds``.
+ keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
+ ``step`` will be passed to ``get_window_bounds``.
min_periods : int, default None
Minimum number of observations in window required to have a value;
@@ -903,6 +937,16 @@ class Window(BaseWindow):
The closed parameter with fixed windows is now supported.
+ step : int, default None
+ When supported, applies ``[::step]`` to the resulting sequence of windows, in a
+ computationally efficient manner. Currently supported only with fixed-length
+ window indexers. Note that using a step argument other than None or 1 will
+ produce a result with a different shape than the input.
+
+ .. versionadded:: 1.5
+
+ The step parameter is only supported with fixed windows.
+
method : str {'single', 'table'}, default 'single'
.. versionadded:: 1.3.0
@@ -1021,6 +1065,17 @@ class Window(BaseWindow):
3 3.0
4 6.0
+ **step**
+
+ Rolling sum with a window length of 2 observations, minimum of 1 observation to
+ calculate a value, and a step of 2.
+
+ >>> df.rolling(2, min_periods=1, step=2).sum()
+ B
+ 0 0.0
+ 2 3.0
+ 4 4.0
+
**win_type**
Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
@@ -1043,6 +1098,7 @@ class Window(BaseWindow):
"axis",
"on",
"closed",
+ "step",
"method",
]
@@ -1132,7 +1188,7 @@ def calc(x):
return result
- return self._apply_blockwise(homogeneous_func, name)
+ return self._apply_blockwise(homogeneous_func, name)[:: self.step]
@doc(
_shared_docs["aggregate"],
@@ -1548,6 +1604,11 @@ def cov(
ddof: int = 1,
**kwargs,
):
+ if self.step is not None:
+ raise NotImplementedError(
+ "step not implemented for rolling and expanding cov"
+ )
+
from pandas import Series
def cov_func(x, y):
@@ -1564,6 +1625,7 @@ def cov_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
self._check_window_bounds(start, end, len(x_array))
@@ -1589,6 +1651,11 @@ def corr(
**kwargs,
):
+ if self.step is not None:
+ raise NotImplementedError(
+ "step not implemented for rolling and expanding corr"
+ )
+
from pandas import Series
def corr_func(x, y):
@@ -1605,6 +1672,7 @@ def corr_func(x, y):
min_periods=min_periods,
center=self.center,
closed=self.closed,
+ step=self.step,
)
self._check_window_bounds(start, end, len(x_array))
@@ -1643,6 +1711,7 @@ class Rolling(RollingAndExpandingMixin):
"axis",
"on",
"closed",
+ "step",
"method",
]
@@ -1682,9 +1751,22 @@ def _validate(self):
elif isinstance(self.window, BaseIndexer):
# Passed BaseIndexer subclass should handle all other rolling kwargs
- return
+ pass
elif not is_integer(self.window) or self.window < 0:
raise ValueError("window must be an integer 0 or greater")
+ # GH 15354:
+ # validate window indexer parameters do not raise in get_window_bounds
+ # this cannot be done in BaseWindow._validate because there _get_window_indexer
+ # would erroneously create a fixed window given a window argument like "1s" due
+ # to _win_freq_i8 not being set
+ indexer = self._get_window_indexer()
+ indexer.get_window_bounds(
+ num_values=0,
+ min_periods=self.min_periods,
+ center=self.center,
+ closed=self.closed,
+ step=self.step,
+ )
def _validate_datetimelike_monotonic(self):
"""
diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py
index f2832652ed58f..f42a1a5449c5c 100644
--- a/pandas/tests/window/conftest.py
+++ b/pandas/tests/window/conftest.py
@@ -126,3 +126,9 @@ def frame():
index=bdate_range(datetime(2009, 1, 1), periods=100),
columns=np.arange(10),
)
+
+
+@pytest.fixture(params=[None, 1, 2, 5, 10])
+def step(request):
+ """step keyword argument for rolling window operations."""
+ return request.param
diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py
index f84a579247630..6dbcc8dfd00c0 100644
--- a/pandas/tests/window/test_api.py
+++ b/pandas/tests/window/test_api.py
@@ -16,20 +16,20 @@
from pandas.core.base import SpecificationError
-def test_getitem():
+def test_getitem(step):
frame = DataFrame(np.random.randn(5, 5))
- r = frame.rolling(window=5)
- tm.assert_index_equal(r._selected_obj.columns, frame.columns)
+ r = frame.rolling(window=5, step=step)
+ tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns)
- r = frame.rolling(window=5)[1]
- assert r._selected_obj.name == frame.columns[1]
+ r = frame.rolling(window=5, step=step)[1]
+ assert r._selected_obj.name == frame[::step].columns[1]
# technically this is allowed
- r = frame.rolling(window=5)[1, 3]
- tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]])
+ r = frame.rolling(window=5, step=step)[1, 3]
+ tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])
- r = frame.rolling(window=5)[[1, 3]]
- tm.assert_index_equal(r._selected_obj.columns, frame.columns[[1, 3]])
+ r = frame.rolling(window=5, step=step)[[1, 3]]
+ tm.assert_index_equal(r._selected_obj.columns, frame[::step].columns[[1, 3]])
def test_select_bad_cols():
@@ -53,21 +53,21 @@ def test_attribute_access():
r.F
-def tests_skip_nuisance():
+def tests_skip_nuisance(step):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
- r = df.rolling(window=3)
+ r = df.rolling(window=3, step=step)
result = r[["A", "B"]].sum()
expected = DataFrame(
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
columns=list("AB"),
- )
+ )[::step]
tm.assert_frame_equal(result, expected)
-def test_skip_sum_object_raises():
+def test_skip_sum_object_raises(step):
df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"})
- r = df.rolling(window=3)
+ r = df.rolling(window=3, step=step)
msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)"
with tm.assert_produces_warning(FutureWarning, match=msg):
# GH#42738
@@ -75,14 +75,14 @@ def test_skip_sum_object_raises():
expected = DataFrame(
{"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]},
columns=list("AB"),
- )
+ )[::step]
tm.assert_frame_equal(result, expected)
-def test_agg():
+def test_agg(step):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
- r = df.rolling(window=3)
+ r = df.rolling(window=3, step=step)
a_mean = r["A"].mean()
a_std = r["A"].std()
a_sum = r["A"].sum()
@@ -141,10 +141,10 @@ def test_agg_apply(raw):
tm.assert_frame_equal(result, expected, check_like=True)
-def test_agg_consistency():
+def test_agg_consistency(step):
df = DataFrame({"A": range(5), "B": range(0, 10, 2)})
- r = df.rolling(window=3)
+ r = df.rolling(window=3, step=step)
result = r.agg([np.sum, np.mean]).columns
expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]])
@@ -182,7 +182,7 @@ def test_agg_nested_dicts():
r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
-def test_count_nonnumeric_types():
+def test_count_nonnumeric_types(step):
# GH12541
cols = [
"int",
@@ -239,13 +239,13 @@ def test_count_nonnumeric_types():
"periods_nat": [1.0, 2.0, 1.0],
},
columns=cols,
- )
+ )[::step]
- result = df.rolling(window=2, min_periods=0).count()
+ result = df.rolling(window=2, min_periods=0, step=step).count()
tm.assert_frame_equal(result, expected)
- result = df.rolling(1, min_periods=0).count()
- expected = df.notna().astype(float)
+ result = df.rolling(1, min_periods=0, step=step).count()
+ expected = df.notna().astype(float)[::step]
tm.assert_frame_equal(result, expected)
@@ -339,11 +339,11 @@ def test_validate_deprecated():
@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning")
def test_dont_modify_attributes_after_methods(
- arithmetic_win_operators, closed, center, min_periods
+ arithmetic_win_operators, closed, center, min_periods, step
):
# GH 39554
roll_obj = Series(range(1)).rolling(
- 1, center=center, closed=closed, min_periods=min_periods
+ 1, center=center, closed=closed, min_periods=min_periods, step=step
)
expected = {attr: getattr(roll_obj, attr) for attr in roll_obj._attributes}
getattr(roll_obj, arithmetic_win_operators)()
@@ -351,40 +351,49 @@ def test_dont_modify_attributes_after_methods(
assert result == expected
-def test_centered_axis_validation():
+def test_centered_axis_validation(step):
# ok
- Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean()
+ Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean()
# bad axis
msg = "No axis named 1 for object type Series"
with pytest.raises(ValueError, match=msg):
- Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean()
+ Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean()
# ok ok
- DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean()
- DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean()
+ DataFrame(np.ones((10, 10))).rolling(
+ window=3, center=True, axis=0, step=step
+ ).mean()
+ DataFrame(np.ones((10, 10))).rolling(
+ window=3, center=True, axis=1, step=step
+ ).mean()
# bad axis
msg = "No axis named 2 for object type DataFrame"
with pytest.raises(ValueError, match=msg):
- (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean())
+ (
+ DataFrame(np.ones((10, 10)))
+ .rolling(window=3, center=True, axis=2, step=step)
+ .mean()
+ )
-def test_rolling_min_min_periods():
+def test_rolling_min_min_periods(step):
a = Series([1, 2, 3, 4, 5])
- result = a.rolling(window=100, min_periods=1).min()
- expected = Series(np.ones(len(a)))
+ result = a.rolling(window=100, min_periods=1, step=step).min()
+ expected = Series(np.ones(len(a)))[::step]
tm.assert_series_equal(result, expected)
msg = "min_periods 5 must be <= window 3"
with pytest.raises(ValueError, match=msg):
- Series([1, 2, 3]).rolling(window=3, min_periods=5).min()
+ Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).min()
-def test_rolling_max_min_periods():
+def test_rolling_max_min_periods(step):
a = Series([1, 2, 3, 4, 5], dtype=np.float64)
- b = a.rolling(window=100, min_periods=1).max()
- tm.assert_almost_equal(a, b)
+ result = a.rolling(window=100, min_periods=1, step=step).max()
+ expected = a[::step]
+ tm.assert_almost_equal(result, expected)
msg = "min_periods 5 must be <= window 3"
with pytest.raises(ValueError, match=msg):
- Series([1, 2, 3]).rolling(window=3, min_periods=5).max()
+ Series([1, 2, 3]).rolling(window=3, min_periods=5, step=step).max()
diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py
index 6683137960146..bbf9f3d886794 100644
--- a/pandas/tests/window/test_apply.py
+++ b/pandas/tests/window/test_apply.py
@@ -52,7 +52,8 @@ def test_rolling_apply_out_of_bounds(engine_and_raw):
@pytest.mark.parametrize("window", [2, "2s"])
-def test_rolling_apply_with_pandas_objects(window):
+@pytest.mark.parametrize("step", [None])
+def test_rolling_apply_with_pandas_objects(window, step):
# 5071
df = DataFrame(
{"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)},
@@ -66,32 +67,37 @@ def f(x):
return np.nan
return x.iloc[-1]
- result = df.rolling(window).apply(f, raw=False)
- expected = df.iloc[2:].reindex_like(df)
+ result = df.rolling(window, step=step).apply(f, raw=False)
+ expected = df.iloc[2:].reindex_like(df)[::step]
tm.assert_frame_equal(result, expected)
with tm.external_error_raised(AttributeError):
df.rolling(window).apply(f, raw=True)
-def test_rolling_apply(engine_and_raw):
+def test_rolling_apply(engine_and_raw, step):
engine, raw = engine_and_raw
expected = Series([], dtype="float64")
- result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw)
+ result = expected.rolling(10, step=step).apply(
+ lambda x: x.mean(), engine=engine, raw=raw
+ )
tm.assert_series_equal(result, expected)
# gh-8080
s = Series([None, None, None])
- result = s.rolling(2, min_periods=0).apply(lambda x: len(x), engine=engine, raw=raw)
- expected = Series([1.0, 2.0, 2.0])
+ result = s.rolling(2, min_periods=0, step=step).apply(
+ lambda x: len(x), engine=engine, raw=raw
+ )
+ expected = Series([1.0, 2.0, 2.0])[::step]
tm.assert_series_equal(result, expected)
- result = s.rolling(2, min_periods=0).apply(len, engine=engine, raw=raw)
+ result = s.rolling(2, min_periods=0, step=step).apply(len, engine=engine, raw=raw)
tm.assert_series_equal(result, expected)
-def test_all_apply(engine_and_raw):
+@pytest.mark.parametrize("step", [None])
+def test_all_apply(engine_and_raw, step):
engine, raw = engine_and_raw
df = (
@@ -100,15 +106,16 @@ def test_all_apply(engine_and_raw):
).set_index("A")
* 2
)
- er = df.rolling(window=1)
- r = df.rolling(window="1s")
+ er = df.rolling(window=1, step=step)
+ r = df.rolling(window="1s", step=step)
result = r.apply(lambda x: 1, engine=engine, raw=raw)
expected = er.apply(lambda x: 1, engine=engine, raw=raw)
tm.assert_frame_equal(result, expected)
-def test_ragged_apply(engine_and_raw):
+@pytest.mark.parametrize("step", [None])
+def test_ragged_apply(engine_and_raw, step):
engine, raw = engine_and_raw
df = DataFrame({"B": range(5)})
@@ -121,18 +128,24 @@ def test_ragged_apply(engine_and_raw):
]
f = lambda x: 1
- result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw)
- expected = df.copy()
+ result = df.rolling(window="1s", min_periods=1, step=step).apply(
+ f, engine=engine, raw=raw
+ )
+ expected = df.copy()[::step]
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
- result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw)
- expected = df.copy()
+ result = df.rolling(window="2s", min_periods=1, step=step).apply(
+ f, engine=engine, raw=raw
+ )
+ expected = df.copy()[::step]
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
- result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw)
- expected = df.copy()
+ result = df.rolling(window="5s", min_periods=1, step=step).apply(
+ f, engine=engine, raw=raw
+ )
+ expected = df.copy()[::step]
expected["B"] = 1.0
tm.assert_frame_equal(result, expected)
@@ -266,9 +279,13 @@ def test_time_rule_frame(raw, frame):
@pytest.mark.parametrize("minp", [0, 99, 100])
-def test_min_periods(raw, series, minp):
- result = series.rolling(len(series) + 1, min_periods=minp).apply(f, raw=raw)
- expected = series.rolling(len(series), min_periods=minp).apply(f, raw=raw)
+def test_min_periods(raw, series, minp, step):
+ result = series.rolling(len(series) + 1, min_periods=minp, step=step).apply(
+ f, raw=raw
+ )
+ expected = series.rolling(len(series), min_periods=minp, step=step).apply(
+ f, raw=raw
+ )
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py
index 5593aa8351c69..aef79f97bf93d 100644
--- a/pandas/tests/window/test_base_indexer.py
+++ b/pandas/tests/window/test_base_indexer.py
@@ -46,7 +46,7 @@ def test_indexer_constructor_arg():
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
@@ -68,11 +68,17 @@ def test_indexer_accepts_rolling_args():
df = DataFrame({"values": range(5)})
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
- if center and min_periods == 1 and closed == "both" and i == 2:
+ if (
+ center
+ and min_periods == 1
+ and closed == "both"
+ and step == 1
+ and i == 2
+ ):
start[i] = 0
end[i] = num_values
else:
@@ -81,7 +87,9 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
return start, end
indexer = CustomIndexer(window_size=1)
- result = df.rolling(indexer, center=True, min_periods=1, closed="both").sum()
+ result = df.rolling(
+ indexer, center=True, min_periods=1, closed="both", step=1
+ ).sum()
expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]})
tm.assert_frame_equal(result, expected)
@@ -141,7 +149,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
],
)
@pytest.mark.filterwarnings("ignore:min_periods:FutureWarning")
-def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
+def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs, step):
# GH 32865
values = np.arange(10.0)
values[5] = 100.0
@@ -158,11 +166,11 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs)
rolling = constructor(values).rolling(window=indexer, closed="right")
getattr(rolling, func)()
- rolling = constructor(values).rolling(window=indexer, min_periods=2)
+ rolling = constructor(values).rolling(window=indexer, min_periods=2, step=step)
result = getattr(rolling, func)()
# Check that the function output matches the explicitly provided array
- expected = constructor(expected)
+ expected = constructor(expected)[::step]
tm.assert_equal(result, expected)
# Check that the rolling function output matches applying an alternative
@@ -182,12 +190,12 @@ def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs)
@pytest.mark.parametrize("constructor", [Series, DataFrame])
-def test_rolling_forward_skewness(constructor):
+def test_rolling_forward_skewness(constructor, step):
values = np.arange(10.0)
values[5] = 100.0
indexer = FixedForwardWindowIndexer(window_size=5)
- rolling = constructor(values).rolling(window=indexer, min_periods=3)
+ rolling = constructor(values).rolling(window=indexer, min_periods=3, step=step)
result = rolling.skew()
expected = constructor(
@@ -203,7 +211,7 @@ def test_rolling_forward_skewness(constructor):
np.nan,
np.nan,
]
- )
+ )[::step]
tm.assert_equal(result, expected)
@@ -239,7 +247,7 @@ def test_rolling_forward_cov_corr(func, expected):
# We are interested in checking only pairwise covariance / correlation
result = getattr(rolling, func)().loc[(slice(None), 1), 0]
result = result.reset_index(drop=True)
- expected = Series(expected)
+ expected = Series(expected).reset_index(drop=True)
expected.name = result.name
tm.assert_equal(result, expected)
@@ -251,22 +259,23 @@ def test_rolling_forward_cov_corr(func, expected):
["left", [0.0, 0.0, 1.0, 2.0, 5.0, 9.0, 5.0, 6.0, 7.0, 8.0]],
],
)
-def test_non_fixed_variable_window_indexer(closed, expected_data):
+@pytest.mark.parametrize("step", [None])
+def test_non_fixed_variable_window_indexer(closed, expected_data, step):
index = date_range("2020", periods=10)
df = DataFrame(range(10), index=index)
offset = BusinessDay(1)
indexer = VariableOffsetWindowIndexer(index=index, offset=offset)
- result = df.rolling(indexer, closed=closed).sum()
- expected = DataFrame(expected_data, index=index)
+ result = df.rolling(indexer, closed=closed, step=step).sum()
+ expected = DataFrame(expected_data, index=index)[::step]
tm.assert_frame_equal(result, expected)
-def test_fixed_forward_indexer_count():
+def test_fixed_forward_indexer_count(step):
# GH: 35579
df = DataFrame({"b": [None, None, None, 7]})
indexer = FixedForwardWindowIndexer(window_size=2)
- result = df.rolling(window=indexer, min_periods=0).count()
- expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})
+ result = df.rolling(window=indexer, min_periods=0, step=step).count()
+ expected = DataFrame({"b": [0.0, 0.0, 1.0, 1.0]})[::step]
tm.assert_frame_equal(result, expected)
@@ -277,7 +286,7 @@ def test_fixed_forward_indexer_count():
def test_indexer_quantile_sum(end_value, values, func, args):
# GH 37153
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
@@ -338,14 +347,16 @@ def test_indexers_are_reusable_after_groupby_rolling(
],
)
def test_fixed_forward_indexer_bounds(
- window_size, num_values, expected_start, expected_end
+ window_size, num_values, expected_start, expected_end, step
):
# GH 43267
indexer = FixedForwardWindowIndexer(window_size=window_size)
- start, end = indexer.get_window_bounds(num_values=num_values)
+ start, end = indexer.get_window_bounds(num_values=num_values, step=step)
- tm.assert_numpy_array_equal(start, np.array(expected_start), check_dtype=False)
- tm.assert_numpy_array_equal(end, np.array(expected_end), check_dtype=False)
+ tm.assert_numpy_array_equal(
+ start, np.array(expected_start[::step]), check_dtype=False
+ )
+ tm.assert_numpy_array_equal(end, np.array(expected_end[::step]), check_dtype=False)
assert len(start) == len(end)
@@ -456,7 +467,7 @@ def test_rolling_groupby_with_fixed_forward_many(group_keys, window_size):
def test_unequal_start_end_bounds():
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([1]), np.array([1, 2])
indexer = CustomIndexer()
@@ -478,7 +489,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
def test_unequal_bounds_to_object():
# GH 44470
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([1]), np.array([2])
indexer = CustomIndexer()
diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py
index 80a96c3a8cee9..161976a6112a5 100644
--- a/pandas/tests/window/test_dtypes.py
+++ b/pandas/tests/window/test_dtypes.py
@@ -90,9 +90,11 @@ def dtypes(request):
),
],
)
-def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods):
+def test_series_dtypes(
+ method, data, expected_data, coerce_int, dtypes, min_periods, step
+):
ser = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
- rolled = ser.rolling(2, min_periods=min_periods)
+ rolled = ser.rolling(2, min_periods=min_periods, step=step)
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
msg = "No numeric types to aggregate"
@@ -100,15 +102,15 @@ def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_peri
getattr(rolled, method)()
else:
result = getattr(rolled, method)()
- expected = Series(expected_data, dtype="float64")
+ expected = Series(expected_data, dtype="float64")[::step]
tm.assert_almost_equal(result, expected)
-def test_series_nullable_int(any_signed_int_ea_dtype):
+def test_series_nullable_int(any_signed_int_ea_dtype, step):
# GH 43016
ser = Series([0, 1, NA], dtype=any_signed_int_ea_dtype)
- result = ser.rolling(2).mean()
- expected = Series([np.nan, 0.5, np.nan])
+ result = ser.rolling(2, step=step).mean()
+ expected = Series([np.nan, 0.5, np.nan])[::step]
tm.assert_series_equal(result, expected)
@@ -156,10 +158,10 @@ def test_series_nullable_int(any_signed_int_ea_dtype):
),
],
)
-def test_dataframe_dtypes(method, expected_data, dtypes, min_periods):
+def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step):
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
- rolled = df.rolling(2, min_periods=min_periods)
+ rolled = df.rolling(2, min_periods=min_periods, step=step)
if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count":
msg = "No numeric types to aggregate"
@@ -167,5 +169,5 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods):
getattr(rolled, method)()
else:
result = getattr(rolled, method)()
- expected = DataFrame(expected_data, dtype="float64")
+ expected = DataFrame(expected_data, dtype="float64")[::step]
tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index 25782d0c0617f..90b9288b77690 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -448,7 +448,12 @@ def test_groupby_rolling_custom_indexer(self):
# GH 35557
class SimpleIndexer(BaseIndexer):
def get_window_bounds(
- self, num_values=0, min_periods=None, center=None, closed=None
+ self,
+ num_values=0,
+ min_periods=None,
+ center=None,
+ closed=None,
+ step=None,
):
min_periods = self.window_size if min_periods is None else 0
end = np.arange(num_values, dtype=np.int64) + 1
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index 6fd45606ae98d..0e7fe24420171 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -54,7 +54,7 @@ def arithmetic_numba_supported_operators(request):
# Filter warnings when parallel=True and the function can't be parallelized by Numba
class TestEngine:
@pytest.mark.parametrize("jit", [True, False])
- def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center):
+ def test_numba_vs_cython_apply(self, jit, nogil, parallel, nopython, center, step):
def f(x, *args):
arg_sum = 0
for arg in args:
@@ -70,10 +70,10 @@ def f(x, *args):
args = (2,)
s = Series(range(10))
- result = s.rolling(2, center=center).apply(
+ result = s.rolling(2, center=center, step=step).apply(
f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
- expected = s.rolling(2, center=center).apply(
+ expected = s.rolling(2, center=center, step=step).apply(
f, engine="cython", args=args, raw=True
)
tm.assert_series_equal(result, expected)
@@ -82,14 +82,20 @@ def f(x, *args):
"data", [DataFrame(np.eye(5)), Series(range(5), name="foo")]
)
def test_numba_vs_cython_rolling_methods(
- self, data, nogil, parallel, nopython, arithmetic_numba_supported_operators
+ self,
+ data,
+ nogil,
+ parallel,
+ nopython,
+ arithmetic_numba_supported_operators,
+ step,
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
- roll = data.rolling(2)
+ roll = data.rolling(2, step=step)
result = getattr(roll, method)(
engine="numba", engine_kwargs=engine_kwargs, **kwargs
)
@@ -116,7 +122,7 @@ def test_numba_vs_cython_expanding_methods(
tm.assert_equal(result, expected)
@pytest.mark.parametrize("jit", [True, False])
- def test_cache_apply(self, jit, nogil, parallel, nopython):
+ def test_cache_apply(self, jit, nogil, parallel, nopython, step):
# Test that the functions are cached correctly if we switch functions
def func_1(x):
return np.mean(x) + 4
@@ -132,7 +138,7 @@ def func_2(x):
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
- roll = Series(range(10)).rolling(2)
+ roll = Series(range(10)).rolling(2, step=step)
result = roll.apply(
func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True
)
@@ -327,21 +333,29 @@ def f(x):
)
def test_table_method_rolling_methods(
- self, axis, nogil, parallel, nopython, arithmetic_numba_supported_operators
+ self,
+ axis,
+ nogil,
+ parallel,
+ nopython,
+ arithmetic_numba_supported_operators,
+ step,
):
method, kwargs = arithmetic_numba_supported_operators
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
df = DataFrame(np.eye(3))
- roll_table = df.rolling(2, method="table", axis=axis, min_periods=0)
+ roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step)
if method in ("var", "std"):
with pytest.raises(NotImplementedError, match=f"{method} not supported"):
getattr(roll_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
else:
- roll_single = df.rolling(2, method="single", axis=axis, min_periods=0)
+ roll_single = df.rolling(
+ 2, method="single", axis=axis, min_periods=0, step=step
+ )
result = getattr(roll_table, method)(
engine_kwargs=engine_kwargs, engine="numba", **kwargs
)
@@ -350,29 +364,29 @@ def test_table_method_rolling_methods(
)
tm.assert_frame_equal(result, expected)
- def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython):
+ def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step):
engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
def f(x):
return np.sum(x, axis=0) + 1
df = DataFrame(np.eye(3))
- result = df.rolling(2, method="table", axis=axis, min_periods=0).apply(
- f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
- )
- expected = df.rolling(2, method="single", axis=axis, min_periods=0).apply(
- f, raw=True, engine_kwargs=engine_kwargs, engine="numba"
- )
+ result = df.rolling(
+ 2, method="table", axis=axis, min_periods=0, step=step
+ ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
+ expected = df.rolling(
+ 2, method="single", axis=axis, min_periods=0, step=step
+ ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba")
tm.assert_frame_equal(result, expected)
- def test_table_method_rolling_weighted_mean(self):
+ def test_table_method_rolling_weighted_mean(self, step):
def weighted_mean(x):
arr = np.ones((1, x.shape[1]))
arr[:, :2] = (x[:, :2] * x[:, 2]).sum(axis=0) / x[:, 2].sum()
return arr
df = DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
- result = df.rolling(2, method="table", min_periods=0).apply(
+ result = df.rolling(2, method="table", min_periods=0, step=step).apply(
weighted_mean, raw=True, engine="numba"
)
expected = DataFrame(
@@ -382,7 +396,7 @@ def weighted_mean(x):
[3.333333, 2.333333, 1.0],
[1.555556, 7, 1.0],
]
- )
+ )[::step]
tm.assert_frame_equal(result, expected)
def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython):
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index ced163178f73a..53e1d442d60a4 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -26,8 +26,17 @@
)
import pandas._testing as tm
from pandas.api.indexers import BaseIndexer
+from pandas.core.indexers.objects import (
+ ExpandingIndexer,
+ ExponentialMovingWindowIndexer,
+ GroupbyIndexer,
+ VariableOffsetWindowIndexer,
+ VariableWindowIndexer,
+)
from pandas.core.window import Rolling
+from pandas.tseries.offsets import BusinessDay
+
def test_doc_string():
@@ -81,8 +90,77 @@ def test_invalid_constructor(frame_or_series, w):
c(window=2, min_periods=1, center=w)
+@pytest.mark.parametrize(
+ "window",
+ [
+ timedelta(days=3),
+ Timedelta(days=3),
+ "3D",
+ ExpandingIndexer(window_size=3),
+ ExponentialMovingWindowIndexer(window_size=3),
+ GroupbyIndexer(window_size=3),
+ VariableOffsetWindowIndexer(
+ index=date_range("2015-12-25", periods=5), offset=BusinessDay(1)
+ ),
+ VariableWindowIndexer(window_size=3),
+ ],
+)
+@pytest.mark.parametrize(
+ "func",
+ [
+ lambda df: df.rolling,
+ lambda df: df.groupby("key").rolling,
+ ],
+)
+def test_constructor_step_not_implemented(window, func, step):
+ # GH 15354
+ df = DataFrame(
+ {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)},
+ index=date_range("2015-12-24", periods=10, freq="D"),
+ )
+ f = lambda: func(df)(window=window, step=step)
+ if step is None:
+ f()
+ else:
+ with pytest.raises(NotImplementedError, match="step not implemented"):
+ f()
+
+
+@pytest.mark.parametrize("agg", ["cov", "corr"])
+def test_constructor_step_not_implemented_for_cov_corr(agg, step):
+ # GH 15354
+ df = DataFrame(
+ {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)},
+ index=date_range("2015-12-24", periods=10, freq="D"),
+ )
+ f = lambda: getattr(df.rolling(window=2, step=step), agg)(df)
+ if step is None:
+ f()
+ else:
+ with pytest.raises(NotImplementedError, match="step not implemented"):
+ f()
+
+
+@pytest.mark.parametrize(
+ "func",
+ [
+ lambda df: df.expanding,
+ lambda df: df.ewm,
+ ],
+)
+def test_constructor_step_unsupported(func, step):
+ # GH 15354
+ df = DataFrame(
+ {"value": np.arange(10), "key": np.array([1] * 5 + [2] * 5)},
+ index=date_range("2015-12-24", periods=10, freq="D"),
+ )
+ with pytest.raises(TypeError, match="got an unexpected keyword argument 'step'"):
+ func(df)(step=step)
+
+
@pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)])
-def test_constructor_with_timedelta_window(window):
+@pytest.mark.parametrize("step", [None])
+def test_constructor_with_timedelta_window(window, step):
# GH 15440
n = 10
df = DataFrame(
@@ -91,18 +169,19 @@ def test_constructor_with_timedelta_window(window):
)
expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3))
- result = df.rolling(window=window).sum()
+ result = df.rolling(window=window, step=step).sum()
expected = DataFrame(
{"value": expected_data},
index=date_range("2015-12-24", periods=n, freq="D"),
- )
+ )[::step]
tm.assert_frame_equal(result, expected)
- expected = df.rolling("3D").sum()
+ expected = df.rolling("3D", step=step).sum()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"])
-def test_constructor_timedelta_window_and_minperiods(window, raw):
+@pytest.mark.parametrize("step", [None])
+def test_constructor_timedelta_window_and_minperiods(window, step, raw):
# GH 15305
n = 10
df = DataFrame(
@@ -112,9 +191,11 @@ def test_constructor_timedelta_window_and_minperiods(window, raw):
expected = DataFrame(
{"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))},
index=date_range("2017-08-08", periods=n, freq="D"),
+ )[::step]
+ result_roll_sum = df.rolling(window=window, min_periods=2, step=step).sum()
+ result_roll_generic = df.rolling(window=window, min_periods=2, step=step).apply(
+ sum, raw=raw
)
- result_roll_sum = df.rolling(window=window, min_periods=2).sum()
- result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw)
tm.assert_frame_equal(result_roll_sum, expected)
tm.assert_frame_equal(result_roll_generic, expected)
@@ -133,18 +214,21 @@ def test_numpy_compat(method):
@pytest.mark.parametrize("closed", ["right", "left", "both", "neither"])
-def test_closed_fixed(closed, arithmetic_win_operators):
+@pytest.mark.parametrize("step", [None])
+def test_closed_fixed(closed, arithmetic_win_operators, step):
# GH 34315
func_name = arithmetic_win_operators
df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]})
df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5))
result = getattr(
- df_fixed.rolling(2, closed=closed, min_periods=1),
+ df_fixed.rolling(2, closed=closed, min_periods=1, step=step),
func_name,
)()
+ if step is not None:
+ result = result.reset_index(drop=True)
expected = getattr(
- df_time.rolling("2D", closed=closed, min_periods=1),
+ df_time.rolling("2D", closed=closed, min_periods=1, step=step),
func_name,
)().reset_index(drop=True)
@@ -196,8 +280,9 @@ def test_closed_fixed(closed, arithmetic_win_operators):
),
],
)
+@pytest.mark.parametrize("step", [None])
def test_datetimelike_centered_selections(
- closed, window_selections, arithmetic_win_operators
+ closed, window_selections, step, arithmetic_win_operators
):
# GH 34315
func_name = arithmetic_win_operators
@@ -208,7 +293,7 @@ def test_datetimelike_centered_selections(
expected = DataFrame(
{"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]},
index=date_range("2020", periods=5),
- )
+ )[::step]
if func_name == "sem":
kwargs = {"ddof": 0}
@@ -216,7 +301,7 @@ def test_datetimelike_centered_selections(
kwargs = {}
result = getattr(
- df_time.rolling("2D", closed=closed, min_periods=1, center=True),
+ df_time.rolling("2D", closed=closed, min_periods=1, center=True, step=step),
func_name,
)(**kwargs)
@@ -236,8 +321,9 @@ def test_datetimelike_centered_selections(
("2s", "neither", [1.0, 2.0, 2.0]),
],
)
+@pytest.mark.parametrize("step", [None])
def test_datetimelike_centered_offset_covers_all(
- window, closed, expected, frame_or_series
+ window, closed, expected, step, frame_or_series
):
# GH 42753
@@ -248,8 +334,8 @@ def test_datetimelike_centered_offset_covers_all(
]
df = frame_or_series([1, 1, 1], index=index)
- result = df.rolling(window, closed=closed, center=True).sum()
- expected = frame_or_series(expected, index=index)
+ result = df.rolling(window, closed=closed, center=True, step=step).sum()
+ expected = frame_or_series(expected, index=index)[::step]
tm.assert_equal(result, expected)
@@ -262,8 +348,9 @@ def test_datetimelike_centered_offset_covers_all(
("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]),
],
)
+@pytest.mark.parametrize("step", [None])
def test_datetimelike_nonunique_index_centering(
- window, closed, expected, frame_or_series
+ window, closed, expected, frame_or_series, step
):
index = DatetimeIndex(
[
@@ -279,28 +366,29 @@ def test_datetimelike_nonunique_index_centering(
)
df = frame_or_series([1] * 8, index=index, dtype=float)
- expected = frame_or_series(expected, index=index, dtype=float)
+ expected = frame_or_series(expected, index=index, dtype=float)[::step]
- result = df.rolling(window, center=True, closed=closed).sum()
+ result = df.rolling(window, center=True, closed=closed, step=step).sum()
tm.assert_equal(result, expected)
-def test_even_number_window_alignment():
+@pytest.mark.parametrize("step", [None])
+def test_even_number_window_alignment(step):
# see discussion in GH 38780
s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3))
# behavior of index- and datetime-based windows differs here!
# s.rolling(window=2, min_periods=1, center=True).mean()
- result = s.rolling(window="2D", min_periods=1, center=True).mean()
+ result = s.rolling(window="2D", min_periods=1, center=True, step=step).mean()
- expected = Series([0.5, 1.5, 2], index=s.index)
+ expected = Series([0.5, 1.5, 2], index=s.index)[::step]
tm.assert_series_equal(result, expected)
-def test_closed_fixed_binary_col(center):
+def test_closed_fixed_binary_col(center, step):
# GH 34315
data = [0, 1, 1, 0, 0, 1, 0, 1]
df = DataFrame(
@@ -317,31 +405,37 @@ def test_closed_fixed_binary_col(center):
expected_data,
columns=["binary_col"],
index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)),
- )
+ )[::step]
- rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center)
+ rolling = df.rolling(
+ window=len(df), closed="left", min_periods=1, center=center, step=step
+ )
result = rolling.mean()
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("closed", ["neither", "left"])
-def test_closed_empty(closed, arithmetic_win_operators):
+@pytest.mark.parametrize("step", [None])
+def test_closed_empty(closed, arithmetic_win_operators, step):
# GH 26005
func_name = arithmetic_win_operators
ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D"))
- roll = ser.rolling("1D", closed=closed)
+ roll = ser.rolling("1D", closed=closed, step=step)
result = getattr(roll, func_name)()
- expected = Series([np.nan] * 5, index=ser.index)
+ expected = Series([np.nan] * 5, index=ser.index)[::step]
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("func", ["min", "max"])
-def test_closed_one_entry(func):
+@pytest.mark.parametrize("step", [None])
+def test_closed_one_entry(func, step):
# GH24718
ser = Series(data=[2], index=date_range("2000", periods=1))
- result = getattr(ser.rolling("10D", closed="left"), func)()
- tm.assert_series_equal(result, Series([np.nan], index=ser.index))
+ result = getattr(ser.rolling("10D", closed="left", step=step), func)()
+ index = ser.index.copy()
+ index.freq = index.freq * (step or 1)
+ tm.assert_series_equal(result, Series([np.nan], index=index))
@pytest.mark.parametrize("func", ["min", "max"])
@@ -1362,7 +1456,7 @@ def test_rolling_non_monotonic(method, expected):
df = DataFrame({"values": np.arange(len(use_expanding)) ** 2})
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
start = np.empty(num_values, dtype=np.int64)
end = np.empty(num_values, dtype=np.int64)
for i in range(num_values):
diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py
index 842c056806092..9ab4ff13796d6 100644
--- a/pandas/tests/window/test_rolling_functions.py
+++ b/pandas/tests/window/test_rolling_functions.py
@@ -38,10 +38,11 @@
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
],
)
-def test_series(series, compare_func, roll_func, kwargs):
- result = getattr(series.rolling(50), roll_func)(**kwargs)
+def test_series(series, compare_func, roll_func, kwargs, step):
+ result = getattr(series.rolling(50, step=step), roll_func)(**kwargs)
assert isinstance(result, Series)
- tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
+ end = range(0, len(series), step or 1)[-1] + 1
+ tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
@pytest.mark.parametrize(
@@ -64,12 +65,13 @@ def test_series(series, compare_func, roll_func, kwargs):
[lambda x: np.var(x, ddof=0), "var", {"ddof": 0}],
],
)
-def test_frame(raw, frame, compare_func, roll_func, kwargs):
- result = getattr(frame.rolling(50), roll_func)(**kwargs)
+def test_frame(raw, frame, compare_func, roll_func, kwargs, step):
+ result = getattr(frame.rolling(50, step=step), roll_func)(**kwargs)
assert isinstance(result, DataFrame)
+ end = range(0, len(frame), step or 1)[-1] + 1
tm.assert_series_equal(
result.iloc[-1, :],
- frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
+ frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
check_names=False,
)
@@ -200,13 +202,13 @@ def test_nans_count():
],
)
@pytest.mark.parametrize("minp", [0, 99, 100])
-def test_min_periods(series, minp, roll_func, kwargs):
- result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)(
- **kwargs
- )
- expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)(
- **kwargs
- )
+def test_min_periods(series, minp, roll_func, kwargs, step):
+ result = getattr(
+ series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
+ )(**kwargs)
+ expected = getattr(
+ series.rolling(len(series), min_periods=minp, step=step), roll_func
+ )(**kwargs)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
@@ -214,9 +216,9 @@ def test_min_periods(series, minp, roll_func, kwargs):
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
-def test_min_periods_count(series):
- result = series.rolling(len(series) + 1, min_periods=0).count()
- expected = series.rolling(len(series), min_periods=0).count()
+def test_min_periods_count(series, step):
+ result = series.rolling(len(series) + 1, min_periods=0, step=step).count()
+ expected = series.rolling(len(series), min_periods=0, step=step).count()
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
@@ -364,7 +366,7 @@ def test_rolling_functions_window_non_shrinkage(f):
tm.assert_frame_equal(df_result, df_expected)
-def test_rolling_max_gh6297():
+def test_rolling_max_gh6297(step):
"""Replicate result expected in GH #6297"""
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 2 datapoints on one of the days
@@ -378,12 +380,12 @@ def test_rolling_max_gh6297():
expected = Series(
[1.0, 2.0, 6.0, 4.0, 5.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
- )
- x = series.resample("D").max().rolling(window=1).max()
+ )[::step]
+ x = series.resample("D").max().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
-def test_rolling_max_resample():
+def test_rolling_max_resample(step):
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 3 datapoints on last day (4, 10, and 20)
@@ -399,16 +401,16 @@ def test_rolling_max_resample():
expected = Series(
[0.0, 1.0, 2.0, 3.0, 20.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
- )
- x = series.resample("D").max().rolling(window=1).max()
+ )[::step]
+ x = series.resample("D").max().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
# Now specify median (10.0)
expected = Series(
[0.0, 1.0, 2.0, 3.0, 10.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
- )
- x = series.resample("D").median().rolling(window=1).max()
+ )[::step]
+ x = series.resample("D").median().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
# Now specify mean (4+10+20)/3
@@ -416,12 +418,12 @@ def test_rolling_max_resample():
expected = Series(
[0.0, 1.0, 2.0, 3.0, v],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
- )
- x = series.resample("D").mean().rolling(window=1).max()
+ )[::step]
+ x = series.resample("D").mean().rolling(window=1, step=step).max()
tm.assert_series_equal(expected, x)
-def test_rolling_min_resample():
+def test_rolling_min_resample(step):
indices = [datetime(1975, 1, i) for i in range(1, 6)]
# So that we can have 3 datapoints on last day (4, 10, and 20)
@@ -437,8 +439,8 @@ def test_rolling_min_resample():
expected = Series(
[0.0, 1.0, 2.0, 3.0, 4.0],
index=DatetimeIndex([datetime(1975, 1, i, 0) for i in range(1, 6)], freq="D"),
- )
- r = series.resample("D").min().rolling(window=1)
+ )[::step]
+ r = series.resample("D").min().rolling(window=1, step=step)
tm.assert_series_equal(expected, r.min())
diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py
index 56b79097a1d05..815ee419590f7 100644
--- a/pandas/tests/window/test_rolling_quantile.py
+++ b/pandas/tests/window/test_rolling_quantile.py
@@ -34,21 +34,23 @@ def scoreatpercentile(a, per):
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
-def test_series(series, q):
+def test_series(series, q, step):
compare_func = partial(scoreatpercentile, per=q)
- result = series.rolling(50).quantile(q)
+ result = series.rolling(50, step=step).quantile(q)
assert isinstance(result, Series)
- tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
+ end = range(0, len(series), step or 1)[-1] + 1
+ tm.assert_almost_equal(result.iloc[-1], compare_func(series[end - 50 : end]))
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
-def test_frame(raw, frame, q):
+def test_frame(raw, frame, q, step):
compare_func = partial(scoreatpercentile, per=q)
- result = frame.rolling(50).quantile(q)
+ result = frame.rolling(50, step=step).quantile(q)
assert isinstance(result, DataFrame)
+ end = range(0, len(frame), step or 1)[-1] + 1
tm.assert_series_equal(
result.iloc[-1, :],
- frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
+ frame.iloc[end - 50 : end, :].apply(compare_func, axis=0, raw=raw),
check_names=False,
)
@@ -113,9 +115,9 @@ def test_nans(q):
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0])
-def test_min_periods(series, minp, q):
- result = series.rolling(len(series) + 1, min_periods=minp).quantile(q)
- expected = series.rolling(len(series), min_periods=minp).quantile(q)
+def test_min_periods(series, minp, q, step):
+ result = series.rolling(len(series) + 1, min_periods=minp, step=step).quantile(q)
+ expected = series.rolling(len(series), min_periods=minp, step=step).quantile(q)
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py
index 46b7eb6cbc285..152172d7b2266 100644
--- a/pandas/tests/window/test_rolling_skew_kurt.py
+++ b/pandas/tests/window/test_rolling_skew_kurt.py
@@ -112,9 +112,13 @@ def test_nans(sp_func, roll_func):
@pytest.mark.parametrize("minp", [0, 99, 100])
@pytest.mark.parametrize("roll_func", ["kurt", "skew"])
-def test_min_periods(series, minp, roll_func):
- result = getattr(series.rolling(len(series) + 1, min_periods=minp), roll_func)()
- expected = getattr(series.rolling(len(series), min_periods=minp), roll_func)()
+def test_min_periods(series, minp, roll_func, step):
+ result = getattr(
+ series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
+ )()
+ expected = getattr(
+ series.rolling(len(series), min_periods=minp, step=step), roll_func
+ )()
nan_mask = isna(result)
tm.assert_series_equal(nan_mask, isna(expected))
@@ -172,55 +176,55 @@ def test_center_reindex_frame(frame, roll_func):
tm.assert_frame_equal(frame_xp, frame_rs)
-def test_rolling_skew_edge_cases():
+def test_rolling_skew_edge_cases(step):
- all_nan = Series([np.NaN] * 5)
+ all_nan = Series([np.NaN] * 5)[::step]
# yields all NaN (0 variance)
d = Series([1] * 5)
- x = d.rolling(window=5).skew()
+ x = d.rolling(window=5, step=step).skew()
tm.assert_series_equal(all_nan, x)
# yields all NaN (window too small)
d = Series(np.random.randn(5))
- x = d.rolling(window=2).skew()
+ x = d.rolling(window=2, step=step).skew()
tm.assert_series_equal(all_nan, x)
# yields [NaN, NaN, NaN, 0.177994, 1.548824]
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
- expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])
- x = d.rolling(window=4).skew()
+ expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])[::step]
+ x = d.rolling(window=4, step=step).skew()
tm.assert_series_equal(expected, x)
-def test_rolling_kurt_edge_cases():
+def test_rolling_kurt_edge_cases(step):
- all_nan = Series([np.NaN] * 5)
+ all_nan = Series([np.NaN] * 5)[::step]
# yields all NaN (0 variance)
d = Series([1] * 5)
- x = d.rolling(window=5).kurt()
+ x = d.rolling(window=5, step=step).kurt()
tm.assert_series_equal(all_nan, x)
# yields all NaN (window too small)
d = Series(np.random.randn(5))
- x = d.rolling(window=3).kurt()
+ x = d.rolling(window=3, step=step).kurt()
tm.assert_series_equal(all_nan, x)
# yields [NaN, NaN, NaN, 1.224307, 2.671499]
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
- expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])
- x = d.rolling(window=4).kurt()
+ expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])[::step]
+ x = d.rolling(window=4, step=step).kurt()
tm.assert_series_equal(expected, x)
-def test_rolling_skew_eq_value_fperr():
+def test_rolling_skew_eq_value_fperr(step):
# #18804 all rolling skew for all equal values should return Nan
- a = Series([1.1] * 15).rolling(window=10).skew()
+ a = Series([1.1] * 15).rolling(window=10, step=step).skew()
assert np.isnan(a).all()
-def test_rolling_kurt_eq_value_fperr():
+def test_rolling_kurt_eq_value_fperr(step):
# #18804 all rolling kurt for all equal values should return Nan
- a = Series([1.1] * 15).rolling(window=10).kurt()
+ a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
assert np.isnan(a).all()
diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py
index 03ea745d9cb86..c356c9bdc7742 100644
--- a/pandas/tests/window/test_win_type.py
+++ b/pandas/tests/window/test_win_type.py
@@ -125,10 +125,10 @@ def test_constructor_with_win_type_invalid(frame_or_series):
@td.skip_if_no_scipy
@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning")
-def test_window_with_args():
+def test_window_with_args(step):
# make sure that we are aggregating window functions correctly with arg
r = Series(np.random.randn(100)).rolling(
- window=10, min_periods=1, win_type="gaussian"
+ window=10, min_periods=1, win_type="gaussian", step=step
)
expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1)
expected.columns = ["<lambda>", "<lambda>"]
@@ -173,7 +173,7 @@ def test_win_type_freq_return_deprecation():
@td.skip_if_no_scipy
def test_win_type_not_implemented():
class CustomIndexer(BaseIndexer):
- def get_window_bounds(self, num_values, min_periods, center, closed):
+ def get_window_bounds(self, num_values, min_periods, center, closed, step):
return np.array([0, 1]), np.array([1, 2])
df = DataFrame({"values": range(2)})
@@ -183,10 +183,10 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
@td.skip_if_no_scipy
-def test_cmov_mean():
+def test_cmov_mean(step):
# GH 8238
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
- result = Series(vals).rolling(5, center=True).mean()
+ result = Series(vals).rolling(5, center=True, step=step).mean()
expected_values = [
np.nan,
np.nan,
@@ -199,15 +199,15 @@ def test_cmov_mean():
np.nan,
np.nan,
]
- expected = Series(expected_values)
+ expected = Series(expected_values)[::step]
tm.assert_series_equal(expected, result)
@td.skip_if_no_scipy
-def test_cmov_window():
+def test_cmov_window(step):
# GH 8238
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
- result = Series(vals).rolling(5, win_type="boxcar", center=True).mean()
+ result = Series(vals).rolling(5, win_type="boxcar", center=True, step=step).mean()
expected_values = [
np.nan,
np.nan,
@@ -220,28 +220,28 @@ def test_cmov_window():
np.nan,
np.nan,
]
- expected = Series(expected_values)
+ expected = Series(expected_values)[::step]
tm.assert_series_equal(expected, result)
@td.skip_if_no_scipy
-def test_cmov_window_corner():
+def test_cmov_window_corner(step):
# GH 8238
# all nan
vals = Series([np.nan] * 10)
- result = vals.rolling(5, center=True, win_type="boxcar").mean()
+ result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
assert np.isnan(result).all()
# empty
vals = Series([], dtype=object)
- result = vals.rolling(5, center=True, win_type="boxcar").mean()
+ result = vals.rolling(5, center=True, win_type="boxcar", step=step).mean()
assert len(result) == 0
# shorter than window
vals = Series(np.random.randn(5))
- result = vals.rolling(10, win_type="boxcar").mean()
+ result = vals.rolling(10, win_type="boxcar", step=step).mean()
assert np.isnan(result).all()
- assert len(result) == 5
+ assert len(result) == len(range(0, 5, step or 1))
@td.skip_if_no_scipy
@@ -310,7 +310,7 @@ def test_cmov_window_corner():
),
],
)
-def test_cmov_window_frame(f, xp):
+def test_cmov_window_frame(f, xp, step):
# Gh 8238
df = DataFrame(
np.array(
@@ -328,28 +328,30 @@ def test_cmov_window_frame(f, xp):
]
)
)
- xp = DataFrame(np.array(xp))
+ xp = DataFrame(np.array(xp))[::step]
- roll = df.rolling(5, win_type="boxcar", center=True)
+ roll = df.rolling(5, win_type="boxcar", center=True, step=step)
rs = getattr(roll, f)()
tm.assert_frame_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_na_min_periods():
+def test_cmov_window_na_min_periods(step):
# min_periods
vals = Series(np.random.randn(10))
vals[4] = np.nan
vals[8] = np.nan
- xp = vals.rolling(5, min_periods=4, center=True).mean()
- rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean()
+ xp = vals.rolling(5, min_periods=4, center=True, step=step).mean()
+ rs = vals.rolling(
+ 5, win_type="boxcar", min_periods=4, center=True, step=step
+ ).mean()
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_regular(win_types):
+def test_cmov_window_regular(win_types, step):
# GH 8238
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48])
xps = {
@@ -451,26 +453,26 @@ def test_cmov_window_regular(win_types):
],
}
- xp = Series(xps[win_types])
- rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
+ xp = Series(xps[win_types])[::step]
+ rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_regular_linear_range(win_types):
+def test_cmov_window_regular_linear_range(win_types, step):
# GH 8238
vals = np.array(range(10), dtype=float)
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
- xp = Series(xp)
+ xp = Series(xp)[::step]
- rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
+ rs = Series(vals).rolling(5, win_type=win_types, center=True, step=step).mean()
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_regular_missing_data(win_types):
+def test_cmov_window_regular_missing_data(win_types, step):
# GH 8238
vals = np.array(
[6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48]
@@ -574,13 +576,13 @@ def test_cmov_window_regular_missing_data(win_types):
],
}
- xp = Series(xps[win_types])
- rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean()
+ xp = Series(xps[win_types])[::step]
+ rs = Series(vals).rolling(5, win_type=win_types, min_periods=3, step=step).mean()
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_special(win_types_special):
+def test_cmov_window_special(win_types_special, step):
# GH 8238
kwds = {
"kaiser": {"beta": 1.0},
@@ -642,17 +644,17 @@ def test_cmov_window_special(win_types_special):
],
}
- xp = Series(xps[win_types_special])
+ xp = Series(xps[win_types_special])[::step]
rs = (
Series(vals)
- .rolling(5, win_type=win_types_special, center=True)
+ .rolling(5, win_type=win_types_special, center=True, step=step)
.mean(**kwds[win_types_special])
)
tm.assert_series_equal(xp, rs)
@td.skip_if_no_scipy
-def test_cmov_window_special_linear_range(win_types_special):
+def test_cmov_window_special_linear_range(win_types_special, step):
# GH 8238
kwds = {
"kaiser": {"beta": 1.0},
@@ -666,11 +668,11 @@ def test_cmov_window_special_linear_range(win_types_special):
xp = vals.copy()
xp[:2] = np.nan
xp[-2:] = np.nan
- xp = Series(xp)
+ xp = Series(xp)[::step]
rs = (
Series(vals)
- .rolling(5, win_type=win_types_special, center=True)
+ .rolling(5, win_type=win_types_special, center=True, step=step)
.mean(**kwds[win_types_special])
)
tm.assert_series_equal(xp, rs)