From 1aca95f8b0c97554b09e161e7567131a55929cf7 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 18:32:35 +0200 Subject: [PATCH 01/11] Add center params For LevelShiftAD, PersistAD to detect anomalies based on only past values. For SeasonalAD to remove trend on past values only --- src/adtk/detector/_detector_1d.py | 32 +++++++++++++++++++++---- src/adtk/transformer/_transformer_1d.py | 13 ++++++++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/adtk/detector/_detector_1d.py b/src/adtk/detector/_detector_1d.py index 0e2b8ff..1537cf0 100644 --- a/src/adtk/detector/_detector_1d.py +++ b/src/adtk/detector/_detector_1d.py @@ -406,6 +406,11 @@ class PersistAD(_TrainableUnivariateDetector): Aggregation operation of the time window, either "mean" or "median". Default: "median". + center: bool, optional + If True, the current point is the right edge of right window; + Otherwise, it is the right edge of left window. + Default: True. + Attributes ---------- pipe_: adtk.pipe.Pipenet @@ -420,6 +425,7 @@ def __init__( side: str = "both", min_periods: Optional[int] = None, agg: str = "median", + center=True ) -> None: self.pipe_ = Pipenet( { @@ -427,7 +433,7 @@ def __init__( "model": DoubleRollingAggregate( agg=agg, window=(window, 1), - center=True, + center=center, min_periods=(min_periods, 1), diff="l1", ), @@ -441,7 +447,7 @@ def __init__( "model": DoubleRollingAggregate( agg=agg, window=(window, 1), - center=True, + center=center, min_periods=(min_periods, 1), diff="diff", ), @@ -570,6 +576,11 @@ class LevelShiftAD(_TrainableUnivariateDetector): for that window. If 2-tuple, it defines the left and right window respectively. Default: None, i.e. all observations must have values. + center: bool, optional + If True, the current point is the right edge of right window; + Otherwise, it is the right edge of left window. + Default: True. + Attributes ---------- pipe_: adtk.pipe.Pipenet @@ -587,6 +598,7 @@ def __init__( min_periods: Union[ Optional[int], Tuple[Optional[int], Optional[int]] ] = None, + center: bool = True ) -> None: self.pipe_ = Pipenet( { @@ -594,7 +606,7 @@ def __init__( "model": DoubleRollingAggregate( agg="median", window=window, - center=True, + center=center, min_periods=min_periods, diff="l1", ), @@ -608,7 +620,7 @@ def __init__( "model": DoubleRollingAggregate( agg="median", window=window, - center=True, + center=center, min_periods=min_periods, diff="diff", ), @@ -1051,6 +1063,11 @@ class SeasonalAD(_TrainableUnivariateDetector): trend: bool, optional Whether to extract trend during decomposition. Default: False. + two_sided: bool, optional + The moving average method used in filtering out trend. + If True (default), a centered moving average is computed using the filt. + If False, the filter coefficients are for past values only. + Attributes ---------- freq_: int @@ -1072,12 +1089,17 @@ def __init__( side: str = "both", c: float = 3.0, trend: bool = False, + two_sided: bool = True ) -> None: self.pipe_ = Pipenet( { "deseasonal_residual": { "model": ( - ClassicSeasonalDecomposition(freq=freq, trend=trend) + ClassicSeasonalDecomposition( + freq=freq, + trend=trend, + two_sided=two_sided + ) ), "input": "original", }, diff --git a/src/adtk/transformer/_transformer_1d.py b/src/adtk/transformer/_transformer_1d.py index 901a7bf..41775b7 100644 --- a/src/adtk/transformer/_transformer_1d.py +++ b/src/adtk/transformer/_transformer_1d.py @@ -657,6 +657,11 @@ class ClassicSeasonalDecomposition(_TrainableUnivariateTransformer): If False, the time series will be assumed the sum of seasonal pattern and residual. Default: False. + two_sided: bool, optional + The moving average method used in filtering out trend. + If True (default), a centered moving average is computed using the filt. + If False, the filter coefficients are for past values only. + Attributes ---------- freq_: int @@ -669,11 +674,15 @@ class ClassicSeasonalDecomposition(_TrainableUnivariateTransformer): """ def __init__( - self, freq: Optional[int] = None, trend: bool = False + self, + freq: Optional[int] = None, + trend: bool = False, + two_sided: bool = True ) -> None: super().__init__() self.freq = freq self.trend = trend + self.two_sided = two_sided @property def _param_names(self) -> Tuple[str, ...]: @@ -718,7 +727,7 @@ def _fit_core(self, s: pd.Series) -> None: # get seasonal pattern if self.trend: seasonal_decompose_results = ( - seasonal_decompose(s, period=self.freq_) + seasonal_decompose(s, period=self.freq_, two_sided=self.two_sided) if parse(statsmodels.__version__) >= parse("0.11") else seasonal_decompose(s, freq=self.freq_) ) From 53555497add75039495f4eeed60d8cce12a95d50 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 18:40:04 +0200 Subject: [PATCH 02/11] version bump --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index ac720db..799d6b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = adtk -version = 0.6.0 +version = 0.6.1 author = Arundo Analytics, Inc. maintainer = Tailai Wen maintainer_email = tailai.wen@arundo.com From 85b9dd46ccf8d0265ac1f84fddb92490e07d6853 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 18:46:15 +0200 Subject: [PATCH 03/11] fix params missing in branch --- src/adtk/transformer/_transformer_1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adtk/transformer/_transformer_1d.py b/src/adtk/transformer/_transformer_1d.py index 41775b7..ceba1af 100644 --- a/src/adtk/transformer/_transformer_1d.py +++ b/src/adtk/transformer/_transformer_1d.py @@ -729,7 +729,7 @@ def _fit_core(self, s: pd.Series) -> None: seasonal_decompose_results = ( seasonal_decompose(s, period=self.freq_, two_sided=self.two_sided) if parse(statsmodels.__version__) >= parse("0.11") - else seasonal_decompose(s, freq=self.freq_) + else seasonal_decompose(s, freq=self.freq_, two_sided=self.two_sided) ) self.seasonal_ = getattr(seasonal_decompose_results, "seasonal")[ : self.freq_ From 25e9fee49d214ce470d032c4bb4ee763a521fcc1 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 18:55:02 +0200 Subject: [PATCH 04/11] add params to predict core --- src/adtk/detector/_detector_1d.py | 2 +- src/adtk/transformer/_transformer_1d.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/adtk/detector/_detector_1d.py b/src/adtk/detector/_detector_1d.py index 1537cf0..f9f892e 100644 --- a/src/adtk/detector/_detector_1d.py +++ b/src/adtk/detector/_detector_1d.py @@ -1065,7 +1065,7 @@ class SeasonalAD(_TrainableUnivariateDetector): two_sided: bool, optional The moving average method used in filtering out trend. - If True (default), a centered moving average is computed using the filt. + If True (default), a centered moving average is computed. If False, the filter coefficients are for past values only. Attributes diff --git a/src/adtk/transformer/_transformer_1d.py b/src/adtk/transformer/_transformer_1d.py index ceba1af..37c1b70 100644 --- a/src/adtk/transformer/_transformer_1d.py +++ b/src/adtk/transformer/_transformer_1d.py @@ -659,7 +659,7 @@ class ClassicSeasonalDecomposition(_TrainableUnivariateTransformer): two_sided: bool, optional The moving average method used in filtering out trend. - If True (default), a centered moving average is computed using the filt. + If True (default), a centered moving average is computed. If False, the filter coefficients are for past values only. Attributes @@ -810,9 +810,9 @@ def _predict_core(self, s: pd.Series) -> pd.Series: # remove trend if self.trend: seasonal_decompose_results = ( - seasonal_decompose(s, period=self.freq_) + seasonal_decompose(s, period=self.freq_, two_sided=self.two_sided) if parse(statsmodels.__version__) >= parse("0.11") - else seasonal_decompose(s, freq=self.freq_) + else seasonal_decompose(s, freq=self.freq_, two_sided=self.two_sided) ) s_trend = getattr(seasonal_decompose_results, "trend") s_detrended = s - s_trend From fc8238a14346c2bbec504d15d64f7fbb74886c5a Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 19:06:54 +0200 Subject: [PATCH 05/11] bump version --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 799d6b1..4582be0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = adtk -version = 0.6.1 +version = 0.6.2 author = Arundo Analytics, Inc. maintainer = Tailai Wen maintainer_email = tailai.wen@arundo.com From 104b3f6e61f020030f1610b21123849f619ef1a4 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 19:42:22 +0200 Subject: [PATCH 06/11] fix params not getting passed --- .gitignore | 1 + setup.cfg | 2 +- src/adtk/detector/_detector_1d.py | 12 ++++++++---- src/adtk/transformer/_transformer_1d.py | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index c0c219b..2b7e627 100644 --- a/.gitignore +++ b/.gitignore @@ -122,3 +122,4 @@ dmypy.json # Pyre type checker .pyre/ +.idea \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 4582be0..799d6b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = adtk -version = 0.6.2 +version = 0.6.1 author = Arundo Analytics, Inc. maintainer = Tailai Wen maintainer_email = tailai.wen@arundo.com diff --git a/src/adtk/detector/_detector_1d.py b/src/adtk/detector/_detector_1d.py index f9f892e..ade053d 100644 --- a/src/adtk/detector/_detector_1d.py +++ b/src/adtk/detector/_detector_1d.py @@ -488,11 +488,12 @@ def __init__( self.window = window self.min_periods = min_periods self.agg = agg + self.center = center self._sync_params() @property def _param_names(self) -> Tuple[str, ...]: - return ("window", "c", "side", "min_periods", "agg") + return ("window", "c", "side", "min_periods", "agg", "center") def _sync_params(self) -> None: if self.agg not in ["median", "mean"]: @@ -507,12 +508,14 @@ def _sync_params(self) -> None: agg=self.agg, window=(self.window, 1), min_periods=(self.min_periods, 1), + center=self.center, ) self.pipe_.steps["iqr_ad"]["model"].set_params(c=(None, self.c)) self.pipe_.steps["diff"]["model"].set_params( agg=self.agg, window=(self.window, 1), min_periods=(self.min_periods, 1), + center=self.center, ) self.pipe_.steps["sign_check"]["model"].set_params( high=( @@ -660,11 +663,12 @@ def __init__( self.side = side self.window = window self.min_periods = min_periods + self.center = center self._sync_params() @property def _param_names(self) -> Tuple[str, ...]: - return ("window", "c", "side", "min_periods") + return ("window", "c", "side", "min_periods", "center") def _sync_params(self) -> None: if self.side not in ["both", "positive", "negative"]: @@ -672,11 +676,11 @@ def _sync_params(self) -> None: "Parameter `side` must be 'both', 'positive' or 'negative'." ) self.pipe_.steps["diff_abs"]["model"].set_params( - window=self.window, min_periods=self.min_periods + window=self.window, min_periods=self.min_periods, center=center ) self.pipe_.steps["iqr_ad"]["model"].set_params(c=(None, self.c)) self.pipe_.steps["diff"]["model"].set_params( - window=self.window, min_periods=self.min_periods + window=self.window, min_periods=self.min_periods, center=center ) self.pipe_.steps["sign_check"]["model"].set_params( high=( diff --git a/src/adtk/transformer/_transformer_1d.py b/src/adtk/transformer/_transformer_1d.py index 37c1b70..6ac23df 100644 --- a/src/adtk/transformer/_transformer_1d.py +++ b/src/adtk/transformer/_transformer_1d.py @@ -686,7 +686,7 @@ def __init__( @property def _param_names(self) -> Tuple[str, ...]: - return ("freq", "trend") + return ("freq", "trend", "two_sided") def _fit_core(self, s: pd.Series) -> None: if not ( From 066c04458ef26766175b446d83c7f87d21ac7988 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 19:52:18 +0200 Subject: [PATCH 07/11] fix invalid params --- src/adtk/detector/_detector_1d.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/adtk/detector/_detector_1d.py b/src/adtk/detector/_detector_1d.py index ade053d..11a3768 100644 --- a/src/adtk/detector/_detector_1d.py +++ b/src/adtk/detector/_detector_1d.py @@ -676,11 +676,11 @@ def _sync_params(self) -> None: "Parameter `side` must be 'both', 'positive' or 'negative'." ) self.pipe_.steps["diff_abs"]["model"].set_params( - window=self.window, min_periods=self.min_periods, center=center + window=self.window, min_periods=self.min_periods, center=self.center ) self.pipe_.steps["iqr_ad"]["model"].set_params(c=(None, self.c)) self.pipe_.steps["diff"]["model"].set_params( - window=self.window, min_periods=self.min_periods, center=center + window=self.window, min_periods=self.min_periods, center=self.center ) self.pipe_.steps["sign_check"]["model"].set_params( high=( @@ -1149,15 +1149,16 @@ def __init__( self.side = side self.c = c self.trend = trend + self.two_sided = two_sided self._sync_params() @property def _param_names(self) -> Tuple[str, ...]: - return ("freq", "side", "c", "trend") + return ("freq", "side", "c", "trend", "two_sided") def _sync_params(self) -> None: self.pipe_.steps["deseasonal_residual"]["model"].set_params( - freq=self.freq, trend=self.trend + freq=self.freq, trend=self.trend, two_sided=self.two_sided ) self.pipe_.steps["iqr_ad"]["model"].set_params(c=(None, self.c)) self.pipe_.steps["sign_check"]["model"].set_params( From 6c179500d18e3a076e81e3f6a5eb81f2dd9a910b Mon Sep 17 00:00:00 2001 From: rbs392 Date: Sat, 11 Apr 2020 23:49:21 +0200 Subject: [PATCH 08/11] fix failing test --- tests/test_pipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipe.py b/tests/test_pipe.py index bdd59b3..d2201e3 100644 --- a/tests/test_pipe.py +++ b/tests/test_pipe.py @@ -641,7 +641,7 @@ def test_pipeline(): ) assert my_pipe.get_params() == { - "deseasonal_residual": {"freq": 6, "trend": False}, + "deseasonal_residual": {"freq": 6, "trend": False, "two_sided": True}, "abs_residual": { "fit_func": None, "fit_func_params": None, From 34cd8886ef11d049db0126e835820d169d0ee9c5 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Mon, 13 Apr 2020 21:55:08 +0200 Subject: [PATCH 09/11] fix mypy issues --- src/adtk/detector/_detector_1d.py | 2 +- src/adtk/detector/_detector_hd.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/adtk/detector/_detector_1d.py b/src/adtk/detector/_detector_1d.py index 11a3768..787c8f7 100644 --- a/src/adtk/detector/_detector_1d.py +++ b/src/adtk/detector/_detector_1d.py @@ -425,7 +425,7 @@ def __init__( side: str = "both", min_periods: Optional[int] = None, agg: str = "median", - center=True + center: bool = True ) -> None: self.pipe_ = Pipenet( { diff --git a/src/adtk/detector/_detector_hd.py b/src/adtk/detector/_detector_hd.py index 0afe35e..e881f4b 100644 --- a/src/adtk/detector/_detector_hd.py +++ b/src/adtk/detector/_detector_hd.py @@ -126,7 +126,7 @@ def _fit_core(self, df: pd.DataFrame) -> None: if df.dropna().empty: raise RuntimeError("Valid values are not enough for training.") clustering_result = self.model.fit_predict(df.dropna()) - cluster_count = Counter(clustering_result) + cluster_count: Counter = Counter(clustering_result) self._anomalous_cluster_id = cluster_count.most_common()[-1][0] def _predict_core(self, df: pd.DataFrame) -> pd.Series: From b18dbefc5a138b0239b7caebd83895fe8d5ca295 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Mon, 13 Apr 2020 22:03:39 +0200 Subject: [PATCH 10/11] fix mypy error in py3.5 --- src/adtk/detector/_detector_hd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adtk/detector/_detector_hd.py b/src/adtk/detector/_detector_hd.py index e881f4b..0afe35e 100644 --- a/src/adtk/detector/_detector_hd.py +++ b/src/adtk/detector/_detector_hd.py @@ -126,7 +126,7 @@ def _fit_core(self, df: pd.DataFrame) -> None: if df.dropna().empty: raise RuntimeError("Valid values are not enough for training.") clustering_result = self.model.fit_predict(df.dropna()) - cluster_count: Counter = Counter(clustering_result) + cluster_count = Counter(clustering_result) self._anomalous_cluster_id = cluster_count.most_common()[-1][0] def _predict_core(self, df: pd.DataFrame) -> pd.Series: From 1830d18dfba68b628f59f6a7deb6fff70af0d9f7 Mon Sep 17 00:00:00 2001 From: rbs392 Date: Mon, 13 Apr 2020 22:19:56 +0200 Subject: [PATCH 11/11] final fix for py3.5 --- src/adtk/detector/_detector_hd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/adtk/detector/_detector_hd.py b/src/adtk/detector/_detector_hd.py index 0afe35e..686f664 100644 --- a/src/adtk/detector/_detector_hd.py +++ b/src/adtk/detector/_detector_hd.py @@ -126,7 +126,7 @@ def _fit_core(self, df: pd.DataFrame) -> None: if df.dropna().empty: raise RuntimeError("Valid values are not enough for training.") clustering_result = self.model.fit_predict(df.dropna()) - cluster_count = Counter(clustering_result) + cluster_count = Counter(clustering_result) # type: Counter self._anomalous_cluster_id = cluster_count.most_common()[-1][0] def _predict_core(self, df: pd.DataFrame) -> pd.Series: