From e19b534de1c66c8eec1cfcf9cc4d86ab2d14b362 Mon Sep 17 00:00:00 2001 From: sayanc Date: Sat, 22 Aug 2020 22:21:42 -0700 Subject: [PATCH 1/3] wdm pandas T and 15T config fixes with doc-updates --- docs/tutorial/streaming.rst | 14 +++++++------- luminaire/model/lad_filtering.py | 6 +++++- luminaire/model/lad_structural.py | 7 +++---- luminaire/model/window_density.py | 30 +++++++++++++++--------------- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/docs/tutorial/streaming.rst b/docs/tutorial/streaming.rst index 5b596bd..1fb5f78 100644 --- a/docs/tutorial/streaming.rst +++ b/docs/tutorial/streaming.rst @@ -29,22 +29,22 @@ index 2020-06-16 23:58:00 10663.0 10663.0 2020-06-16 23:59:00 11034.0 11034.0 ->>> hyper_params = WindowDensityHyperParams(freq='M').params +>>> hyper_params = WindowDensityHyperParams(freq='T').params >>> wdm_obj = WindowDensityModel(hyper_params=hyper_params) >>> success, model = wdm_obj.train(data=data) >>> print(success, model) (True, ) -The model object contains the data density structure over a pre-specified window, given the frequency. Lumianire sets the following defaults for some typical pandas frequencies (any custom requirements can be updated in the hyperparameter object instance): +The model object contains the data density structure over a pre-specified window, given the frequency. Luminaire sets the following defaults for some typical pandas frequencies (any custom requirements can be updated in the hyperparameter object instance): - 'S': Hourly windows -- 'M': Daily windows -- 'QM': Weekly windows -- 'H': 12 hours windows -- 'D': 10 days windows +- 'T': 24 hours windows +- '15T': 24 hours windows +- 'H': 24 hours windows +- 'D': 4 weeks windows - 'custom': User specified windows -In order to score a new window innovation given the trained model object, we have to provide a equal sized window that represents a similar time interval. 
For example, if each of the windows in the training data represents a 24 hour window between 9 AM to 8:59:59 AM for last few days, the scoring data should represent the same interval of a different day and should have the same window size. +In order to score a new window innovation given the trained model object, we have to provide an equal-sized window that represents a similar time interval. For example, if each of the windows in the training data represents a 24 hour window from 9 AM to 8:59:59 AM (next day) for the last few days, the scoring data should represent the same interval of a different day and should have the same window size. .. image:: window_train_score_auto.png :scale: 45% diff --git a/luminaire/model/lad_filtering.py b/luminaire/model/lad_filtering.py index 5534d17..d428ad1 100644 --- a/luminaire/model/lad_filtering.py +++ b/luminaire/model/lad_filtering.py @@ -39,13 +39,17 @@ class LADFilteringModel(BaseModel): through Kalman Filter based model estimation. :param dict hyper_params: Hyper parameters for Luminaire structural modeling. - See :class:`luminaire.optimization.hyperparameter_optimization.HyperparameterOptimization` for detailed information. + See :class:`luminaire.model.lad_filtering.LADFilteringHyperParams` for detailed information. :param str freq: The frequency of the time-series. A `Pandas offset`_ such as 'D', 'H', or 'M'. Luminaire currently supports the following pandas frequency types: 'H', 'D', 'W', 'W-SUN', 'W-MON', 'W-TUE', 'W-WED', 'W-THU', 'W-FRI', 'W-SAT', 'M', 'MS'. :param min_ts_length: The minimum required length of the time series for training. :param max_ts_length: The maximum required length of the time series for training. + .. Note :: This class should be used to manually configure the filtering model. Exact configuration parameters + can be found in `luminaire.model.lad_filtering.LADFilteringHyperParams`. Optimal configuration can be + obtained by using Luminaire hyperparameter optimization. + .. 
_Pandas offset: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects >>> hyper = {"is_log_transformed": 1} diff --git a/luminaire/model/lad_structural.py b/luminaire/model/lad_structural.py index 009a9b3..66bb01a 100644 --- a/luminaire/model/lad_structural.py +++ b/luminaire/model/lad_structural.py @@ -51,8 +51,7 @@ class LADStructuralModel(BaseModel): A LAD structural time series model. :param dict hyper_params: Hyper parameters for Luminaire structural modeling. - See :class:`luminaire.optimization.hyperparameter_optimization.HyperparameterOptimization` for detailed - information. + See :class:`luminaire.model.lad_structural.LADStructuralHyperParams` for detailed information. :param str freq: The frequency of the time-series. A `Pandas offset`_ such as 'D', 'H', or 'M'. Luminaire currently supports the following pandas frequency types: 'H', 'D', 'W', 'W-SUN', 'W-MON', 'W-TUE', 'W-WED', 'W-THU', 'W-FRI', 'W-SAT', 'M', 'MS'. @@ -63,8 +62,8 @@ class LADStructuralModel(BaseModel): :param int min_ts_mean_window: Size of the most recent window to calculate min_ts_mean. .. Note :: This class should be used to manually configure the structural model. Exact configuration parameters - can be found in `luminaire.hyperparameter_optimization.HyperparameterOptimization`. Optimal configuration - can be obtained by using LAD hyperparameter optimization. + can be found in `luminaire.model.lad_structural.LADStructuralHyperParams`. Optimal configuration can be + obtained by using Luminaire hyperparameter optimization. .. _statsmodels docs: http://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_model.ARIMA.html .. 
_Pandas offset: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects diff --git a/luminaire/model/window_density.py b/luminaire/model/window_density.py index b3392ac..a444adf 100644 --- a/luminaire/model/window_density.py +++ b/luminaire/model/window_density.py @@ -6,7 +6,7 @@ class WindowDensityHyperParams(BaseModelHyperParams): """ Hyperparameter class for Luminaire Window density model. - :param str freq: The frequency of the time-series. Luminaire supports default configuration for 'S', 'M', 'QM', + :param str freq: The frequency of the time-series. Luminaire supports default configuration for 'S', 'T', '15T', 'H', 'D'. Any other frequency type should be specified as 'custom' and configuration should be set manually. :param int ignore_window: ignore a time window to be considered for training. :param float max_missing_train_prop: Maximum proportion of missing observation allowed in the training data. @@ -48,7 +48,7 @@ class WindowDensityHyperParams(BaseModelHyperParams): - "diff" (differencing based). 
""" def __init__(self, - freq='M', + freq='T', ignore_window=None, max_missing_train_prop=0.1, is_log_transformed=False, @@ -62,35 +62,35 @@ def __init__(self, ): # Detection method is KL divergence for high frequency data and sign test for low frequency data if not detection_method: - detection_method = "kldiv" if freq in ['S', 'M', 'QM'] else "sign_test" + detection_method = "kldiv" if freq in ['S', 'T', '15T'] else "sign_test" # Pre-specification of the window lengths for different window frequencies with their min and max min_window_length_dict = { 'S': 60 * 10, - 'M': 60 * 12, - 'QM': 4 * 24 * 7, + 'T': 60 * 12, + '15T': 4 * 8, 'H': 12, 'D': 10, } max_window_length_dict = { 'S': 60 * 60 * 24, - 'M': 60 * 24 * 84, - 'QM': 4 * 24 * 168, + 'T': 60 * 24 * 84, + '15T': 4 * 24 * 7, 'H': 24 * 7, 'D': 90, } window_length_dict = { 'S': 60 * 60, - 'M': 60 * 24, - 'QM': 4 * 24 * 14, + 'T': 60 * 24, + '15T': 4 * 24, 'H': 24, 'D': 28, } ma_window_length_dict = { 'S': 10 * 60, - 'M': 60, - 'QM': 4 * 4, + 'T': 60, + '15T': 4 * 4, 'H': 12, 'D': 7, } - if freq in ['S', 'M', 'QM', 'H', 'D']: + if freq in ['S', 'T', '15T', 'H', 'D']: min_window_length = min_window_length_dict.get(freq) max_window_length = max_window_length_dict.get(freq) window_length = window_length_dict.get(freq) @@ -435,7 +435,7 @@ def train(self, data, **kwargs): min_num_train_windows = self.min_num_train_windows max_num_train_windows = self.max_num_train_windows ignore_window = self._params['ignore_window'] - if freq in ['S', 'M', 'QM', 'H', 'D']: + if freq in ['S', 'T', '15T', 'H', 'D']: min_window_length = self._params['min_window_length'] max_window_length = self._params['max_window_length'] window_length = self._params['window_length'] @@ -452,7 +452,7 @@ def train(self, data, **kwargs): detrend_method = self._params['detrend_method'] target_metric = 'raw' imputed_metric = 'interpolated' - if freq not in ['S', 'M', 'QM', 'H', 'D']: + if freq not in ['S', 'T', '15T', 'H', 'D']: detection_method = 
self._params['detection_method'] if not detection_method: raise ValueError('Detection method should be specified in case frequency not in the specified list') @@ -668,7 +668,7 @@ def score(self, data, **kwargs): detrend_method = self._params['detrend_method'] target_metric = 'raw' imputed_metric = 'interpolated' - if freq not in ['S', 'M', 'QM', 'H', 'D']: + if freq not in ['S', 'T', '15T', 'H', 'D']: detection_method = self._params['detection_method'] if not detection_method: raise ValueError('Detection method should be specified in case frequency not in the specified list') From 54efabc40e2e41d313f6f39ecef73c6df877b954 Mon Sep 17 00:00:00 2001 From: sayanc Date: Sat, 22 Aug 2020 22:28:58 -0700 Subject: [PATCH 2/3] version update --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a0766ae..f816229 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( name='luminaire', - version='0.1.0', + version='0.1.1', license='Apache License 2.0', From e38b5db946c7b155e19df958f6089ee6dbc51c3f Mon Sep 17 00:00:00 2001 From: sayanc Date: Sat, 22 Aug 2020 22:53:36 -0700 Subject: [PATCH 3/3] daily freq specs moved to new lines --- luminaire/model/window_density.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/luminaire/model/window_density.py b/luminaire/model/window_density.py index a444adf..4d2f258 100644 --- a/luminaire/model/window_density.py +++ b/luminaire/model/window_density.py @@ -69,25 +69,29 @@ def __init__(self, 'S': 60 * 10, 'T': 60 * 12, '15T': 4 * 8, - 'H': 12, 'D': 10, + 'H': 12, + 'D': 10, } max_window_length_dict = { 'S': 60 * 60 * 24, 'T': 60 * 24 * 84, '15T': 4 * 24 * 7, - 'H': 24 * 7, 'D': 90, + 'H': 24 * 7, + 'D': 90, } window_length_dict = { 'S': 60 * 60, 'T': 60 * 24, '15T': 4 * 24, - 'H': 24, 'D': 28, + 'H': 24, + 'D': 28, } ma_window_length_dict = { 'S': 10 * 60, 'T': 60, '15T': 4 * 4, - 'H': 12, 'D': 7, + 'H': 12, + 'D': 7, } if freq in ['S', 'T', '15T', 'H', 
'D']: