Skip to content

Commit

Permalink
Add random_state argument to HyperparameterOptimization class to ensu…
Browse files Browse the repository at this point in the history
…re reproducibility of results. - Fixes for #125 (#131)

* set seed for numpy

* add a random_state argument to hyperparameter optimization module

* add random state for hyperopt fmin function

* add rstate for hyperparameter optimization and add a test for it

* fixed MR comments

* fix attempt for ci cd

* version update for a minor fix release

---------

Authored by: Panagiotis Papaemmanouil <[email protected]>
Review changes authored by: @sayanchk
  • Loading branch information
sayanchk authored Jan 31, 2024
1 parent 9471eb3 commit 29dacc9
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 11 deletions.
18 changes: 12 additions & 6 deletions luminaire/optimization/hyperparameter_optimization.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from hyperopt import fmin, tpe, hp, STATUS_OK
from luminaire.model import LADStructuralModel, LADStructuralHyperParams, LADFilteringModel, LADFilteringHyperParams
from luminaire.exploration.data_exploration import DataExploration
from luminaire.utils.random_state_validation import check_random_state
import warnings
warnings.filterwarnings('ignore')


class HyperparameterOptimization(object):
"""
Hyperparameter optimization for LAD outlier detection configuration for batch data.
Expand All @@ -20,6 +20,7 @@ class HyperparameterOptimization(object):
:param int min_ts_length: The minimum required length of the time series for training. The input time series will be
truncated if the length is greater than this value.
:param int scoring_length: Number of innovations to be scored after training window with respect to the frequency.
:param int random_state: Seed for the random number generators used during optimization, so that results are reproducible. With the default (None) the run is not seeded.
.. _Pandas offset: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
"""
Expand All @@ -31,6 +32,7 @@ def __init__(self,
max_ts_length=None,
min_ts_length=None,
scoring_length=None,
random_state=None,
**kwargs):
self._target_metric = 'raw'
self.freq = freq
Expand All @@ -48,6 +50,8 @@ def __init__(self,
self.scoring_length = scoring_length or (scoring_length_dict.get(freq)
if freq in scoring_length_dict.keys() else 30)

self.random_state = random_state

def _mape(self, actuals, predictions):
"""
This function computes the mean absolute percentage error for the observed vs the predicted values.
Expand Down Expand Up @@ -93,7 +97,8 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error):

# Anomaly detection based on synthetic anomalies generated through a given intensity list
for prop in self.anomaly_intensity_list:
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.uniform(0, 1, 1)
if trial_prob < 0.4:
synthetic_value = observation + (prop * observation)
anomaly_flags.append(1)
Expand Down Expand Up @@ -227,7 +232,8 @@ def _objective_part(self, data, smoothed_series, args):
anomaly_probabilities_list = []
local_model = copy.deepcopy(stable_model)
for i, row in scoring_data.iterrows():
trial_prob = np.random.uniform(0, 1, 1)
rnd = check_random_state(self.random_state)
trial_prob = rnd.random.uniform(0, 1, 1)
observed_value = row.raw
synthetic_actual = observed_value
if trial_prob < 0.4:
Expand Down Expand Up @@ -263,7 +269,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):
:return: Optimal hyperparameters
:rtype: dict
"""

import numpy as np
from functools import partial
from pykalman import KalmanFilter

Expand All @@ -288,7 +294,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):

try:
series = data[self._target_metric].values
kf = KalmanFilter()
kf = KalmanFilter(random_state=self.random_state)
smoothed_series, cov_series = kf.em(series).smooth(series)
except:
raise ValueError('Kalman Smoothing requires more than one data point')
Expand All @@ -299,7 +305,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50):
raise ValueError('Only `detection_type=OutlierDetection` is supported in hyperparameter optimization right now')

# Calling the optimization function
hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True)
hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True, rstate=np.random.default_rng(self.random_state))
hyper_param['LuminaireModel'] = hyper_param_list[hyper_param['LuminaireModel']]['model']
if 'max_ft_freq' in hyper_param:
hyper_param['max_ft_freq'] = hyper_param['max_ft_freq'] + 2
Expand Down
11 changes: 9 additions & 2 deletions luminaire/tests/test_hyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@

class TestHyperparameterOptimization(object):

def test_run(self, test_data_with_missing):

def test_run1(self, test_data_with_missing):
    """Run the optimization with the default ``random_state=None`` and check that a dict is returned."""
    optimizer = HyperparameterOptimization(freq='D', detection_type='OutlierDetection')
    result = optimizer.run(test_data_with_missing, max_evals=5)
    assert isinstance(result, dict)

def test_run2(self, test_data_with_missing):
    """Run the optimization with an explicit ``random_state`` and check that a dict is returned."""
    optimizer = HyperparameterOptimization(
        freq='D',
        detection_type='OutlierDetection',
        random_state=42,
    )
    result = optimizer.run(test_data_with_missing, max_evals=5)
    assert isinstance(result, dict)
1 change: 1 addition & 0 deletions luminaire/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .random_state_validation import check_random_state
23 changes: 23 additions & 0 deletions luminaire/utils/random_state_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np
import numbers

def check_random_state(seed):
    """Turn seed into a np.random.RandomState instance.

    Note that an integer seed produces a *new* RandomState on every call, so
    two calls with the same integer return generators that yield the same
    sequence of draws. Callers that need a stream of different values should
    call this function once and reuse the returned generator.

    :param seed: seed for the random state; one of None, the ``np.random``
        module, an integer, or an existing ``np.random.RandomState`` instance.
    :return: If seed is None (or the ``np.random`` module), the RandomState
        singleton used by np.random.
        If seed is an int, a new RandomState instance seeded with seed.
        If seed is already a RandomState instance, seed itself.
    :rtype: np.random.RandomState
    :raises ValueError: if seed is none of the accepted kinds.
    """
    if seed is None or seed is np.random:
        # Global generator that backs the module-level np.random.* functions.
        return np.random.mtrand._rand
    if isinstance(seed, numbers.Integral):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    # Wrap seed in a 1-tuple: a bare tuple seed would otherwise be unpacked as
    # the %-format arguments and raise TypeError instead of ValueError.
    raise ValueError(
        "%r cannot be used to seed a numpy.random.RandomState instance" % (seed,)
    )
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ bayescd>=0.4
changepy>=0.3.1
hyperopt>=0.1.2
numpy>=1.17.5, <=1.22.4
pandas>=0.25.3
pandas>=0.25.3, <=2.0.3
pykalman>=0.9.5
scipy>=1.6.0
statsmodels>=0.13.0
statsmodels>=0.13.0, <=0.13.5
scikit-learn>=0.24.2
decorator>=5.1.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

setup(
name='luminaire',
version='0.4.2',
version='0.4.3',

license='Apache License 2.0',

Expand Down

0 comments on commit 29dacc9

Please sign in to comment.