diff --git a/luminaire/optimization/hyperparameter_optimization.py b/luminaire/optimization/hyperparameter_optimization.py index b14d66d..55c3f34 100644 --- a/luminaire/optimization/hyperparameter_optimization.py +++ b/luminaire/optimization/hyperparameter_optimization.py @@ -1,10 +1,10 @@ from hyperopt import fmin, tpe, hp, STATUS_OK from luminaire.model import LADStructuralModel, LADStructuralHyperParams, LADFilteringModel, LADFilteringHyperParams from luminaire.exploration.data_exploration import DataExploration +from luminaire.utils.random_state_validation import check_random_state import warnings warnings.filterwarnings('ignore') - class HyperparameterOptimization(object): """ Hyperparameter optimization for LAD outlier detection configuration for batch data. @@ -20,6 +20,7 @@ class HyperparameterOptimization(object): :param int min_ts_length: The minimum required length of the time series for training. The input time series will be truncated if the length is greater than this value. :param int scoring_length: Number of innovations to be scored after training window with respect to the frequency. + :param int random_state: Seed for the random number generator, used to make the optimization reproducible .. _Pandas offset: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects """ @@ -31,6 +32,7 @@ def __init__(self, max_ts_length=None, min_ts_length=None, scoring_length=None, + random_state=None, **kwargs): self._target_metric = 'raw' self.freq = freq @@ -48,6 +50,8 @@ def __init__(self, self.scoring_length = scoring_length or (scoring_length_dict.get(freq) if freq in scoring_length_dict.keys() else 30) + self.random_state = random_state + def _mape(self, actuals, predictions): """ This function computes the mean absolute percentage error for the observed vs the predicted values.
@@ -93,7 +97,8 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error): # Anomaly detection based on synthetic anomalies generated through a given intensity list for prop in self.anomaly_intensity_list: - trial_prob = np.random.uniform(0, 1, 1) + rnd = check_random_state(self.random_state) + trial_prob = rnd.uniform(0, 1, 1) if trial_prob < 0.4: synthetic_value = observation + (prop * observation) anomaly_flags.append(1) @@ -227,7 +232,8 @@ def _objective_part(self, data, smoothed_series, args): anomaly_probabilities_list = [] local_model = copy.deepcopy(stable_model) for i, row in scoring_data.iterrows(): - trial_prob = np.random.uniform(0, 1, 1) + rnd = check_random_state(self.random_state) + trial_prob = rnd.uniform(0, 1, 1) observed_value = row.raw synthetic_actual = observed_value if trial_prob < 0.4: @@ -263,7 +269,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50): :return: Optimal hyperparameters :rtype: dict """ - + import numpy as np from functools import partial from pykalman import KalmanFilter @@ -288,7 +294,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50): try: series = data[self._target_metric].values - kf = KalmanFilter() + kf = KalmanFilter(random_state=self.random_state) smoothed_series, cov_series = kf.em(series).smooth(series) except: raise ValueError('Kalman Smoothing requires more than one data point') @@ -299,7 +305,7 @@ def _optimize(self, data, objective_part, algo=tpe.suggest, max_evals=50): raise ValueError('Only `detection_type=OutlierDetection` is supported in hyperparameter optimization right now') # Calling the optimization function - hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True) + hyper_param = fmin(objective, space=space, algo=algo, max_evals=max_evals, show_progressbar=True, rstate=np.random.default_rng(self.random_state)) hyper_param['LuminaireModel'] = 
hyper_param_list[hyper_param['LuminaireModel']]['model'] if 'max_ft_freq' in hyper_param: hyper_param['max_ft_freq'] = hyper_param['max_ft_freq'] + 2 diff --git a/luminaire/tests/test_hyper.py b/luminaire/tests/test_hyper.py index ef6535a..ef30348 100644 --- a/luminaire/tests/test_hyper.py +++ b/luminaire/tests/test_hyper.py @@ -2,9 +2,16 @@ class TestHyperparameterOptimization(object): - def test_run(self, test_data_with_missing): - + def test_run1(self, test_data_with_missing): + """Test using the default random_state=None""" hyper_obj = HyperparameterOptimization(freq='D', detection_type='OutlierDetection') hyper_parameters = hyper_obj.run(test_data_with_missing, max_evals=5) assert isinstance(hyper_parameters, dict) + + def test_run2(self, test_data_with_missing): + """Test defining a random_state""" + hyper_obj = HyperparameterOptimization(freq='D', detection_type='OutlierDetection', random_state=42) + hyper_parameters = hyper_obj.run(test_data_with_missing, max_evals=5) + + assert isinstance(hyper_parameters, dict) diff --git a/luminaire/utils/__init__.py b/luminaire/utils/__init__.py new file mode 100644 index 0000000..9f27902 --- /dev/null +++ b/luminaire/utils/__init__.py @@ -0,0 +1 @@ +from .random_state_validation import check_random_state \ No newline at end of file diff --git a/luminaire/utils/random_state_validation.py b/luminaire/utils/random_state_validation.py new file mode 100644 index 0000000..928c9dd --- /dev/null +++ b/luminaire/utils/random_state_validation.py @@ -0,0 +1,23 @@ +import numpy as np +import numbers + +def check_random_state(seed): + """Turn seed into a np.random.RandomState instance + + :param int seed: seed for the random state + :return: The np.random.RandomState instance derived from seed. + If seed is None, return the RandomState singleton used by np.random. + If seed is an int, return a new RandomState instance seeded with seed. + If seed is already a RandomState instance, return it. + Otherwise raise ValueError. 
+ :rtype: np.random.RandomState + """ + if seed is None or seed is np.random: + return np.random.mtrand._rand + if isinstance(seed, numbers.Integral): + return np.random.RandomState(seed) + if isinstance(seed, np.random.RandomState): + return seed + raise ValueError( + "%r cannot be used to seed a numpy.random.RandomState instance" % seed + ) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 798dc88..50d75ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,9 @@ bayescd>=0.4 changepy>=0.3.1 hyperopt>=0.1.2 numpy>=1.17.5, <=1.22.4 -pandas>=0.25.3 +pandas>=0.25.3, <=2.0.3 pykalman>=0.9.5 scipy>=1.6.0 -statsmodels>=0.13.0 +statsmodels>=0.13.0, <=0.13.5 scikit-learn>=0.24.2 decorator>=5.1.0 diff --git a/setup.py b/setup.py index b3c2735..34eaf07 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( name='luminaire', - version='0.4.2', + version='0.4.3', license='Apache License 2.0',