Skip to content

Commit

Permalink
window size detection logic updated
Browse files: browse the repository at this point in the history
  • Loading branch information
sayanchk committed Jun 8, 2021
1 parent ffed199 commit ad40a3b
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions luminaire/exploration/data_exploration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,7 @@ def stream_profile(self, df, impute_only=False, **kwargs):
import datetime
import numpy as np
import pandas as pd
from scipy import stats

try:
processed_df, freq = self._prepare(df, impute_only=impute_only, streaming=True, **kwargs)
Expand All @@ -1058,7 +1059,7 @@ def stream_profile(self, df, impute_only=False, **kwargs):
if not self.window_length:
window_length_list = []

for i in range(20):
for i in range(100):
rand_date = sample(idx_date_list, 1)[0]
rand_start_idx = pd.Timestamp(datetime.datetime.combine(rand_date, training_start_time))
if rand_date in idx_date_list[:int(len(idx_date_list) / 2)]:
Expand All @@ -1070,7 +1071,18 @@ def stream_profile(self, df, impute_only=False, **kwargs):
else self.window_length
window_length_list.append(window_length_i)

window_length = int(np.median(window_length_list))
window_length_list = np.array(window_length_list)
if np.all(window_length_list == min(window_length_list)):
window_length = window_length_list[0]
else:
bin_count = max(1, int((max(window_length_list) - min(window_length_list)) / 12))
bins = np.linspace(min(window_length_list) - 1, max(window_length_list) + 1, bin_count)
if len(bins) == 1:
window_length = int(stats.mode(window_length_list).mode[0])
else:
digitized = np.digitize(window_length_list, bins)
arg_mode = np.argmax([len(window_length_list[digitized == i]) for i in range(1, len(bins))]) + 1
window_length = int(stats.mode(window_length_list[digitized == arg_mode]).mode[0])

if window_length < self.min_window_length:
raise ValueError('Training window too small')
Expand Down

0 comments on commit ad40a3b

Please sign in to comment.