From d3579bed5b432e67cd44ccedb7bbe05df13d203f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Mon, 22 Apr 2024 15:09:25 -0600 Subject: [PATCH] support quarters in fill_gaps (#80) --- nbs/preprocessing.ipynb | 20 +++++++++++++++----- settings.ini | 2 +- utilsforecast/__init__.py | 2 +- utilsforecast/preprocessing.py | 10 +++++++--- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/nbs/preprocessing.ipynb b/nbs/preprocessing.ipynb index 9745dd8..79185af 100644 --- a/nbs/preprocessing.ipynb +++ b/nbs/preprocessing.ipynb @@ -178,6 +178,7 @@ " return grid.join(df, on=[id_col, time_col], how='left')\n", " if isinstance(freq, str):\n", " offset = pd.tseries.frequencies.to_offset(freq)\n", + " n = offset.n\n", " if isinstance(offset.base, pd.offsets.Minute):\n", " # minutes are represented as 'm' in numpy\n", " freq = 'm'\n", @@ -188,15 +189,18 @@ " elif isinstance(offset.base, pd.offsets.Hour):\n", " # hours are represented as 'h' in numpy\n", " freq = 'h'\n", - " if offset.n > 1:\n", - " freq = freq.replace(str(offset.n), '')\n", + " elif isinstance(offset.base, (pd.offsets.QuarterBegin, pd.offsets.QuarterEnd)):\n", + " n *= 3\n", + " freq = 'M'\n", + " if n > 1:\n", + " freq = freq.replace(str(n), '')\n", " try:\n", " pd.Timedelta(offset)\n", " except ValueError:\n", " # irregular freq, try using first letter of abbreviation\n", " # such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y'\n", " freq = freq[0]\n", - " delta: Union[np.timedelta64, int] = np.timedelta64(offset.n, freq)\n", + " delta: Union[np.timedelta64, int] = np.timedelta64(n, freq)\n", " else:\n", " delta = freq\n", " times_by_id = df.groupby(id_col, observed=True)[time_col].agg(['min', 'max'])\n", @@ -1622,11 +1626,17 @@ " assert max_dates[0] == expected_end\n", "\n", "n_periods = 100\n", - "freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B', '1h']\n", + "freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B', '1h', 'QS-OCT', 'QE']\n", "try:\n", " pd.tseries.frequencies.to_offset('YE')\n", "except ValueError:\n", - " freqs = [f.replace('YE', 'Y').replace('ME', 'M').replace('h', 'H') for f in freqs if isinstance(f, str)]\n", + " freqs = [\n", + " f.replace('YE', 'Y')\n", + " .replace('ME', 'M')\n", + " .replace('h', 'H')\n", + " .replace('QE', 'Q')\n", + " for f in freqs if isinstance(f, str)\n", + " ]\n", "for freq in freqs:\n", " if isinstance(freq, (pd.offsets.BaseOffset, str)): \n", " dates = pd.date_range('1900-01-01', periods=n_periods, freq=freq)\n", diff --git a/settings.ini b/settings.ini index 52c2b27..3dd7fae 100644 --- a/settings.ini +++ b/settings.ini @@ -1,7 +1,7 @@ [DEFAULT] repo = utilsforecast lib_name = utilsforecast -version = 0.1.5 +version = 0.1.6 min_python = 3.8 license = apache2 black_formatting = True diff --git a/utilsforecast/__init__.py b/utilsforecast/__init__.py index 1276d02..0a8da88 100644 --- a/utilsforecast/__init__.py +++ b/utilsforecast/__init__.py @@ -1 +1 @@ -__version__ = "0.1.5" +__version__ = "0.1.6" diff --git a/utilsforecast/preprocessing.py b/utilsforecast/preprocessing.py index 7e54f63..f864042 100644 --- a/utilsforecast/preprocessing.py +++ b/utilsforecast/preprocessing.py @@ -124,6 +124,7 @@ def fill_gaps( return grid.join(df, on=[id_col, time_col], how="left") if isinstance(freq, str): offset = pd.tseries.frequencies.to_offset(freq) + n = offset.n if isinstance(offset.base, pd.offsets.Minute): # minutes are represented as 'm' in numpy freq = "m" @@ -134,15 +135,18 @@ def fill_gaps( elif isinstance(offset.base, pd.offsets.Hour): # hours are represented as 'h' in numpy freq = "h" - if offset.n > 1: - freq = freq.replace(str(offset.n), "") + elif isinstance(offset.base, (pd.offsets.QuarterBegin, pd.offsets.QuarterEnd)): + n *= 3 + freq = "M" + if n > 1: + freq = freq.replace(str(n), "") try: pd.Timedelta(offset) except ValueError: # irregular freq, try using first letter of abbreviation # such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y' freq = freq[0] - delta: Union[np.timedelta64, int] = np.timedelta64(offset.n, freq) + delta: Union[np.timedelta64, int] = np.timedelta64(n, freq) else: delta = freq times_by_id = df.groupby(id_col, observed=True)[time_col].agg(["min", "max"])