Skip to content

Commit

Permalink
support quarters in fill_gaps (#80)
Browse files: browse the repository at this point in the history
  • Loading branch information
jmoralez authored Apr 22, 2024
1 parent 22a670f commit d3579be
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 10 deletions.
20 changes: 15 additions & 5 deletions nbs/preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
" return grid.join(df, on=[id_col, time_col], how='left')\n",
" if isinstance(freq, str):\n",
" offset = pd.tseries.frequencies.to_offset(freq)\n",
" n = offset.n\n",
" if isinstance(offset.base, pd.offsets.Minute):\n",
" # minutes are represented as 'm' in numpy\n",
" freq = 'm'\n",
Expand All @@ -188,15 +189,18 @@
" elif isinstance(offset.base, pd.offsets.Hour):\n",
" # hours are represented as 'h' in numpy\n",
" freq = 'h'\n",
" if offset.n > 1:\n",
" freq = freq.replace(str(offset.n), '')\n",
" elif isinstance(offset.base, (pd.offsets.QuarterBegin, pd.offsets.QuarterEnd)):\n",
" n *= 3\n",
" freq = 'M'\n",
" if n > 1:\n",
" freq = freq.replace(str(n), '')\n",
" try:\n",
" pd.Timedelta(offset)\n",
" except ValueError:\n",
" # irregular freq, try using first letter of abbreviation\n",
" # such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y'\n",
" freq = freq[0]\n",
" delta: Union[np.timedelta64, int] = np.timedelta64(offset.n, freq)\n",
" delta: Union[np.timedelta64, int] = np.timedelta64(n, freq)\n",
" else:\n",
" delta = freq\n",
" times_by_id = df.groupby(id_col, observed=True)[time_col].agg(['min', 'max'])\n",
Expand Down Expand Up @@ -1622,11 +1626,17 @@
" assert max_dates[0] == expected_end\n",
"\n",
"n_periods = 100\n",
"freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B', '1h']\n",
"freqs = ['YE', 'YS', 'ME', 'MS', 'W', 'W-TUE', 'D', 's', 'ms', 1, 2, '20D', '30s', '2YE', '3YS', '30min', 'B', '1h', 'QS-OCT', 'QE']\n",
"try:\n",
" pd.tseries.frequencies.to_offset('YE')\n",
"except ValueError:\n",
" freqs = [f.replace('YE', 'Y').replace('ME', 'M').replace('h', 'H') for f in freqs if isinstance(f, str)]\n",
" freqs = [\n",
" f.replace('YE', 'Y')\n",
" .replace('ME', 'M')\n",
" .replace('h', 'H')\n",
" .replace('QE', 'Q')\n",
" for f in freqs if isinstance(f, str)\n",
" ]\n",
"for freq in freqs:\n",
" if isinstance(freq, (pd.offsets.BaseOffset, str)): \n",
" dates = pd.date_range('1900-01-01', periods=n_periods, freq=freq)\n",
Expand Down
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[DEFAULT]
repo = utilsforecast
lib_name = utilsforecast
version = 0.1.5
version = 0.1.6
min_python = 3.8
license = apache2
black_formatting = True
Expand Down
2 changes: 1 addition & 1 deletion utilsforecast/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.5"
__version__ = "0.1.6"
10 changes: 7 additions & 3 deletions utilsforecast/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def fill_gaps(
return grid.join(df, on=[id_col, time_col], how="left")
if isinstance(freq, str):
offset = pd.tseries.frequencies.to_offset(freq)
n = offset.n
if isinstance(offset.base, pd.offsets.Minute):
# minutes are represented as 'm' in numpy
freq = "m"
Expand All @@ -134,15 +135,18 @@ def fill_gaps(
elif isinstance(offset.base, pd.offsets.Hour):
# hours are represented as 'h' in numpy
freq = "h"
if offset.n > 1:
freq = freq.replace(str(offset.n), "")
elif isinstance(offset.base, (pd.offsets.QuarterBegin, pd.offsets.QuarterEnd)):
n *= 3
freq = "M"
if n > 1:
freq = freq.replace(str(n), "")
try:
pd.Timedelta(offset)
except ValueError:
# irregular freq, try using first letter of abbreviation
# such as MS = 'Month Start' -> 'M', YS = 'Year Start' -> 'Y'
freq = freq[0]
delta: Union[np.timedelta64, int] = np.timedelta64(offset.n, freq)
delta: Union[np.timedelta64, int] = np.timedelta64(n, freq)
else:
delta = freq
times_by_id = df.groupby(id_col, observed=True)[time_col].agg(["min", "max"])
Expand Down

0 comments on commit d3579be

Please sign in to comment.