Skip to content

Commit

Permalink
support polars in GlobalSklearnTransformer (#377)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored Jul 17, 2024
1 parent 407fda2 commit 91c2a59
Show file tree
Hide file tree
Showing 10 changed files with 260 additions and 367 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/build-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ on:
branches: ["main"]
workflow_dispatch:

defaults:
run:
shell: bash

jobs:
build-docs:
runs-on: ubuntu-latest
Expand All @@ -24,12 +20,10 @@ jobs:
path: docs-scripts
- uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1
with:
cache: "pip"
python-version: '3.10'
cache-dependency-path: settings.ini
python-version: "3.10"
- name: Build docs
run: |
pip install -e ".[dev]"
pip install uv && uv pip install --system ".[all]"
mkdir nbs/_extensions
cp -r docs-scripts/mintlify/ nbs/_extensions/
python docs-scripts/update-quarto.py
Expand Down
92 changes: 21 additions & 71 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,18 @@ on:
branches: [main]
workflow_dispatch:

defaults:
run:
shell: bash -l {0}

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
nb-sync:
runs-on: ubuntu-latest
steps:
- name: Clone repo
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Set up python
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1

- name: Install nbdev
run: pip install nbdev

- name: Check if all notebooks are cleaned
run: |
echo "Check we are starting with clean git checkout"
if [ -n "$(git status -uno -s)" ]; then echo "git status is not clean"; false; fi
echo "Trying to strip out notebooks"
./action_files/clean_nbs
echo "Check that strip out was unnecessary"
git status -s # display the status to see which nbs need cleaning up
if [ -n "$(git status -uno -s)" ]; then echo -e "!!! Detected unstripped out notebooks\n!!!Remember to run nbdev_install_hooks"; false; fi
run-all-tests:
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10']
python-version: ["3.8", "3.9", "3.10", "3.11"]
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_NIXTLA_TMP }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_NIXTLA_TMP }}
Expand All @@ -53,49 +27,24 @@ jobs:
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Set up environment
uses: mamba-org/setup-micromamba@f8b8a1e23a26f60a44c853292711bacfd3eac822 # v1.9.0
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1
with:
environment-file: environment.yml
create-args: python=${{ matrix.python-version }}
cache-environment: true
python-version: ${{ matrix.python-version }}

- name: Install the library
run: pip install ./
run: pip install uv && uv pip install --system ".[all]"

- name: Run all tests
run: nbdev_test --n_workers 0 --do_print --timing --flags 'polars core'
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_re 'electricity' --flags 'polars'

run-macos-tests:
runs-on: macos-13
run-local-tests:
runs-on: ${{ matrix.os }}
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Clone repo
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Set up environment
uses: mamba-org/setup-micromamba@f8b8a1e23a26f60a44c853292711bacfd3eac822 # v1.9.0
with:
environment-file: local_environment.yml
create-args: python=${{ matrix.python-version }}
cache-environment: true

- name: Install the library
run: pip install ./

- name: Run local tests
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_glob "*distributed*" --flags 'polars core'

run-windows-tests:
runs-on: windows-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
os: [macos-13, macos-14, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- name: Clone repo
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
Expand All @@ -106,10 +55,14 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Install the library
run: pip install uv && uv pip install ".[dev]" --system
run: pip install uv && uv pip install --system ".[dev]"

- name: Install OpenMP
if: startsWith(matrix.os, 'macos')
run: brew install libomp

- name: Run local tests
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_glob "*distributed*" --flags 'polars core'
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_re "(distributed|electricity)" --flags 'polars'

check-deps:
runs-on: ubuntu-latest
Expand All @@ -120,11 +73,10 @@ jobs:
- name: Set up python
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1
with:
python-version: '3.10'
cache: 'pip'
python-version: "3.10"

- name: Install forecast notebook dependencies
run: pip install . datasetsforecast lightgbm matplotlib nbdev xgboost
run: pip install uv && uv pip install --system . datasetsforecast lightgbm matplotlib nbdev xgboost

- name: Run forecast notebook
run: nbdev_test --path nbs/forecast.ipynb
Expand All @@ -136,11 +88,10 @@ jobs:

- uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1
with:
python-version: '3.10'
cache: 'pip'
python-version: "3.10"

- name: Install dependencies
run: pip install . pytest pytest-benchmark
run: pip install uv && uv pip install --system . pytest pytest-benchmark

- name: Run efficiency tests
run: pytest tests/test_pipeline.py --benchmark-group-by=func --benchmark-sort=fullname
Expand All @@ -152,11 +103,10 @@ jobs:

- uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # 5.1.1
with:
python-version: '3.10'
cache: 'pip'
python-version: "3.10"

- name: Install dependencies
run: pip install . datasetsforecast lightgbm pytest
run: pip install uv && uv pip install --system . datasetsforecast lightgbm pytest

- name: Run m4 performance tests
run: pytest tests/test_m4.py
37 changes: 0 additions & 37 deletions environment.yml

This file was deleted.

28 changes: 0 additions & 28 deletions local_environment.yml

This file was deleted.

37 changes: 19 additions & 18 deletions mlforecast/target_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import coreforecast.scalers as core_scalers
import numpy as np
import pandas as pd
import utilsforecast.processing as ufp
from coreforecast.grouped_array import GroupedArray as CoreGroupedArray
from sklearn.base import TransformerMixin, clone
from utilsforecast.compat import DataFrame
Expand Down Expand Up @@ -295,25 +295,26 @@ class GlobalSklearnTransformer(BaseTargetTransform):
def __init__(self, transformer: TransformerMixin):
self.transformer = transformer

def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
df = df.copy(deep=False)
def fit_transform(self, df: DataFrame) -> DataFrame:
df = ufp.copy_if_pandas(df, deep=False)
self.transformer_ = clone(self.transformer)
df[self.target_col] = self.transformer_.fit_transform(
df[[self.target_col]].values
transformed = self.transformer_.fit_transform(df[[self.target_col]].to_numpy())
return ufp.assign_columns(df, self.target_col, transformed[:, 0])

def inverse_transform(self, df: DataFrame) -> DataFrame:
df = ufp.copy_if_pandas(df, deep=False)
cols_to_transform = [
c for c in df.columns if c not in (self.id_col, self.time_col)
]
transformed = self.transformer_.inverse_transform(
df[cols_to_transform].to_numpy()
)
return df

def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
df = df.copy(deep=False)
cols_to_transform = df.columns.drop([self.id_col, self.time_col])
for col in cols_to_transform:
df[col] = self.transformer_.inverse_transform(df[[col]].values)
return df

def update(self, df: pd.DataFrame) -> pd.DataFrame:
df = df.copy(deep=False)
df[self.target_col] = self.transformer_.transform(df[[self.target_col]].values)
return df
return ufp.assign_columns(df, cols_to_transform, transformed)

def update(self, df: DataFrame) -> DataFrame:
df = ufp.copy_if_pandas(df, deep=False)
transformed = self.transformer_.transform(df[[self.target_col]].to_numpy())
return ufp.assign_columns(df, self.target_col, transformed[:, 0])

@staticmethod
def stack(transforms: Sequence["GlobalSklearnTransformer"]) -> "GlobalSklearnTransformer": # type: ignore[override]
Expand Down
Loading

0 comments on commit 91c2a59

Please sign in to comment.