From bfbf50714ac00a719e3e3cc2487ce59a0d705bd3 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Sat, 4 Sep 2021 12:18:53 +0200 Subject: [PATCH 01/11] BUILD: start 2.0.x development branch --- azure-pipelines.yml | 6 +++--- environment.yml | 4 ++-- pyproject.toml | 10 +++++----- src/facet/__init__.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 12fe43e2..7985dacf 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,9 +1,9 @@ trigger: - - 1.2.x + - 2.0.x - release/* pr: - - 1.2.x + - 2.0.x - release/* # set the build name @@ -23,7 +23,7 @@ resources: type: github endpoint: BCG-Gamma name: BCG-Gamma/pytools - ref: 1.2.x + ref: 2.0.x variables: ${{ if not(startsWith(variables['Build.SourceBranch'], 'refs/pull/')) }}: diff --git a/environment.yml b/environment.yml index 1b8c9198..226e0802 100644 --- a/environment.yml +++ b/environment.yml @@ -5,7 +5,7 @@ channels: dependencies: # run - boruta_py ~= 0.3 - - gamma-pytools ~= 1.2, >= 1.2.1 + - gamma-pytools >= 2dev0, < 3a - joblib ~= 1.0 - lightgbm ~= 3.2 - matplotlib ~= 3.3 @@ -14,7 +14,7 @@ dependencies: - python ~= 3.8 - scikit-learn ~= 0.24.2 - scipy ~= 1.5 - - sklearndf ~= 1.2 + - sklearndf >= 2dev0, < 3a # build/test - black = 20.8b1 - conda-build ~= 3.20 diff --git a/pyproject.toml b/pyproject.toml index 71f54858..0824b90b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,14 +15,14 @@ license = "Apache Software License v2.0" requires = [ # direct requirements of gamma-facet - "gamma-pytools ~=1.2,>=1.2.1", + "gamma-pytools ~=2.0dev0", "matplotlib ~=3.0", "numpy >=1.17,<2a", "packaging >=20", "pandas >=0.24,<2a", "scipy ~=1.2", "shap >=0.34,<0.40a", - "sklearndf ~=1.2", + "sklearndf ~=2.0dev0", # additional requirements of shap 0.38 "ipython >=7", ] @@ -71,7 +71,7 @@ Repository = "https://github.com/BCG-Gamma/facet" [build.matrix.min] # direct requirements of gamma-facet -gamma-pytools = "~=1.2.1" +gamma-pytools = "~=2.0.0dev0" matplotlib = "~=3.0.3" numpy = ">=1.17.5,<18a" packaging = "~=20.9" @@ -92,7 +92,7 @@ ipython = "~=7.0" [build.matrix.max] # direct requirements of gamma-facet -gamma-pytools = "~=1.2,>=1.2.1" +gamma-pytools = ">=2dev0,<3a" matplotlib = "~=3.4" numpy = ">=1.20,<2a" packaging = ">=20.9" @@ -100,7 +100,7 @@ pandas = "~=1.2" python = "~=3.8" scipy = "~=1.5" shap = "~=0.39.0" -sklearndf = "~=1.2" +sklearndf = ">=2dev0,<3a" # additional maximum requirements of sklearndf boruta = "~=0.3" lightgbm = "~=3.2" diff --git a/src/facet/__init__.py b/src/facet/__init__.py index 99e08b1b..f365a4f5 100644 --- a/src/facet/__init__.py +++ b/src/facet/__init__.py @@ -6,7 +6,7 @@ """ -__version__ = "1.2.0" +__version__ = "2.0.0dev0" __logo__ = ( r""" From c5b7c48549f7c4361c7982914def532aab3d8b32 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 9 Sep 2021 00:30:48 +0200 Subject: [PATCH 02/11] API: replace MatplotStyle.renderer with get_renderer() --- src/facet/simulation/viz/_style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/simulation/viz/_style.py b/src/facet/simulation/viz/_style.py index 03b8971d..d1f92912 100644 --- a/src/facet/simulation/viz/_style.py +++ b/src/facet/simulation/viz/_style.py @@ -201,7 +201,7 @@ def _make_sub_axes() -> Axes: def _x_axis_height() -> float: _, axis_below_size_pixels = main_ax.get_xaxis().get_text_heights( - self.renderer + self.get_renderer() ) ((_, y0), (_, y1)) = main_ax.transData.inverted().transform( ((0, 0), (0, axis_below_size_pixels)) From fe18153be04e336791544fa25d7dee92136d6646 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 10 Sep 2021 10:03:01 +0200 Subject: [PATCH 03/11] BUILD: update version to 1.2.1 --- src/facet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/__init__.py b/src/facet/__init__.py index 99e08b1b..03bb03a7 100644 --- a/src/facet/__init__.py +++ b/src/facet/__init__.py @@ -6,7 +6,7 @@ """ -__version__ = "1.2.0" +__version__ = "1.2.1" __logo__ = ( r""" From 0d7c0dfb2b4856ca7427f43bd8c1077e6a153324 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 10 Sep 2021 18:03:13 +0200 Subject: [PATCH 04/11] FIX: change github option from isPreRelease to isPrerelease --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7985dacf..0a3f7328 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -541,7 +541,7 @@ stages: conda install -c conda-forge -c bcg_gamma $(package_name) isDraft: false - isPreRelease: $(is_prerelease) + isPrerelease: $(is_prerelease) assets: | $(System.ArtifactsDirectory)/tox_default/tox/$(package_name)-*.tar.gz $(System.ArtifactsDirectory)/conda_default/conda/noarch/$(package_name)-*.tar.bz2 From 6d4486f020e212d5741737d0c2362be7afc02f93 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 10 Sep 2021 18:03:53 +0200 Subject: [PATCH 05/11] BUILD: publish development releases as pre-releases --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 0a3f7328..3208fed5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -466,8 +466,9 @@ stages: export PYTHONPATH=$(System.DefaultWorkingDirectory)/pytools/sphinx/base version=$(python -c "import make_base; print(make_base.get_package_version())") echo "Current version: $version" - echo "Detecting pre-release ('rc' in version)" + echo "Detecting pre-release ('dev' or 'rc' in version)" prerelease=False + [[ $version == *dev* ]] && prerelease=True && echo "Development release identified" [[ $version == *rc* ]] && prerelease=True && echo "Pre-release identified" echo "##vso[task.setvariable variable=current_version]$version" echo "##vso[task.setvariable variable=is_prerelease]$prerelease" From b5caadb635510093e3c4ff1778a2aa217c5ea0e9 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Fri, 10 Sep 2021 23:13:37 +0200 Subject: [PATCH 06/11] API: add function pytools.build.validate_release_version() --- test/test/test_package_version.py | 56 +++++-------------------------- 1 file changed, 9 insertions(+), 47 deletions(-) diff --git a/test/test/test_package_version.py b/test/test/test_package_version.py index c08815c4..41332436 100644 --- a/test/test/test_package_version.py +++ b/test/test/test_package_version.py @@ -1,61 +1,23 @@ import logging -import re from os import environ -from urllib import request -from xml.etree import ElementTree from pytest import mark +from pytools.build import validate_release_version + import facet log = logging.getLogger(__name__) -ENV_RUN_PACKAGE_VERSION_TEST = "RUN_PACKAGE_VERSION_TEST" + +PACKAGE_VERSION = facet.__version__ +MODULE_NAME = facet.__name__ +PACKAGE_NAME = "gamma-facet" @mark.skipif( - condition=environ.get(ENV_RUN_PACKAGE_VERSION_TEST, "") != facet.__name__, - reason="Parent build is not primarily for facet.", + condition=environ.get("RUN_PACKAGE_VERSION_TEST", "") != MODULE_NAME, + reason=f"build is not for a {PACKAGE_NAME} release", ) def test_package_version() -> None: - dev_version = facet.__version__ - - log.info(f"Test package version – version set to: {dev_version}") - assert re.match( - r"^(\d)+\.(\d)+\.(\d)+(rc\d+)?$", dev_version - ), "facet.__version__ is not in MAJOR.MINOR.PATCH[rcN] format." - - releases_uri = "https://pypi.org/rss/project/gamma-facet/releases.xml" - - with request.urlopen(releases_uri) as response: - assert response.getcode() == 200, "Error getting releases from PyPi" - releases_xml = response.read() - - tree = ElementTree.fromstring(releases_xml) - releases_nodes = tree.findall(path=".//channel//item//title") - releases = [r.text for r in releases_nodes] - - log.info(f"Found these releases on PyPi:{releases}") - - assert ( - dev_version not in releases - ), f"Current package version {dev_version} already on PyPi" - - is_minor_or_major_release = dev_version.endswith(".0") - - if is_minor_or_major_release: - pre_releases = [ - version - for version in releases - if re.match(f"{dev_version}rc\\d+$", version) - ] - - assert pre_releases, ( - f"Release of major or minor version {dev_version} " - f"requires at least one pre-release, e.g. {dev_version}rc0" - ) - - log.info( - f"Pre-release(s) {pre_releases} exist(s) – " - f"release of major/minor version {dev_version} allowed" - ) + validate_release_version(package=PACKAGE_NAME, version=PACKAGE_VERSION) From f9a4ef9e2574020903c5ed93719d90cc46a00634 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 13 Sep 2021 21:54:31 +0200 Subject: [PATCH 07/11] BUILD: install 'packaging' when running make.py --- azure-pipelines.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 3208fed5..1c361447 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -30,8 +30,8 @@ variables: branchName: $[ replace(variables['Build.SourceBranch'], 'refs/heads/', '') ] ${{ if startsWith(variables['Build.SourceBranch'], 'refs/pull/') }}: branchName: $[ replace(variables['System.PullRequest.SourceBranch'], 'refs/heads/', '') ] - source_is_release_branch: $[ startsWith(variables['branchName'], 'release') ] - source_is_develop_branch: $[ or(startsWith(variables['branchName'], 'develop'), startsWith(variables['branchName'], 'dev/')) ] + source_is_release_branch: $[ startsWith(variables['branchName'], 'release/') ] + source_is_develop_branch: $[ startsWith(variables['branchName'], 'dev/') ] is_scheduled: $[ eq(variables['Build.Reason'], 'Schedule') ] project_name: facet project_root: $(project_name) @@ -97,7 +97,7 @@ stages: cd $(System.DefaultWorkingDirectory) files_changed=$(git diff $(Build.SourceVersion)^! --name-only) echo "Files changed since last commit: ${files_changed}" - n_files_changed=$(git diff $(Build.SourceVersion)^! --name-only | grep -i -E 'meta.yaml|pyproject.toml|azure-pipelines.yml|tox.ini' | wc -l | xargs) + n_files_changed=$(git diff $(Build.SourceVersion)^! --name-only | grep -i -E 'meta\.yaml|pyproject\.toml|azure-pipelines\.yml|tox\.ini|make\.py' | wc -l | xargs) if [ ${n_files_changed} -gt 0 ] then build_changed=1 @@ -210,7 +210,7 @@ stages: - script: dir $(Build.SourcesDirectory) - script: | - conda install -y -c anaconda conda-build~=3.20.5 conda-verify toml=0.10.* flit=3.0.* + conda install -y -c anaconda conda-build~=3.20.5 conda-verify toml=0.10.* flit=3.0.* packaging~=20.9 displayName: 'Install conda-build, flit, toml' condition: eq(variables['BUILD_SYSTEM'], 'conda') @@ -297,7 +297,7 @@ stages: - script: dir $(Build.SourcesDirectory) - script: | - conda install -y -c anaconda conda-build~=3.20.5 conda-verify toml=0.10.* flit=3.0.* + conda install -y -c anaconda conda-build~=3.20.5 conda-verify toml=0.10.* flit=3.0.* packaging~=20.9 displayName: 'Install conda-build, flit, toml' condition: eq(variables['BUILD_SYSTEM'], 'conda') @@ -398,7 +398,7 @@ stages: condition: ne(variables.branchName, 'develop') script: | set -eux - python -m pip install "toml==0.10.*" + python -m pip install toml~=0.10.2 packaging~=20.9 cd $(System.DefaultWorkingDirectory)/pytools python < Date: Mon, 13 Sep 2021 21:55:42 +0200 Subject: [PATCH 08/11] TEST: replace test_package_version.py with version check in make.py --- test/test/test_package_version.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 test/test/test_package_version.py diff --git a/test/test/test_package_version.py b/test/test/test_package_version.py deleted file mode 100644 index 41332436..00000000 --- a/test/test/test_package_version.py +++ /dev/null @@ -1,23 +0,0 @@ -import logging -from os import environ - -from pytest import mark - -from pytools.build import validate_release_version - -import facet - -log = logging.getLogger(__name__) - - -PACKAGE_VERSION = facet.__version__ -MODULE_NAME = facet.__name__ -PACKAGE_NAME = "gamma-facet" - - -@mark.skipif( - condition=environ.get("RUN_PACKAGE_VERSION_TEST", "") != MODULE_NAME, - reason=f"build is not for a {PACKAGE_NAME} release", -) -def test_package_version() -> None: - validate_release_version(package=PACKAGE_NAME, version=PACKAGE_VERSION) From c852e6c3bbfe8bd3344ad7a7582252a104e74703 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Mon, 13 Sep 2021 22:08:19 +0200 Subject: [PATCH 09/11] BUILD: change version to 2.0.dev0 --- src/facet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facet/__init__.py b/src/facet/__init__.py index f365a4f5..7dbf09c3 100644 --- a/src/facet/__init__.py +++ b/src/facet/__init__.py @@ -6,7 +6,7 @@ """ -__version__ = "2.0.0dev0" +__version__ = "2.0.dev0" __logo__ = ( r""" From c2853d2c336e8b798e311791af02807d0bcd52d7 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 16 Sep 2021 19:42:15 +0200 Subject: [PATCH 10/11] API: update code for updated pytools.parallelization API (#301) * API: expect Iterable not positional args in run_jobs() and run_queues() * API: rename JobQueue.collate() to .aggregate() --- src/facet/crossfit/_crossfit.py | 2 +- src/facet/inspection/_shap.py | 32 ++++++++++++++--------------- src/facet/selection/_selection.py | 2 +- src/facet/simulation/_simulation.py | 24 +++++++++------------- 4 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/facet/crossfit/_crossfit.py b/src/facet/crossfit/_crossfit.py index 8a39b8aa..3b4789d0 100644 --- a/src/facet/crossfit/_crossfit.py +++ b/src/facet/crossfit/_crossfit.py @@ -480,7 +480,7 @@ def on_run(self) -> None: if do_fit: crossfit._reset_fit() - def collate(self, job_results: List[FitResult]) -> Optional[np.ndarray]: + def aggregate(self, job_results: List[FitResult]) -> Optional[np.ndarray]: models, scores = zip(*job_results) if do_fit: diff --git a/src/facet/inspection/_shap.py b/src/facet/inspection/_shap.py index 1e488cf0..39ba1ff5 100644 --- a/src/facet/inspection/_shap.py +++ b/src/facet/inspection/_shap.py @@ -275,23 +275,21 @@ def _make_explainer(_model: T_LearnerPipelineDF) -> BaseExplainer: else: shap_df_per_split = JobRunner.from_parallelizable(self).run_jobs( - *( - Job.delayed(self._get_shap_for_split)( - model, - sample, - _make_explainer(model), - self.feature_index_, - self._convert_raw_shap_to_df, - self.get_multi_output_type(), - self._get_multi_output_names(model=model, sample=sample), - ) - for model, sample in zip( - crossfit.models(), - ( - sample.subsample(iloc=oob_split) - for _, oob_split in crossfit.splits() - ), - ) + Job.delayed(self._get_shap_for_split)( + model, + sample, + _make_explainer(model), + self.feature_index_, + self._convert_raw_shap_to_df, + self.get_multi_output_type(), + self._get_multi_output_names(model=model, sample=sample), + ) + for model, sample in zip( + crossfit.models(), + ( + sample.subsample(iloc=oob_split) + for _, oob_split in crossfit.splits() + ), ) ) diff --git a/src/facet/selection/_selection.py b/src/facet/selection/_selection.py index c8dfe6ab..e27a3949 100644 --- a/src/facet/selection/_selection.py +++ b/src/facet/selection/_selection.py @@ -518,7 +518,7 @@ def _rank_learners( ) pipeline_scorings: List[np.ndarray] = list( - JobRunner.from_parallelizable(self).run_queues(*queues) + JobRunner.from_parallelizable(self).run_queues(queues) ) for crossfit, pipeline_parameters, pipeline_scoring in zip( diff --git a/src/facet/simulation/_simulation.py b/src/facet/simulation/_simulation.py index 4dfd68f4..9187b608 100644 --- a/src/facet/simulation/_simulation.py +++ b/src/facet/simulation/_simulation.py @@ -383,12 +383,10 @@ def simulate_actuals(self) -> pd.Series: y_mean = self.expected_output() result: List[float] = JobRunner.from_parallelizable(self).run_jobs( - *( - Job.delayed(self._simulate_actuals)( - model, subsample.features, y_mean, self._simulate - ) - for model, subsample in self._get_simulations() + Job.delayed(self._simulate_actuals)( + model, subsample.features, y_mean, self._simulate ) + for model, subsample in self._get_simulations() ) return pd.Series( @@ -455,16 +453,14 @@ def _simulate_feature_with_values( simulation_means_and_sems_per_split: List[ Tuple[Sequence[float], Sequence[float]] ] = JobRunner.from_parallelizable(self).run_jobs( - *( - Job.delayed(UnivariateUpliftSimulator._simulate_values_for_split)( - model=model, - subsample=subsample, - feature_name=feature_name, - simulated_values=simulation_values, - simulate_fn=self._simulate, - ) - for (model, subsample) in self._get_simulations() + Job.delayed(UnivariateUpliftSimulator._simulate_values_for_split)( + model=model, + subsample=subsample, + feature_name=feature_name, + simulated_values=simulation_values, + simulate_fn=self._simulate, ) + for (model, subsample) in self._get_simulations() ) index_name: str From c2f960bb1fa1b6fc1d0abafe5ad6faa28cbcb8d8 Mon Sep 17 00:00:00 2001 From: Jan Ittner Date: Thu, 16 Sep 2021 23:11:55 +0200 Subject: [PATCH 11/11] BUILD: set min requirement for sklearndf to ~=2.0.dev0 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0824b90b..11def4e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,7 @@ Repository = "https://github.com/BCG-Gamma/facet" [build.matrix.min] # direct requirements of gamma-facet -gamma-pytools = "~=2.0.0dev0" +gamma-pytools = "~=2.0.dev0" matplotlib = "~=3.0.3" numpy = ">=1.17.5,<18a" packaging = "~=20.9" @@ -79,7 +79,7 @@ pandas = "~=0.24.2" python = "~=3.6.13" scipy = "~=1.2.1" shap = "~=0.34.0" -sklearndf = "~=1.2.0" +sklearndf = "~=2.0.dev0" # additional minimum requirements of sklearndf boruta = "~=0.3.0" lightgbm = "~=3.0.0"