From f7c34469e28f1217491153b92fdc6b103b206685 Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Wed, 21 Aug 2019 09:29:27 +0200 Subject: [PATCH 01/18] Prepare 0.10.0 release --- CHANGES.rst | 6 ++++++ eli5/__init__.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 20a98c85..f4cb72e5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.10.0 (2019-08-21) +------------------- + +* Keras image classifiers: explaining predictions with Grad-CAM + (GSoC-2019 project by @teabolt). + 0.9.0 (2019-07-05) ------------------ diff --git a/eli5/__init__.py b/eli5/__init__.py index 610a84f5..c12cce0c 100644 --- a/eli5/__init__.py +++ b/eli5/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -__version__ = '0.9.0' +__version__ = '0.10.0' from .formatters import ( format_as_html, @@ -95,4 +95,4 @@ ) except ImportError: # keras is not available - pass \ No newline at end of file + pass From 24f8c9ac168eeec807248c892d7ed406c04c852b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillem=20Garc=C3=ADa=20Subies?= <37592763+GuillemGSubies@users.noreply.github.com> Date: Wed, 28 Aug 2019 11:41:48 +0200 Subject: [PATCH 02/18] Install typing only for old python --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 011fdd28..ec561515 100755 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ def get_long_description(): 'scipy', 'six', 'scikit-learn >= 0.18', - 'typing', 'graphviz', 'tabulate>=0.7.7', ], @@ -45,6 +44,7 @@ def get_long_description(): ":python_version<'3.5.6'": [ 'singledispatch >= 3.4.0.3', ], + ":python_version<'3.5'": ['typing'], }, classifiers=[ 'Development Status :: 4 - Beta', From 4cd66ac5e0ffde0c002d8f46dc64d7091f49445f Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Thu, 29 Aug 2019 09:18:08 +0300 Subject: [PATCH 03/18] 0.10.1 release --- CHANGES.rst | 6 ++++++ eli5/__init__.py | 2 +- 2 files 
changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index f4cb72e5..92588f7a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +0.10.1 (2019-08-29) +------------------- + +* Don't include typing dependency on Python 3.5+ + to fix installation on Python 3.7 + 0.10.0 (2019-08-21) ------------------- diff --git a/eli5/__init__.py b/eli5/__init__.py index c12cce0c..ffb406cd 100644 --- a/eli5/__init__.py +++ b/eli5/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -__version__ = '0.10.0' +__version__ = '0.10.1' from .formatters import ( format_as_html, From 250bc0a0cdc0e592183b4bf3a730e8382ceef54e Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Thu, 10 Oct 2019 21:40:43 +0200 Subject: [PATCH 04/18] DOC add a link to sklearn docs for "scoring" argument --- eli5/sklearn/permutation_importance.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index 30ab3cad..987343c5 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -87,11 +87,13 @@ class PermutationImportance(BaseEstimator, MetaEstimatorMixin): scoring : string, callable or None, default=None Scoring function to use for computing feature importances. - A string with scoring name (see scikit-learn docs) or + A string with scoring name (see scikit-learn `docs`_) or a scorer callable object / function with signature ``scorer(estimator, X, y)``. If ``None``, the ``score`` method of the estimator is used. + .. _docs: https://scikit-learn.org/stable/modules/model_evaluation.html#common-cases-predefined-values + n_iter : int, default 5 Number of random shuffle iterations. Decrease to improve speed, increase to get more precise estimates. 
From c0d9d58ad9c539a6ebd9ba5f158ade36eb6d8696 Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 9 Dec 2019 20:30:35 +0100 Subject: [PATCH 05/18] `random_state` is deprecated in `OneClassSVM` --- tests/test_sklearn_explain_prediction.py | 2 +- tests/test_sklearn_explain_weights.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_sklearn_explain_prediction.py b/tests/test_sklearn_explain_prediction.py index 646c8c7d..277fee46 100644 --- a/tests/test_sklearn_explain_prediction.py +++ b/tests/test_sklearn_explain_prediction.py @@ -379,7 +379,7 @@ def test_explain_linear_binary(newsgroups_train_binary, clf): def test_explain_one_class_svm(): X = np.array([[0, 0], [0, 1], [5, 3], [93, 94], [90, 91]]) - clf = OneClassSVM(kernel='linear', random_state=42).fit(X) + clf = OneClassSVM(kernel='linear').fit(X) res = explain_prediction(clf, X[0]) assert res.targets[0].score < 0 for expl in format_as_all(res, clf): diff --git a/tests/test_sklearn_explain_weights.py b/tests/test_sklearn_explain_weights.py index 6ca5f519..93df0881 100644 --- a/tests/test_sklearn_explain_weights.py +++ b/tests/test_sklearn_explain_weights.py @@ -210,7 +210,7 @@ def test_explain_linear_unsupported_multiclass(clf, newsgroups_train): def test_explain_one_class_svm(): X = np.array([[0,0], [0, 1], [5, 3], [93, 94], [90, 91]]) - clf = OneClassSVM(kernel='linear', random_state=42).fit(X) + clf = OneClassSVM(kernel='linear').fit(X) res = explain_weights(clf) assert len(res.targets) == 1 target = res.targets[0] From 320119609c615560fb84491985b9a63dde0e247c Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 9 Dec 2019 20:41:08 +0100 Subject: [PATCH 06/18] `VectorizerMixin` is no longer base class for `CountVectorizer` --- eli5/sklearn/text.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py index 57296a6f..632799f6 100644 --- a/eli5/sklearn/text.py +++ b/eli5/sklearn/text.py @@ -59,6 +59,10 @@ def 
_get_doc_weighted_spans(doc, if hasattr(vec, 'get_doc_weighted_spans'): return vec.get_doc_weighted_spans(doc, feature_weights, feature_fn) + try: + from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin + except ImportError: # Changed in scikit-learn 0.22 + from sklearn.feature_extraction.text import VectorizerMixin if not isinstance(vec, VectorizerMixin): return None From 7aa3dcb4ce065203f7a2f0e268728585544e8c61 Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 9 Dec 2019 21:00:01 +0100 Subject: [PATCH 07/18] Default value of `gamma` in `sklearn.svm.*` changed from `auto` to `scale` --- tests/test_permutation_importance.py | 2 +- tests/test_sklearn_explain_weights.py | 2 +- tests/test_sklearn_permutation_importance.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_permutation_importance.py b/tests/test_permutation_importance.py index effb4ff9..f4b95233 100644 --- a/tests/test_permutation_importance.py +++ b/tests/test_permutation_importance.py @@ -41,7 +41,7 @@ def is_shuffled(X, X_sh, col): def test_get_feature_importances(boston_train): X, y, feat_names = boston_train - svr = SVR(C=20).fit(X, y) + svr = SVR(C=20, gamma='auto').fit(X, y) score, importances = get_score_importances(svr.score, X, y) assert score > 0.7 importances = dict(zip(feat_names, np.mean(importances, axis=0))) diff --git a/tests/test_sklearn_explain_weights.py b/tests/test_sklearn_explain_weights.py index 93df0881..21fc3333 100644 --- a/tests/test_sklearn_explain_weights.py +++ b/tests/test_sklearn_explain_weights.py @@ -210,7 +210,7 @@ def test_explain_linear_unsupported_multiclass(clf, newsgroups_train): def test_explain_one_class_svm(): X = np.array([[0,0], [0, 1], [5, 3], [93, 94], [90, 91]]) - clf = OneClassSVM(kernel='linear').fit(X) + clf = OneClassSVM(kernel='linear', gamma='auto').fit(X) res = explain_weights(clf) assert len(res.targets) == 1 target = res.targets[0] diff --git 
a/tests/test_sklearn_permutation_importance.py b/tests/test_sklearn_permutation_importance.py index 19e54e2d..4fe942fd 100644 --- a/tests/test_sklearn_permutation_importance.py +++ b/tests/test_sklearn_permutation_importance.py @@ -73,7 +73,7 @@ def test_cv(boston_train): *boston_train, noise_ratio=0.99) reg = PermutationImportance( - SVR(C=100), + SVR(C=100, gamma='auto'), random_state=42, cv=None, n_iter=50, # use the same number of experiments as with cv=10 @@ -86,7 +86,7 @@ def test_cv(boston_train): # CV feature importances reg = PermutationImportance( - SVR(C=100), + SVR(C=100, gamma='auto'), random_state=42, cv=10, ).fit(X_test, y_test) @@ -132,9 +132,9 @@ def test_feature_selection(boston_train): ), threshold=0.1, ) - pipe = make_pipeline(sel, SVR(C=10)) + pipe = make_pipeline(sel, SVR(C=10, gamma='auto')) score1 = cross_val_score(pipe, X, y).mean() - score2 = cross_val_score(SVR(C=10), X, y).mean() + score2 = cross_val_score(SVR(C=10, gamma='auto'), X, y).mean() print(score1, score2) assert score1 > score2 From f625fa9d9e2cc09712e4168cad549335056efc6e Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 9 Dec 2019 21:31:37 +0100 Subject: [PATCH 08/18] New default arguments for `LogisticRegression`, `LogisticRegressionCV` and `RFECV` --- eli5/sklearn/text.py | 2 +- tests/test_sklearn_explain_weights.py | 2 +- tests/test_sklearn_transform.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py index 632799f6..9f3c2fdb 100644 --- a/eli5/sklearn/text.py +++ b/eli5/sklearn/text.py @@ -62,7 +62,7 @@ def _get_doc_weighted_spans(doc, try: from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin except ImportError: # Changed in scikit-learn 0.22 - from sklearn.feature_extraction.text import VectorizerMixin + from sklearn.feature_extraction.text import VectorizerMixin # type: ignore if not isinstance(vec, VectorizerMixin): return None diff --git 
a/tests/test_sklearn_explain_weights.py b/tests/test_sklearn_explain_weights.py index 21fc3333..7f5469c2 100644 --- a/tests/test_sklearn_explain_weights.py +++ b/tests/test_sklearn_explain_weights.py @@ -451,7 +451,7 @@ def test_explain_random_forest_and_tree_feature_filter(newsgroups_train, clf): def test_explain_empty(newsgroups_train): - clf = LogisticRegression(C=0.01, penalty='l1', random_state=42) + clf = LogisticRegression(C=0.01, penalty='l1', solver='liblinear', random_state=42) docs, y, target_names = newsgroups_train vec = TfidfVectorizer() diff --git a/tests/test_sklearn_transform.py b/tests/test_sklearn_transform.py index aa200937..0c7123f5 100644 --- a/tests/test_sklearn_transform.py +++ b/tests/test_sklearn_transform.py @@ -81,19 +81,19 @@ def selection_score_func(X, y): (VarianceThreshold(1.0), ['']), (GenericUnivariateSelect(), ['']), (GenericUnivariateSelect(mode='k_best', param=2), ['', '']), - (SelectFromModel(LogisticRegression('l1', C=0.01, random_state=42)), + (SelectFromModel(LogisticRegression('l1', C=0.01, solver='liblinear', random_state=42, multi_class='ovr')), ['', '']), (SelectFromModel( PermutationImportance( - LogisticRegression(random_state=42), + LogisticRegression(solver='liblinear', random_state=42), cv=5, random_state=42, refit=False, ), threshold=0.1, ), ['', '']), - (RFE(LogisticRegression(random_state=42), 2), + (RFE(LogisticRegression(solver='liblinear', random_state=42, multi_class='ovr'), 2), ['', '']), - (RFECV(LogisticRegression(random_state=42)), + (RFECV(LogisticRegression(solver='liblinear', random_state=42, multi_class='ovr'), cv=3), ['', '', '', '']), ] + _additional_test_cases) def test_transform_feature_names_iris(transformer, expected, iris_train): From b150387a4fe3ac4ea0f7a44ea9bfa489d300b12b Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Wed, 11 Dec 2019 17:11:21 +0100 Subject: [PATCH 09/18] Move `VectorizerMixin` import to the top of file. 
--- eli5/sklearn/text.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py index 9f3c2fdb..20da0f48 100644 --- a/eli5/sklearn/text.py +++ b/eli5/sklearn/text.py @@ -1,8 +1,11 @@ from __future__ import absolute_import from typing import Any, Union, Callable, Dict, List, Optional, Set, Tuple -from sklearn.feature_extraction.text import VectorizerMixin # type: ignore from sklearn.pipeline import FeatureUnion # type: ignore +try: + from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin +except ImportError: # Changed in scikit-learn 0.22 + from sklearn.feature_extraction.text import VectorizerMixin # type: ignore from eli5.base import ( DocWeightedSpans, WeightedSpans, FeatureWeights, FeatureWeight, @@ -59,10 +62,6 @@ def _get_doc_weighted_spans(doc, if hasattr(vec, 'get_doc_weighted_spans'): return vec.get_doc_weighted_spans(doc, feature_weights, feature_fn) - try: - from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin - except ImportError: # Changed in scikit-learn 0.22 - from sklearn.feature_extraction.text import VectorizerMixin # type: ignore if not isinstance(vec, VectorizerMixin): return None From fc9c2d737f7316a360c5fe8e2d3e6946009022c0 Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Thu, 12 Dec 2019 10:22:48 +0300 Subject: [PATCH 10/18] xfail a test of lightning with pandas dataframes lightning prediction does not work with pandas dataframes any more --- tests/test_lightning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_lightning.py b/tests/test_lightning.py index 6519d9d5..cebbb7a8 100644 --- a/tests/test_lightning.py +++ b/tests/test_lightning.py @@ -73,6 +73,7 @@ def test_explain_weights_regressors(boston_train, reg): has_bias=False) +@pytest.mark.xfail(reason='lightning does not work with pandas dataframes any more') @pytest.mark.parametrize(['reg'], _instances(_REGRESSORS)[:2]) def 
test_explain_prediction_pandas(reg, boston_train): _check_explain_prediction_pandas(reg, boston_train) From e6750a51a58830951deef4d6949fab02cdb36b58 Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Sun, 15 Dec 2019 19:40:19 +0100 Subject: [PATCH 11/18] Upgrade version of mypy. Fix typing errors --- eli5/formatters/text.py | 2 +- eli5/formatters/trees.py | 2 ++ eli5/formatters/utils.py | 4 ++-- eli5/keras/gradcam.py | 2 +- eli5/lime/_vectorizer.py | 4 ++-- eli5/lime/lime.py | 4 ++-- tox.ini | 2 +- 7 files changed, 11 insertions(+), 9 deletions(-) diff --git a/eli5/formatters/text.py b/eli5/formatters/text.py index a6269ed3..138dcdc2 100644 --- a/eli5/formatters/text.py +++ b/eli5/formatters/text.py @@ -159,7 +159,7 @@ def _transition_features_lines(explanation): return [ "", "Transition features:", - tabulate(tf.coef, headers=tf.class_names, showindex=tf.class_names, + tabulate(tf.coef, headers=tf.class_names, showindex=tf.class_names, # type: ignore floatfmt="0.3f"), "" ] diff --git a/eli5/formatters/trees.py b/eli5/formatters/trees.py index 7be1e9c3..5cfbee10 100644 --- a/eli5/formatters/trees.py +++ b/eli5/formatters/trees.py @@ -24,6 +24,8 @@ def p(*args): else: assert node.left is not None assert node.right is not None + assert node.threshold is not None + feat_name = node.feature_name if depth > 0: diff --git a/eli5/formatters/utils.py b/eli5/formatters/utils.py index 542402d1..b461f0ee 100644 --- a/eli5/formatters/utils.py +++ b/eli5/formatters/utils.py @@ -144,7 +144,7 @@ def tabulate(data, # type: List[List[Any]] def format_weight(value): # type: (Real) -> str - return '{:+.3f}'.format(value) + return '{:+.3f}'.format(value) # type: ignore def format_value(value): @@ -154,4 +154,4 @@ def format_value(value): elif np.isnan(value): return 'Missing' else: - return '{:.3f}'.format(value) + return '{:.3f}'.format(value) # type: ignore diff --git a/eli5/keras/gradcam.py b/eli5/keras/gradcam.py index 1878d642..31c80f4c 100644 --- a/eli5/keras/gradcam.py +++ 
b/eli5/keras/gradcam.py @@ -199,7 +199,7 @@ def _validate_target(target, output_shape): output_nodes = output_shape[1:][0] if not (0 <= target < output_nodes): raise ValueError('Prediction target index is ' - 'outside the required range [0, {}). ', + 'outside the required range [0, {}). ' 'Got {}'.format(output_nodes, target)) else: raise TypeError('Prediction target must be int. ' diff --git a/eli5/lime/_vectorizer.py b/eli5/lime/_vectorizer.py index d5168454..4d40ab31 100644 --- a/eli5/lime/_vectorizer.py +++ b/eli5/lime/_vectorizer.py @@ -13,8 +13,8 @@ class SingleDocumentVectorizer(BaseEstimator, TransformerMixin): """ Fake vectorizer which converts document just to a vector of ones """ - def __init__(self, token_pattern=None): - # type: (Optional[str]) -> None + def __init__(self, token_pattern): + # type: (str) -> None self.token_pattern = token_pattern def fit(self, X, y=None): diff --git a/eli5/lime/lime.py b/eli5/lime/lime.py index 924675bb..c9c9aa78 100644 --- a/eli5/lime/lime.py +++ b/eli5/lime/lime.py @@ -148,7 +148,7 @@ def __init__(self, rbf_sigma=None, # type: float random_state=None, expand_factor=10, # type: Optional[int] - token_pattern=None, # type: str + token_pattern=None, # type: Optional[str] ): # type: (...) 
-> None self.n_samples = n_samples @@ -162,7 +162,7 @@ def __init__(self, if char_based is None: if token_pattern is None: self.char_based = False # type: Optional[bool] - self.token_pattern = DEFAULT_TOKEN_PATTERN + self.token_pattern = DEFAULT_TOKEN_PATTERN # type: str else: self.char_based = None self.token_pattern = token_pattern diff --git a/tox.ini b/tox.ini index 38e65a4c..45c686b4 100644 --- a/tox.ini +++ b/tox.ini @@ -85,7 +85,7 @@ commands={[testenv:py35-extra]commands} basepython=python3.6 deps= {[testenv]deps} - mypy == 0.641 + mypy == 0.750 lxml commands= mypy --html-report ./mypy-cov --check-untyped-defs eli5 From e508937502bfad33c2f438552cfe0e477bf07976 Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 16 Dec 2019 17:32:34 +0100 Subject: [PATCH 12/18] Move `tabulate` import to the top of file --- eli5/formatters/text.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eli5/formatters/text.py b/eli5/formatters/text.py index 138dcdc2..fd8938a7 100644 --- a/eli5/formatters/text.py +++ b/eli5/formatters/text.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from itertools import chain import six +from tabulate import tabulate from typing import List, Optional, Iterator from eli5.base import Explanation, FeatureImportances @@ -9,7 +10,8 @@ from .features import FormattedFeatureName from .utils import ( format_signed, format_value, format_weight, has_any_values_for_weights, - replace_spaces, should_highlight_spaces, tabulate) + replace_spaces, should_highlight_spaces) +from .utils import tabulate as eli5_tabulate from .trees import tree2text @@ -153,7 +155,6 @@ def _decision_tree_lines(explanation): def _transition_features_lines(explanation): # type: (Explanation) -> List[str] - from tabulate import tabulate # type: ignore tf = explanation.transition_features assert tf is not None return [ @@ -203,7 +204,7 @@ def _targets_lines(explanation, # type: Explanation w = target.feature_weights assert w is not None - 
table = tabulate( + table = eli5_tabulate( [table_line(fw) for fw in chain(w.pos, reversed(w.neg))], header=table_header, col_align=col_align, From cc90bafde4f065412a8cbfa892e114b2aec9a59c Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 16 Dec 2019 17:36:20 +0100 Subject: [PATCH 13/18] Fix typing in formatters.utils --- eli5/formatters/text.py | 2 +- eli5/formatters/utils.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/eli5/formatters/text.py b/eli5/formatters/text.py index fd8938a7..e6abb286 100644 --- a/eli5/formatters/text.py +++ b/eli5/formatters/text.py @@ -160,7 +160,7 @@ def _transition_features_lines(explanation): return [ "", "Transition features:", - tabulate(tf.coef, headers=tf.class_names, showindex=tf.class_names, # type: ignore + tabulate(tf.coef, headers=tf.class_names, showindex=tf.class_names, floatfmt="0.3f"), "" ] diff --git a/eli5/formatters/utils.py b/eli5/formatters/utils.py index b461f0ee..244eb15a 100644 --- a/eli5/formatters/utils.py +++ b/eli5/formatters/utils.py @@ -143,15 +143,15 @@ def tabulate(data, # type: List[List[Any]] def format_weight(value): - # type: (Real) -> str - return '{:+.3f}'.format(value) # type: ignore + # type: (float) -> str + return '{:+.3f}'.format(value) def format_value(value): - # type: (Optional[Real]) -> str + # type: (Optional[float]) -> str if value is None: return '' elif np.isnan(value): return 'Missing' else: - return '{:.3f}'.format(value) # type: ignore + return '{:.3f}'.format(value) From 0ea24fc1ec2a63f3cfda97bc804e7bc4677d3858 Mon Sep 17 00:00:00 2001 From: Karol Szepietowski Date: Mon, 16 Dec 2019 19:01:19 +0100 Subject: [PATCH 14/18] Remove unimportant `# type: ignore` in code --- eli5/_feature_names.py | 4 ++-- eli5/_feature_weights.py | 2 +- eli5/_graphviz.py | 2 +- eli5/base.py | 2 +- eli5/base_utils.py | 4 ++-- eli5/catboost.py | 4 ++-- eli5/formatters/as_dataframe.py | 2 +- eli5/formatters/as_dict.py | 4 ++-- eli5/formatters/html.py | 4 ++-- 
eli5/formatters/image.py | 6 +++--- eli5/formatters/text_helpers.py | 2 +- eli5/formatters/utils.py | 3 +-- eli5/ipython.py | 4 ++-- eli5/keras/explain_prediction.py | 18 +++++++++--------- eli5/keras/gradcam.py | 10 +++++----- eli5/lightgbm.py | 4 ++-- eli5/lightning.py | 6 +++--- eli5/lime/_vectorizer.py | 4 ++-- eli5/lime/lime.py | 12 ++++++------ eli5/lime/samplers.py | 14 +++++++------- eli5/lime/textutils.py | 4 ++-- eli5/lime/utils.py | 12 ++++++------ eli5/permutation_importance.py | 4 ++-- eli5/sklearn/explain_prediction.py | 16 ++++++++-------- eli5/sklearn/explain_weights.py | 18 +++++++++--------- eli5/sklearn/permutation_importance.py | 14 +++++++------- eli5/sklearn/text.py | 4 ++-- eli5/sklearn/transform.py | 10 +++++----- eli5/sklearn/treeinspect.py | 4 ++-- eli5/sklearn/unhashing.py | 8 ++++---- eli5/sklearn/utils.py | 10 +++++----- eli5/sklearn_crfsuite/explain_weights.py | 6 +++--- eli5/utils.py | 4 ++-- eli5/xgboost.py | 6 +++--- tox.ini | 2 +- 35 files changed, 116 insertions(+), 117 deletions(-) diff --git a/eli5/_feature_names.py b/eli5/_feature_names.py index fecb820f..ff1fd80c 100644 --- a/eli5/_feature_names.py +++ b/eli5/_feature_names.py @@ -5,8 +5,8 @@ Union, Callable, Pattern ) -import numpy as np # type: ignore -import scipy.sparse as sp # type: ignore +import numpy as np +import scipy.sparse as sp class FeatureNames(Sized, Iterable): diff --git a/eli5/_feature_weights.py b/eli5/_feature_weights.py index 0b737795..1c096e3a 100644 --- a/eli5/_feature_weights.py +++ b/eli5/_feature_weights.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import numpy as np # type: ignore +import numpy as np from eli5.base import FeatureWeights, FeatureWeight from .utils import argsort_k_largest_positive, argsort_k_smallest, mask diff --git a/eli5/_graphviz.py b/eli5/_graphviz.py index 4632a925..36b26a17 100644 --- a/eli5/_graphviz.py +++ b/eli5/_graphviz.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -import graphviz # 
type: ignore +import graphviz def is_supported(): diff --git a/eli5/base.py b/eli5/base.py index f6e4c66b..3bac3b5b 100644 --- a/eli5/base.py +++ b/eli5/base.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from typing import Any, List, Tuple, Union, Optional -import numpy as np # type: ignore +import numpy as np from .base_utils import attrs from .formatters.features import FormattedFeatureName diff --git a/eli5/base_utils.py b/eli5/base_utils.py index 779c6d64..f1081c3b 100644 --- a/eli5/base_utils.py +++ b/eli5/base_utils.py @@ -1,9 +1,9 @@ import inspect -import attr # type: ignore +import attr try: - from functools import singledispatch # type: ignore + from functools import singledispatch except ImportError: from singledispatch import singledispatch # type: ignore diff --git a/eli5/catboost.py b/eli5/catboost.py index 2e495cb6..56abfa9e 100644 --- a/eli5/catboost.py +++ b/eli5/catboost.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division -import numpy as np # type: ignore -import catboost # type: ignore +import numpy as np +import catboost from eli5.explain import explain_weights from eli5._feature_importances import get_feature_importance_explanation diff --git a/eli5/formatters/as_dataframe.py b/eli5/formatters/as_dataframe.py index 6f2fc302..5b801e75 100644 --- a/eli5/formatters/as_dataframe.py +++ b/eli5/formatters/as_dataframe.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional import warnings -import pandas as pd # type: ignore +import pandas as pd import eli5 from eli5.base import ( diff --git a/eli5/formatters/as_dict.py b/eli5/formatters/as_dict.py index 1878d8b0..fbad5ee5 100644 --- a/eli5/formatters/as_dict.py +++ b/eli5/formatters/as_dict.py @@ -1,7 +1,7 @@ import six -import attr # type: ignore -import numpy as np # type: ignore +import attr +import numpy as np from .features import FormattedFeatureName diff --git a/eli5/formatters/html.py b/eli5/formatters/html.py index 54cf2e6c..0167b33e 100644 --- a/eli5/formatters/html.py 
+++ b/eli5/formatters/html.py @@ -3,8 +3,8 @@ from itertools import groupby from typing import List, Optional, Tuple -import numpy as np # type: ignore -from jinja2 import Environment, PackageLoader # type: ignore +import numpy as np +from jinja2 import Environment, PackageLoader from eli5 import _graphviz from eli5.base import (Explanation, TargetExplanation, FeatureWeights, diff --git a/eli5/formatters/image.py b/eli5/formatters/image.py index 59a77fa2..f776b2c2 100644 --- a/eli5/formatters/image.py +++ b/eli5/formatters/image.py @@ -2,9 +2,9 @@ from __future__ import absolute_import from typing import Union, Optional, Callable -import numpy as np # type: ignore -from PIL import Image # type: ignore -import matplotlib.cm # type: ignore +import numpy as np +from PIL import Image +import matplotlib.cm from eli5.base import Explanation diff --git a/eli5/formatters/text_helpers.py b/eli5/formatters/text_helpers.py index dc5ff28a..c63c66b2 100644 --- a/eli5/formatters/text_helpers.py +++ b/eli5/formatters/text_helpers.py @@ -1,7 +1,7 @@ from collections import Counter from typing import List, Optional -import numpy as np # type: ignore +import numpy as np from eli5.base import TargetExplanation, WeightedSpans, DocWeightedSpans from eli5.base_utils import attrs diff --git a/eli5/formatters/utils.py b/eli5/formatters/utils.py index 244eb15a..2e6d2d39 100644 --- a/eli5/formatters/utils.py +++ b/eli5/formatters/utils.py @@ -2,10 +2,9 @@ from itertools import chain import re import six -from numbers import Real from typing import Any, Union, List, Dict, Callable, Match, Optional -import numpy as np # type: ignore +import numpy as np from eli5.base import Explanation from .features import FormattedFeatureName diff --git a/eli5/ipython.py b/eli5/ipython.py index 0039c39e..033a840c 100644 --- a/eli5/ipython.py +++ b/eli5/ipython.py @@ -3,7 +3,7 @@ from typing import Any, Dict, Tuple import warnings -from IPython.display import HTML, Image # type: ignore +from IPython.display 
import HTML, Image from .explain import explain_weights, explain_prediction from .formatters import format_as_html, fields @@ -11,7 +11,7 @@ from .formatters.image import format_as_image except ImportError as e: # missing dependencies - format_as_image = e # type: ignore + format_as_image = e # type: ignore FORMAT_KWARGS = {'include_styles', 'force_weights', diff --git a/eli5/keras/explain_prediction.py b/eli5/keras/explain_prediction.py index d4928276..73deb25b 100644 --- a/eli5/keras/explain_prediction.py +++ b/eli5/keras/explain_prediction.py @@ -2,21 +2,21 @@ from __future__ import absolute_import from typing import Union, Optional, Callable, Tuple, List, TYPE_CHECKING if TYPE_CHECKING: - import PIL # type: ignore - -import numpy as np # type: ignore -import keras # type: ignore -import keras.backend as K # type: ignore -from keras.models import Model # type: ignore -from keras.layers import Layer # type: ignore -from keras.layers import ( # type: ignore + import PIL + +import numpy as np +import keras +import keras.backend as K +from keras.models import Model +from keras.layers import Layer +from keras.layers import ( Conv2D, MaxPooling2D, AveragePooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D, ) -from keras.preprocessing.image import array_to_img # type: ignore +from keras.preprocessing.image import array_to_img from eli5.base import Explanation, TargetExplanation from eli5.explain import explain_prediction diff --git a/eli5/keras/gradcam.py b/eli5/keras/gradcam.py index 31c80f4c..c8bb5922 100644 --- a/eli5/keras/gradcam.py +++ b/eli5/keras/gradcam.py @@ -2,11 +2,11 @@ from __future__ import absolute_import from typing import Union, Optional, Tuple, List -import numpy as np # type: ignore -import keras # type: ignore -import keras.backend as K # type: ignore -from keras.models import Model # type: ignore -from keras.layers import Layer # type: ignore +import numpy as np +import keras +import keras.backend as K +from keras.models import Model +from 
keras.layers import Layer def gradcam(weights, activations): diff --git a/eli5/lightgbm.py b/eli5/lightgbm.py index b4510912..c54236b7 100644 --- a/eli5/lightgbm.py +++ b/eli5/lightgbm.py @@ -3,8 +3,8 @@ from collections import defaultdict from typing import DefaultDict, Optional -import numpy as np # type: ignore -import lightgbm # type: ignore +import numpy as np +import lightgbm from eli5.explain import explain_weights, explain_prediction from eli5._feature_importances import get_feature_importance_explanation diff --git a/eli5/lightning.py b/eli5/lightning.py index 417d4f15..2f648064 100644 --- a/eli5/lightning.py +++ b/eli5/lightning.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -from lightning.impl.base import BaseEstimator # type: ignore -from lightning import classification, regression # type: ignore -from sklearn.multiclass import OneVsRestClassifier # type: ignore +from lightning.impl.base import BaseEstimator +from lightning import classification, regression +from sklearn.multiclass import OneVsRestClassifier from eli5.base import Explanation from eli5.base_utils import singledispatch diff --git a/eli5/lime/_vectorizer.py b/eli5/lime/_vectorizer.py index 4d40ab31..5356d6cd 100644 --- a/eli5/lime/_vectorizer.py +++ b/eli5/lime/_vectorizer.py @@ -2,8 +2,8 @@ from __future__ import absolute_import from typing import Tuple, Callable, Dict, Optional, List -import numpy as np # type: ignore -from sklearn.base import BaseEstimator, TransformerMixin # type: ignore +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin from eli5.base import DocWeightedSpans, FeatureWeights from eli5.sklearn.text import _get_feature_weights_dict diff --git a/eli5/lime/lime.py b/eli5/lime/lime.py index c9c9aa78..2968da04 100644 --- a/eli5/lime/lime.py +++ b/eli5/lime/lime.py @@ -6,12 +6,12 @@ from __future__ import absolute_import from typing import Any, Callable, Dict, Optional -import numpy as np # type: ignore -from 
sklearn.feature_extraction.text import CountVectorizer # type: ignore -from sklearn.linear_model import SGDClassifier # type: ignore -from sklearn.model_selection import train_test_split # type: ignore -from sklearn.utils import check_random_state # type: ignore -from sklearn.base import clone, BaseEstimator # type: ignore +import numpy as np +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.linear_model import SGDClassifier +from sklearn.model_selection import train_test_split +from sklearn.utils import check_random_state +from sklearn.base import clone, BaseEstimator import eli5 from eli5.sklearn.utils import sklearn_version diff --git a/eli5/lime/samplers.py b/eli5/lime/samplers.py index ff72f568..d079b471 100644 --- a/eli5/lime/samplers.py +++ b/eli5/lime/samplers.py @@ -5,13 +5,13 @@ from typing import List, Tuple, Any, Union, Dict, Optional import six -import numpy as np # type: ignore -from scipy.stats import itemfreq # type: ignore -from sklearn.base import BaseEstimator, clone # type: ignore -from sklearn.neighbors import KernelDensity # type: ignore -from sklearn.metrics import pairwise_distances # type: ignore -from sklearn.model_selection import GridSearchCV, KFold # type: ignore -from sklearn.utils import check_random_state # type: ignore +import numpy as np +from scipy.stats import itemfreq +from sklearn.base import BaseEstimator, clone +from sklearn.neighbors import KernelDensity +from sklearn.metrics import pairwise_distances +from sklearn.model_selection import GridSearchCV, KFold +from sklearn.utils import check_random_state from eli5.utils import vstack from eli5.lime.utils import rbf diff --git a/eli5/lime/textutils.py b/eli5/lime/textutils.py index e896f347..98da0428 100644 --- a/eli5/lime/textutils.py +++ b/eli5/lime/textutils.py @@ -7,8 +7,8 @@ import math from typing import List, Tuple, Union, Optional -import numpy as np # type: ignore -from sklearn.utils import check_random_state # type: ignore +import numpy as np 
+from sklearn.utils import check_random_state from eli5.utils import indices_to_bool_mask, vstack diff --git a/eli5/lime/utils.py b/eli5/lime/utils.py index ee271a65..120dbfbd 100644 --- a/eli5/lime/utils.py +++ b/eli5/lime/utils.py @@ -2,12 +2,12 @@ from __future__ import absolute_import from typing import List, Any -import numpy as np # type: ignore -from scipy.stats import entropy # type: ignore -from sklearn.pipeline import Pipeline # type: ignore -from sklearn.utils import check_random_state, issparse # type: ignore -from sklearn.utils.metaestimators import if_delegate_has_method # type: ignore -from sklearn.utils import shuffle as _shuffle # type: ignore +import numpy as np +from scipy.stats import entropy +from sklearn.pipeline import Pipeline +from sklearn.utils import check_random_state, issparse +from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils import shuffle as _shuffle from eli5.utils import vstack from eli5.sklearn.utils import sklearn_version diff --git a/eli5/permutation_importance.py b/eli5/permutation_importance.py index b5c4a3f0..8ff5d40d 100644 --- a/eli5/permutation_importance.py +++ b/eli5/permutation_importance.py @@ -13,8 +13,8 @@ from __future__ import absolute_import from typing import Tuple, List, Callable, Any -import numpy as np # type: ignore -from sklearn.utils import check_random_state # type: ignore +import numpy as np +from sklearn.utils import check_random_state def iter_shuffled(X, columns_to_shuffle=None, pre_shuffle=False, diff --git a/eli5/sklearn/explain_prediction.py b/eli5/sklearn/explain_prediction.py index 88586f05..18dcc36f 100644 --- a/eli5/sklearn/explain_prediction.py +++ b/eli5/sklearn/explain_prediction.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from functools import partial -import numpy as np # type: ignore -import scipy.sparse as sp # type: ignore -from sklearn.base import BaseEstimator # type: ignore -from sklearn.ensemble import ( # type: ignore +import numpy as np +import 
scipy.sparse as sp +from sklearn.base import BaseEstimator +from sklearn.ensemble import ( ExtraTreesClassifier, ExtraTreesRegressor, GradientBoostingClassifier, @@ -12,7 +12,7 @@ RandomForestClassifier, RandomForestRegressor, ) -from sklearn.linear_model import ( # type: ignore +from sklearn.linear_model import ( ElasticNet, # includes Lasso, MultiTaskElasticNet, etc. ElasticNetCV, HuberRegressor, @@ -34,7 +34,7 @@ SGDRegressor, TheilSenRegressor, ) -from sklearn.svm import ( # type: ignore +from sklearn.svm import ( LinearSVC, LinearSVR, SVC, @@ -43,8 +43,8 @@ NuSVR, OneClassSVM, ) -from sklearn.multiclass import OneVsRestClassifier # type: ignore -from sklearn.tree import ( # type: ignore +from sklearn.multiclass import OneVsRestClassifier +from sklearn.tree import ( DecisionTreeClassifier, DecisionTreeRegressor ) diff --git a/eli5/sklearn/explain_weights.py b/eli5/sklearn/explain_weights.py index 019140a6..49010fb4 100644 --- a/eli5/sklearn/explain_weights.py +++ b/eli5/sklearn/explain_weights.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import numpy as np # type: ignore +import numpy as np -from sklearn.base import BaseEstimator, RegressorMixin # type: ignore -from sklearn.pipeline import Pipeline # type: ignore -from sklearn.linear_model import ( # type: ignore +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.pipeline import Pipeline +from sklearn.linear_model import ( ElasticNet, # includes Lasso, MultiTaskElasticNet, etc. 
ElasticNetCV, HuberRegressor, @@ -27,8 +27,8 @@ SGDRegressor, TheilSenRegressor, ) -from sklearn.multiclass import OneVsRestClassifier # type: ignore -from sklearn.svm import ( # type: ignore +from sklearn.multiclass import OneVsRestClassifier +from sklearn.svm import ( LinearSVC, LinearSVR, SVC, @@ -38,8 +38,8 @@ OneClassSVM, ) # TODO: see https://github.com/scikit-learn/scikit-learn/pull/2250 -from sklearn.naive_bayes import BernoulliNB, MultinomialNB # type: ignore -from sklearn.ensemble import ( # type: ignore +from sklearn.naive_bayes import BernoulliNB, MultinomialNB +from sklearn.ensemble import ( GradientBoostingClassifier, GradientBoostingRegressor, AdaBoostClassifier, @@ -49,7 +49,7 @@ ExtraTreesClassifier, ExtraTreesRegressor, ) -from sklearn.tree import ( # type: ignore +from sklearn.tree import ( DecisionTreeClassifier, DecisionTreeRegressor, ) diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index 987343c5..5a963880 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -2,23 +2,23 @@ from functools import partial from typing import List -import numpy as np # type: ignore -from sklearn.model_selection import check_cv # type: ignore -from sklearn.utils.metaestimators import if_delegate_has_method # type: ignore -from sklearn.utils import check_array, check_random_state # type: ignore -from sklearn.base import ( # type: ignore +import numpy as np +from sklearn.model_selection import check_cv +from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils import check_array, check_random_state +from sklearn.base import ( BaseEstimator, MetaEstimatorMixin, clone, is_classifier ) -from sklearn.metrics.scorer import check_scoring # type: ignore +from sklearn.metrics.scorer import check_scoring from eli5.permutation_importance import get_score_importances from eli5.sklearn.utils import pandas_available if pandas_available: - import pandas as pd # 
type: ignore + import pandas as pd CAVEATS_CV_NONE = """ Feature importances are computed on the same data as used for training, diff --git a/eli5/sklearn/text.py b/eli5/sklearn/text.py index 20da0f48..fb2748bf 100644 --- a/eli5/sklearn/text.py +++ b/eli5/sklearn/text.py @@ -1,11 +1,11 @@ from __future__ import absolute_import from typing import Any, Union, Callable, Dict, List, Optional, Set, Tuple -from sklearn.pipeline import FeatureUnion # type: ignore +from sklearn.pipeline import FeatureUnion try: from sklearn.feature_extraction.text import _VectorizerMixin as VectorizerMixin except ImportError: # Changed in scikit-learn 0.22 - from sklearn.feature_extraction.text import VectorizerMixin # type: ignore + from sklearn.feature_extraction.text import VectorizerMixin from eli5.base import ( DocWeightedSpans, WeightedSpans, FeatureWeights, FeatureWeight, diff --git a/eli5/sklearn/transform.py b/eli5/sklearn/transform.py index 2d431f8c..8d79f555 100644 --- a/eli5/sklearn/transform.py +++ b/eli5/sklearn/transform.py @@ -1,11 +1,11 @@ """transform_feature_names implementations for scikit-learn transformers """ -import numpy as np # type: ignore -from sklearn.pipeline import Pipeline, FeatureUnion # type: ignore -from sklearn.feature_selection.base import SelectorMixin # type: ignore +import numpy as np +from sklearn.pipeline import Pipeline, FeatureUnion +from sklearn.feature_selection.base import SelectorMixin -from sklearn.preprocessing import ( # type: ignore +from sklearn.preprocessing import ( MinMaxScaler, StandardScaler, MaxAbsScaler, @@ -26,7 +26,7 @@ def _select_names(est, in_names=None): return [in_names[i] for i in np.flatnonzero(mask)] try: - from sklearn.linear_model import ( # type: ignore + from sklearn.linear_model import ( RandomizedLogisticRegression, RandomizedLasso, ) diff --git a/eli5/sklearn/treeinspect.py b/eli5/sklearn/treeinspect.py index 373397bd..d0e9f76a 100644 --- a/eli5/sklearn/treeinspect.py +++ b/eli5/sklearn/treeinspect.py @@ -7,8 +7,8 
@@ """ from __future__ import absolute_import, division -from sklearn.base import ClassifierMixin # type: ignore -from sklearn.tree import _tree, export_graphviz # type: ignore +from sklearn.base import ClassifierMixin +from sklearn.tree import _tree, export_graphviz from eli5.base import TreeInfo, NodeInfo diff --git a/eli5/sklearn/unhashing.py b/eli5/sklearn/unhashing.py index 64b44d86..f4f79b8d 100644 --- a/eli5/sklearn/unhashing.py +++ b/eli5/sklearn/unhashing.py @@ -7,14 +7,14 @@ from itertools import chain from typing import List, Iterable, Any, Dict, Tuple, Union -import numpy as np # type: ignore +import numpy as np import six -from sklearn.base import BaseEstimator, TransformerMixin # type: ignore -from sklearn.feature_extraction.text import ( # type: ignore +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.feature_extraction.text import ( HashingVectorizer, FeatureHasher, ) -from sklearn.pipeline import FeatureUnion # type: ignore +from sklearn.pipeline import FeatureUnion from eli5._feature_names import FeatureNames diff --git a/eli5/sklearn/utils.py b/eli5/sklearn/utils.py index ba3680aa..286d078a 100644 --- a/eli5/sklearn/utils.py +++ b/eli5/sklearn/utils.py @@ -3,9 +3,9 @@ from distutils.version import LooseVersion from typing import Any, Optional, List, Tuple -import numpy as np # type: ignore -import scipy.sparse as sp # type: ignore -from sklearn.multiclass import OneVsRestClassifier # type: ignore +import numpy as np +import scipy.sparse as sp +from sklearn.multiclass import OneVsRestClassifier from eli5.sklearn.unhashing import invert_hashing_and_fit, handle_hashing_vec from eli5._feature_names import FeatureNames @@ -214,7 +214,7 @@ def get_num_features(estimator): try: - import pandas as pd # type: ignore + import pandas as pd pandas_available = True except ImportError: pandas_available = False @@ -277,5 +277,5 @@ def sklearn_version(): >>> sklearn_version() > '0.17' True """ - from sklearn import __version__ # type: 
ignore + from sklearn import __version__ return LooseVersion(__version__) diff --git a/eli5/sklearn_crfsuite/explain_weights.py b/eli5/sklearn_crfsuite/explain_weights.py index e2a9435d..6007efd7 100644 --- a/eli5/sklearn_crfsuite/explain_weights.py +++ b/eli5/sklearn_crfsuite/explain_weights.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import numpy as np # type: ignore -from scipy import sparse as sp # type: ignore -from sklearn_crfsuite import CRF # type: ignore +import numpy as np +from scipy import sparse as sp +from sklearn_crfsuite import CRF from eli5.base import Explanation, TargetExplanation, TransitionFeatureWeights from eli5.explain import explain_weights diff --git a/eli5/utils.py b/eli5/utils.py index f1ea32c4..e5f669f8 100644 --- a/eli5/utils.py +++ b/eli5/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -import numpy as np # type: ignore -from scipy import sparse as sp # type: ignore +import numpy as np +from scipy import sparse as sp def argsort_k_largest(x, k): diff --git a/eli5/xgboost.py b/eli5/xgboost.py index 86d3ad58..ce6a079a 100644 --- a/eli5/xgboost.py +++ b/eli5/xgboost.py @@ -4,9 +4,9 @@ import re from typing import Any, Dict, List, Tuple, Optional, Pattern -import numpy as np # type: ignore -import scipy.sparse as sp # type: ignore -from xgboost import ( # type: ignore +import numpy as np +import scipy.sparse as sp +from xgboost import ( XGBClassifier, XGBRegressor, Booster, diff --git a/tox.ini b/tox.ini index 45c686b4..b0592901 100644 --- a/tox.ini +++ b/tox.ini @@ -88,7 +88,7 @@ deps= mypy == 0.750 lxml commands= - mypy --html-report ./mypy-cov --check-untyped-defs eli5 + mypy --html-report ./mypy-cov --check-untyped-defs --ignore-missing-imports eli5 [testenv:docs] From 6bc7f8d183cf847363b916404a3c613a7a9c6039 Mon Sep 17 00:00:00 2001 From: Rafael Fernandes Date: Fri, 17 Jan 2020 14:56:40 -0300 Subject: [PATCH 15/18] Slice sample weight to work with cross validation --- 
eli5/sklearn/permutation_importance.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index 5a963880..7cb77bff 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -214,8 +214,12 @@ def _cv_scores_importances(self, X, y, groups=None, **fit_params): cv = check_cv(self.cv, y, is_classifier(self.estimator)) feature_importances = [] # type: List base_scores = [] # type: List[float] + weights = fit_params.get('sample_weight', None) + if weights is None: + weights = np.ones(len(y)) + fit_params.pop('sample_weight', None) for train, test in cv.split(X, y, groups): - est = clone(self.estimator).fit(X[train], y[train], **fit_params) + est = clone(self.estimator).fit(X[train], y[train], sample_weight=weights[train], **fit_params) score_func = partial(self.scorer_, est) _base_score, _importances = self._get_score_importances( score_func, X[test], y[test]) From 64bee305c2d309122305044e98fdd3bca767e1be Mon Sep 17 00:00:00 2001 From: Rafael Fernandes Date: Mon, 20 Jan 2020 13:19:50 -0300 Subject: [PATCH 16/18] pass sample weight only when it is present --- eli5/sklearn/permutation_importance.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index 7cb77bff..ac139801 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -215,11 +215,12 @@ def _cv_scores_importances(self, X, y, groups=None, **fit_params): feature_importances = [] # type: List base_scores = [] # type: List[float] weights = fit_params.get('sample_weight', None) - if weights is None: - weights = np.ones(len(y)) fit_params.pop('sample_weight', None) for train, test in cv.split(X, y, groups): - est = clone(self.estimator).fit(X[train], y[train], sample_weight=weights[train], **fit_params) + if weights is None: + est = 
clone(self.estimator).fit(X[train], y[train], **fit_params) + else: + est = clone(self.estimator).fit(X[train], y[train], sample_weight=weights[train], **fit_params) score_func = partial(self.scorer_, est) _base_score, _importances = self._get_score_importances( score_func, X[test], y[test]) From f587bfa21f860d7d16e209028886d69e5bc2ee2a Mon Sep 17 00:00:00 2001 From: Rafael Fernandes Date: Mon, 20 Jan 2020 13:21:05 -0300 Subject: [PATCH 17/18] test to sklearn perm imp with cv and sample weight --- tests/test_sklearn_permutation_importance.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_sklearn_permutation_importance.py b/tests/test_sklearn_permutation_importance.py index 4fe942fd..4ffec3ba 100644 --- a/tests/test_sklearn_permutation_importance.py +++ b/tests/test_sklearn_permutation_importance.py @@ -3,7 +3,7 @@ import numpy as np from sklearn.base import is_classifier, is_regressor from sklearn.svm import SVR, SVC -from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier from sklearn.model_selection import train_test_split, cross_val_score from sklearn.pipeline import make_pipeline from sklearn.feature_selection import SelectFromModel @@ -165,6 +165,7 @@ def test_explain_weights(iris_train): for _expl in res: assert "petal width (cm)" in _expl + def test_pandas_xgboost_support(iris_train): xgboost = pytest.importorskip('xgboost') pd = pytest.importorskip('pandas') @@ -175,3 +176,17 @@ def test_pandas_xgboost_support(iris_train): est.fit(X, y) # we expect no exception to be raised here when using xgboost with pd.DataFrame perm = PermutationImportance(est).fit(X, y) + + +def test_cv_sample_weight(iris_train): + X, y, feature_names, target_names = iris_train + weights_ones = np.ones(len(y)) + model = RandomForestClassifier(random_state=42) + + # we expect no exception to be raised when passing weights with a CV + perm_weights = 
PermutationImportance(model, cv=5, random_state=42).\ + fit(X, y, sample_weight=weights_ones) + perm = PermutationImportance(model, cv=5, random_state=42).fit(X, y) + + # passing a vector of weights filled with one should be the same as passing no weights + assert (perm.feature_importances_ == perm_weights.feature_importances_).all() \ No newline at end of file From 729e5579ca770068b313829f70b33ad43e0f17ad Mon Sep 17 00:00:00 2001 From: Rafael Fernandes Date: Tue, 21 Jan 2020 19:01:12 -0300 Subject: [PATCH 18/18] changes as PR suggestion --- eli5/sklearn/permutation_importance.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/eli5/sklearn/permutation_importance.py b/eli5/sklearn/permutation_importance.py index ac139801..370be8be 100644 --- a/eli5/sklearn/permutation_importance.py +++ b/eli5/sklearn/permutation_importance.py @@ -214,13 +214,12 @@ def _cv_scores_importances(self, X, y, groups=None, **fit_params): cv = check_cv(self.cv, y, is_classifier(self.estimator)) feature_importances = [] # type: List base_scores = [] # type: List[float] - weights = fit_params.get('sample_weight', None) - fit_params.pop('sample_weight', None) + weights = fit_params.pop('sample_weight', None) + fold_fit_params = fit_params.copy() for train, test in cv.split(X, y, groups): - if weights is None: - est = clone(self.estimator).fit(X[train], y[train], **fit_params) - else: - est = clone(self.estimator).fit(X[train], y[train], sample_weight=weights[train], **fit_params) + if weights is not None: + fold_fit_params['sample_weight'] = weights[train] + est = clone(self.estimator).fit(X[train], y[train], **fold_fit_params) score_func = partial(self.scorer_, est) _base_score, _importances = self._get_score_importances( score_func, X[test], y[test])