diff --git a/dtale/charts/utils.py b/dtale/charts/utils.py index 2b59a38c..b98ba872 100644 --- a/dtale/charts/utils.py +++ b/dtale/charts/utils.py @@ -758,13 +758,23 @@ def build_base_chart( classifier = classify_type(find_dtype(data[col])) if classifier == "F" or (classifier == "I" and group_type == "bins"): if bin_type == "width": - data.loc[:, col] = pd.qcut( - data[col], q=bins_val, duplicates="drop" - ).astype("str") + kwargs = ( + {"duplicates": "drop"} + if pandas_util.check_pandas_version("0.23.0") + else {} + ) + data.loc[:, col] = pd.qcut(data[col], q=bins_val, **kwargs).astype( + "str" + ) + kwargs_str = ( + ', duplicates="drop"' + if pandas_util.check_pandas_version("0.23.0") + else "" + ) code.append( ( - "chart_data.loc[:, '{col}'] = pd.qcut(chart_data['{col}'], q={bins}, duplicates=\"drop\")" - ).format(col=col, bins=bins_val) + "chart_data.loc[:, '{col}'] = pd.qcut(chart_data['{col}'], q={bins}{kwargs})" + ).format(col=col, bins=bins_val, kwargs=kwargs_str) ) else: bins_data = data[col].dropna() @@ -774,18 +784,28 @@ def build_base_chart( np.arange(npt), np.sort(bins_data), ) + kwargs = ( + {"duplicates": "drop"} + if pandas_util.check_pandas_version("0.23.0") + else {} + ) data.loc[:, col] = pd.cut( - data[col], bins=equal_freq_bins, duplicates="drop" + data[col], bins=equal_freq_bins, **kwargs ).astype("str") + cut_kwargs_str = ( + ', duplicates="drop"' + if pandas_util.check_pandas_version("0.23.0") + else "" + ) code.append( ( "bins_data = data['{col}'].dropna()\n" "npt = len(bins_data)\n" "equal_freq_bins = np.interp(np.linspace(0, npt, {bins}), np.arange(npt), " "np.sort(bins_data))\n" - "chart_data.loc[:, '{col}'] = pd.cut(chart_data['{col}'], bins=equal_freq_bins, " - 'duplicates="drop")' - ).format(col=col, bins=bins_val + 1) + "chart_data.loc[:, '{col}'] = pd.cut(chart_data['{col}'], bins=equal_freq_bins" + "{cut_kwargs})" + ).format(col=col, bins=bins_val + 1, cut_kwargs=cut_kwargs_str) ) main_group = group_col diff --git a/dtale/column_builders.py b/dtale/column_builders.py index 55954832..56780b69 100644 --- a/dtale/column_builders.py +++ b/dtale/column_builders.py @@ -174,8 +174,11 @@ def build_column(self, data): self.cfg.get(p) for p in ["col", "search", "replacement", "caseSensitive", "regex"] ) + kwargs = dict(case=case) + if pandas_util.check_pandas_version("0.23.0"): + kwargs["regex"] = regex return pd.Series( - data[col].str.replace(search, replacement, case=case, regex=regex), + data[col].str.replace(search, replacement, **kwargs), index=data.index, name=self.name, ) @@ -185,12 +188,16 @@ def build_code(self): self.cfg.get(p) for p in ["col", "search", "replacement", "caseSensitive", "regex"] ) - return "data['{col}'].str.replace('{search}', '{replacement}', case={case}, regex={regex})".format( + kwargs = "" + if pandas_util.check_pandas_version("0.23.0"): + kwargs = ", regex='{}'".format("True" if regex else "False") + + return "data['{col}'].str.replace('{search}', '{replacement}', case={case}{kwargs})".format( col=col, search=search, replacement=replacement, case="True" if case else "False", - regex="True" if regex else "False", + kwargs=kwargs, ) @@ -1473,7 +1480,7 @@ def build_column(self, data): self.cfg.get(p) for p in ["col", "periods", "fillValue", "dtype"] ) kwargs = {} - if fill_value is not None: + if fill_value is not None and pandas_util.check_pandas_version("0.24.0"): fill_formatter = find_dtype_formatter(dtype) kwargs["fill_value"] = fill_formatter(fill_value) return pd.Series( @@ -1485,7 +1492,7 @@ def build_code(self): self.cfg.get(p) for p in ["col", "periods", "fillValue", "dtype"] ) kwargs = "" - if fill_value is not None: + if fill_value is not None and pandas_util.check_pandas_version("0.24.0"): if classify_type(dtype) == "S": kwargs = ", fill_value='{}'".format(fill_value) else: diff --git a/dtale/dash_application/charts.py b/dtale/dash_application/charts.py index 311c42c3..e03cdded 100644 --- a/dtale/dash_application/charts.py +++ b/dtale/dash_application/charts.py @@ -2544,28 +2544,30 @@ def build_charts(): continue layout = build_layout(build_title(selected_label, y2, group=series_key)) - chart = chart_builder( - graph_wrapper( - figure={ - "data": [ - go.Funnel( - **dict_merge( - dict(x=series[y2], y=series["x"]), - name_builder(y2, series_key), - ) + chart = graph_wrapper( + figure={ + "data": [ + go.Funnel( + **dict_merge( + dict(x=series[y2], y=series["x"]), + name_builder(y2, series_key), ) - ], - "layout": layout, - }, - modal=inputs.get("modal", False), - ), - group_filter=dict_merge( - dict(y=y2), - {} - if series_key == "all" - else dict(group=series.get("_filter_")), - ), + ) + ], + "layout": layout, + }, + modal=inputs.get("modal", False), ) + if not export: + chart = chart_builder( + chart, + group_filter=dict_merge( + dict(y=y2), + {} + if series_key == "all" + else dict(group=series.get("_filter_")), + ), + ) if len(negative_values): error_title = ( "The following negative values could not be represented within the {}Funnel chart" @@ -2600,13 +2602,16 @@ def build_charts(): title["title"]["text"] += " stacked by {}".format(", ".join(group)) layout = build_layout(title) - yield chart_builder( - graph_wrapper( - figure={"data": stacked_data, "layout": layout}, - modal=inputs.get("modal", False), - ), - group_filter=dict(y=final_cols[0]), + chart = graph_wrapper( + figure={"data": stacked_data, "layout": layout}, + modal=inputs.get("modal", False), ) + if not export: + chart = chart_builder( + chart, + group_filter=dict(y=final_cols[0]), + ) + yield chart if export: return next(build_charts()) diff --git a/dtale/query.py b/dtale/query.py index f5828d89..122097f5 100644 --- a/dtale/query.py +++ b/dtale/query.py @@ -1,8 +1,8 @@ import pandas as pd -from pkg_resources import parse_version import dtale.global_state as global_state +from dtale.pandas_util import check_pandas_version from dtale.utils import format_data, get_bool_arg @@ -142,7 +142,7 @@ def _load_pct(df): return _load_pct(df), [] return _load_pct(df) - is_pandas25 = parse_version(pd.__version__) >= parse_version("0.25.0") + is_pandas25 = check_pandas_version("0.25.0") curr_app_settings = global_state.get_app_settings() engine = curr_app_settings.get("query_engine", "python") filtered_indexes = [] diff --git a/dtale/views.py b/dtale/views.py index 4ffad872..bd8f8b09 100644 --- a/dtale/views.py +++ b/dtale/views.py @@ -3859,7 +3859,8 @@ def network_data(data_id): edges.columns = ["to", "from"] if weight: edges.loc[:, "value"] = df[weight] - edges = edges.to_dict(orient="records") + edge_f = grid_formatter(grid_columns(edges), nan_display="nan") + edges = edge_f.format_dicts(edges.itertuples()) def build_mapping(col): if col: diff --git a/tests/dtale/column_builders/test_column_builders.py b/tests/dtale/column_builders/test_column_builders.py index 14e4ba7e..7d3e8c55 100644 --- a/tests/dtale/column_builders/test_column_builders.py +++ b/tests/dtale/column_builders/test_column_builders.py @@ -373,7 +373,10 @@ def test_exponential_smoothing(rolling_data): verify_builder(builder, lambda col: col.isnull().sum() == 0) -@pytest.mark.unit +@pytest.mark.skipif( + not pandas_util.check_pandas_version("0.24.0"), + reason="requires pandas 0.24.0 or higher", +) def test_shift(rolling_data): import dtale.views as views diff --git a/tests/dtale/correlations/test_views.py b/tests/dtale/correlations/test_views.py index f016bac7..05acb4b4 100644 --- a/tests/dtale/correlations/test_views.py +++ b/tests/dtale/correlations/test_views.py @@ -148,8 +148,9 @@ def test_get_correlations(unittest, test_data, rolling_data): @pytest.mark.skipif( - parse_version(platform.python_version()) < parse_version("3.6.0"), - reason="requires python 3.6 or higher", + parse_version(platform.python_version()) < parse_version("3.6.0") + or not pandas_util.check_pandas_version("1.0.0"), + reason="requires python 3.6 or higher and pandas 1.0.0 or higher", ) def test_get_pps_matrix(unittest, test_data): import dtale.views as views @@ -210,7 +211,10 @@ def test_get_pps_matrix(unittest, test_data): corr_ts.columns = ['date', 'corr']""" -@pytest.mark.unit +@pytest.mark.skipif( + not pandas_util.check_pandas_version("1.0.0"), + reason="requires pandas 1.0.0 or higher", +) def test_get_correlations_ts(unittest, rolling_data): import dtale.views as views @@ -340,7 +344,10 @@ def test_get_correlations_ts(unittest, rolling_data): only_in_s1 = len(scatter_data[scatter_data['bar'].isnull()])""" -@pytest.mark.unit +@pytest.mark.skipif( + not pandas_util.check_pandas_version("1.0.0"), + reason="requires pandas 1.0.0 or higher", +) def test_get_scatter(unittest, rolling_data): import dtale.views as views diff --git a/tests/dtale/ppscore/test_calculation.py b/tests/dtale/ppscore/test_calculation.py index 7f6ecbe2..499d4929 100644 --- a/tests/dtale/ppscore/test_calculation.py +++ b/tests/dtale/ppscore/test_calculation.py @@ -7,6 +7,7 @@ import sys import dtale.ppscore as pps +from dtale.pandas_util import check_pandas_version @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python 3.6 or higher") @@ -49,7 +50,10 @@ def test__determine_case_and_prepare_df(): df["Pclass_integer"], infer_datetime_format=True ) df["Survived_boolean"] = df["Survived_integer"].astype(bool) - df["Cabin_string"] = pd.Series(df["Cabin"].apply(str), dtype="string") + df["Cabin_string"] = pd.Series( + df["Cabin"].apply(str), + dtype="string" if check_pandas_version("1.1.0") else "object", + ) # check regression assert _determine_case_and_prepare_df(df, "x", "Age_float")[1] == "regression" @@ -112,7 +116,8 @@ def test_score(): df["x_greater_0_boolean"] = df["x"] > 0 # df["x_greater_0_string"] = df["x_greater_0_boolean"].astype(str) df["x_greater_0_string"] = pd.Series( - df["x_greater_0_boolean"].apply(str), dtype="string" + df["x_greater_0_boolean"].apply(str), + dtype="string" if check_pandas_version("1.1.0") else "object", ) df["x_greater_0_string_object"] = df["x_greater_0_string"].astype("object") df["x_greater_0_string_category"] = df["x_greater_0_string"].astype("category") @@ -174,89 +179,92 @@ def test_score(): duplicate_column_names_df, "unique_column_name", "duplicate_column_name" ) - # check cross_validation - # if more folds than data, there is an error - with pytest.raises(ValueError): - assert pps.score(df, "x", "y", cross_validation=2000, catch_errors=False) - - # check random_seed - assert pps.score(df, "x", "y", random_seed=1) == pps.score( - df, "x", "y", random_seed=1 - ) - assert pps.score(df, "x", "y", random_seed=1) != pps.score( - df, "x", "y", random_seed=2 - ) - # the random seed that is drawn automatically is smaller than <1000 - assert pps.score(df, "x", "y") != pps.score(df, "x", "y", random_seed=123456) - - # check invalid_score - invalid_score = -99 - assert ( - pps.score(df, "nan", "y", invalid_score=invalid_score)["ppscore"] - == invalid_score - ) - - # check catch_errors using the cross_validation error from above - assert ( - pps.score( - df, - "x", - "y", - cross_validation=2000, - invalid_score=invalid_score, - catch_errors=True, - )["ppscore"] - == invalid_score - ) - - # check case discrimination - assert pps.score(df, "x", "y")["case"] == "regression" - assert pps.score(df, "x", "x_greater_0_string")["case"] == "classification" - assert pps.score(df, "x", "constant")["case"] == "target_is_constant" - assert pps.score(df, "x", "x")["case"] == "predict_itself" - assert pps.score(df, "x", "id")["case"] == "target_is_id" - assert pps.score(df, "nan", "y")["case"] == "empty_dataframe_after_dropping_na" - - # check scores - # feature is id - assert pps.score(df, "id", "y")["ppscore"] == 0 - - # numeric feature and target - assert pps.score(df, "x", "y")["ppscore"] > 0.5 - assert pps.score(df, "y", "x")["ppscore"] < 0.05 + if check_pandas_version("1.0.0"): + # check cross_validation + # if more folds than data, there is an error + with pytest.raises(ValueError): + assert pps.score(df, "x", "y", cross_validation=2000, catch_errors=False) - # boolean feature or target - assert pps.score(df, "x", "x_greater_0_boolean")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_boolean", "x")["ppscore"] < 0.6 - - # string feature or target - assert pps.score(df, "x", "x_greater_0_string")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_string", "x")["ppscore"] < 0.6 - - # object feature or target - assert pps.score(df, "x", "x_greater_0_string_object")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_string_object", "x")["ppscore"] < 0.6 - - # category feature or target - assert pps.score(df, "x", "x_greater_0_string_category")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_string_category", "x")["ppscore"] < 0.6 - - # object feature or target - assert pps.score(df, "x", "x_greater_0_boolean_object")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_boolean_object", "x")["ppscore"] < 0.6 + # check random_seed + assert pps.score(df, "x", "y", random_seed=1) == pps.score( + df, "x", "y", random_seed=1 + ) + assert pps.score(df, "x", "y", random_seed=1) != pps.score( + df, "x", "y", random_seed=2 + ) + # the random seed that is drawn automatically is smaller than <1000 + assert pps.score(df, "x", "y") != pps.score(df, "x", "y", random_seed=123456) + + # check invalid_score + invalid_score = -99 + assert ( + pps.score(df, "nan", "y", invalid_score=invalid_score)["ppscore"] + == invalid_score + ) - # category feature or target - assert pps.score(df, "x", "x_greater_0_boolean_category")["ppscore"] > 0.6 - assert pps.score(df, "x_greater_0_boolean_category", "x")["ppscore"] < 0.6 + # check catch_errors using the cross_validation error from above + assert ( + pps.score( + df, + "x", + "y", + cross_validation=2000, + invalid_score=invalid_score, + catch_errors=True, + )["ppscore"] + == invalid_score + ) - # check special dtypes - # pd.IntegerArray e.g. Int64, Int8, etc - assert ( - pps.score(dtypes_df, "Survived_Int64", "Sex_object")["is_valid_score"] is True - ) - assert ( - pps.score(dtypes_df, "Sex_object", "Survived_Int64")["is_valid_score"] is True - ) + # check case discrimination + assert pps.score(df, "x", "y")["case"] == "regression" + assert pps.score(df, "x", "x_greater_0_string")["case"] == "classification" + assert pps.score(df, "x", "constant")["case"] == "target_is_constant" + assert pps.score(df, "x", "x")["case"] == "predict_itself" + assert pps.score(df, "x", "id")["case"] == "target_is_id" + assert pps.score(df, "nan", "y")["case"] == "empty_dataframe_after_dropping_na" + + # check scores + # feature is id + assert pps.score(df, "id", "y")["ppscore"] == 0 + + # numeric feature and target + assert pps.score(df, "x", "y")["ppscore"] > 0.5 + assert pps.score(df, "y", "x")["ppscore"] < 0.05 + + # boolean feature or target + assert pps.score(df, "x", "x_greater_0_boolean")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_boolean", "x")["ppscore"] < 0.6 + + # string feature or target + assert pps.score(df, "x", "x_greater_0_string")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_string", "x")["ppscore"] < 0.6 + + # object feature or target + assert pps.score(df, "x", "x_greater_0_string_object")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_string_object", "x")["ppscore"] < 0.6 + + # category feature or target + assert pps.score(df, "x", "x_greater_0_string_category")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_string_category", "x")["ppscore"] < 0.6 + + # object feature or target + assert pps.score(df, "x", "x_greater_0_boolean_object")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_boolean_object", "x")["ppscore"] < 0.6 + + # category feature or target + assert pps.score(df, "x", "x_greater_0_boolean_category")["ppscore"] > 0.6 + assert pps.score(df, "x_greater_0_boolean_category", "x")["ppscore"] < 0.6 + + # check special dtypes + # pd.IntegerArray e.g. Int64, Int8, etc + assert ( + pps.score(dtypes_df, "Survived_Int64", "Sex_object")["is_valid_score"] + is True + ) + assert ( + pps.score(dtypes_df, "Sex_object", "Survived_Int64")["is_valid_score"] + is True + ) @pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python 3.6 or higher") diff --git a/tests/dtale/test_cli.py b/tests/dtale/test_cli.py index 047f29a7..ea88b344 100644 --- a/tests/dtale/test_cli.py +++ b/tests/dtale/test_cli.py @@ -5,11 +5,11 @@ import mock import pandas as pd import pytest -from pkg_resources import parse_version from six import PY3 from dtale.cli import loaders, script from dtale.cli.clickutils import run +from dtale.pandas_util import check_pandas_version from tests import ExitStack, pdt @@ -92,7 +92,7 @@ def test_main(unittest): df, pd.DataFrame([dict(a=1, b=2, c=3)]), "loader should load csv" ) - if PY3 and parse_version(pd.__version__) >= parse_version("0.25.0"): + if PY3 and check_pandas_version("0.25.0"): with ExitStack() as stack: mock_show = stack.enter_context( mock.patch("dtale.cli.script.show", mock.Mock()) diff --git a/tests/dtale/test_column_filters.py b/tests/dtale/test_column_filters.py index 1b50c458..e7933431 100644 --- a/tests/dtale/test_column_filters.py +++ b/tests/dtale/test_column_filters.py @@ -2,9 +2,9 @@ import pandas as pd import pytest -from pkg_resources import parse_version from dtale.column_filters import DateFilter, NumericFilter, StringFilter +from dtale.pandas_util import check_pandas_version @pytest.mark.unit @@ -34,7 +34,7 @@ def test_date(): @pytest.mark.unit def test_string(): - is_pandas25 = parse_version(pd.__version__) >= parse_version("0.25.0") + is_pandas25 = check_pandas_version("0.25.0") def build_query(fltr): query = fltr.build_filter()["query"] diff --git a/tests/dtale/test_query.py b/tests/dtale/test_query.py index cda79271..20e3f12c 100644 --- a/tests/dtale/test_query.py +++ b/tests/dtale/test_query.py @@ -3,6 +3,7 @@ from six import PY3 import dtale.query as query +from dtale.pandas_util import check_pandas_version @pytest.mark.unit @@ -14,7 +15,7 @@ def test_run_query(): assert len(query.run_query(df, "`a` in @a", {"a": [1, 2, 3]})) == 3 - if PY3: + if PY3 and check_pandas_version("0.25.0"): df = pd.DataFrame( [ {"a.b": 1, "b": 2, "c": 3}, diff --git a/tests/dtale/test_show_loaders.py b/tests/dtale/test_show_loaders.py index bdee23cd..f8793864 100644 --- a/tests/dtale/test_show_loaders.py +++ b/tests/dtale/test_show_loaders.py @@ -4,8 +4,10 @@ import mock import pytest + from six import PY3 +from dtale.pandas_util import check_pandas_version from tests import ExitStack @@ -51,7 +53,7 @@ def test_show_excel(unittest): excel_path = os.path.join(os.path.dirname(__file__), "..", "data/test_df.xlsx") mock_show = mock.Mock() - if PY3: + if PY3 and check_pandas_version("0.25.0"): with mock.patch("dtale.cli.loaders.excel_loader.show", mock_show): dtale.show_excel(path=excel_path) mock_show.call_args[1]["data_loader"]() diff --git a/tests/dtale/test_timeseries_analysis.py b/tests/dtale/test_timeseries_analysis.py index c3b4a1ea..eabb2fec 100644 --- a/tests/dtale/test_timeseries_analysis.py +++ b/tests/dtale/test_timeseries_analysis.py @@ -110,6 +110,8 @@ def test_seasonal_decompose(unittest, ts_analysis_data): @pytest.mark.skipif(not PY3, reason="requires python 3 or higher") def test_stl(unittest, ts_analysis_data): + pytest.importorskip("statsmodels", minversion="0.11.0") + import dtale.views as views df, _ = views.format_data(ts_analysis_data) diff --git a/tests/dtale/test_upload.py b/tests/dtale/test_upload.py index 068270c1..ec28265c 100644 --- a/tests/dtale/test_upload.py +++ b/tests/dtale/test_upload.py @@ -6,6 +6,7 @@ from six import BytesIO, PY3 from dtale.app import build_app +from dtale.pandas_util import check_pandas_version from tests import ExitStack from tests.dtale import build_data_inst @@ -70,7 +71,7 @@ def test_upload(unittest): build_data_inst({c.port: df}) global_state.set_dtypes(c.port, views.build_dtypes_state(df)) assert global_state.size() == 1 - if PY3: + if PY3 and check_pandas_version("0.25.0"): c.post( "/dtale/upload", data={ diff --git a/tests/dtale/test_views.py b/tests/dtale/test_views.py index 9f7dd651..a78d8d9e 100644 --- a/tests/dtale/test_views.py +++ b/tests/dtale/test_views.py @@ -14,6 +14,7 @@ import dtale.pandas_util as pandas_util from dtale.app import build_app +from dtale.pandas_util import check_pandas_version from dtale.utils import DuplicateDataError from tests import ExitStack, pdt from tests.dtale import build_data_inst, build_settings, build_dtypes @@ -315,7 +316,7 @@ def test_startup(unittest): ["object", "category"], ) - if PY3 and parse_version(pd.__version__) >= parse_version("0.25.0"): + if PY3 and check_pandas_version("0.25.0"): s_int = pd.Series([1, 2, 3, 4, 5], index=list("abcde"), dtype=pd.Int64Dtype()) s2_int = s_int.reindex(["a", "b", "c", "f", "u"]) ints = pd.Series([1, 2, 3, 4, 5], index=list("abcfu"))