From 0b801fa8568e724a69d415dd41c488154bc3fe3b Mon Sep 17 00:00:00 2001 From: Devin Lu Date: Fri, 22 Apr 2022 13:55:07 -0700 Subject: [PATCH] refactor(eda): validated numerical target dtype, removed comments --- dataprep/eda/create_diff_report/__init__.py | 10 ---------- dataprep/eda/create_diff_report/diff_formatter.py | 2 ++ dataprep/eda/diff/render.py | 8 ++++---- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/dataprep/eda/create_diff_report/__init__.py b/dataprep/eda/create_diff_report/__init__.py index fde97e912..7607f7066 100644 --- a/dataprep/eda/create_diff_report/__init__.py +++ b/dataprep/eda/create_diff_report/__init__.py @@ -85,16 +85,6 @@ def create_diff_report( "legend_labels": components["legend_lables"], } - # {% for div in value.plots[1] %} - #
- # {{ div }} - # {% if key in context.components.dfs[1].variables %} - # {{ context.components.dfs[1].variables[key].plots[1][loop.index0] }} - # {% endif %} - #
- - # return context - template_base = ENV_LOADER.get_template("base.html") report = template_base.render(context=context, zip=zip) return Report(report) diff --git a/dataprep/eda/create_diff_report/diff_formatter.py b/dataprep/eda/create_diff_report/diff_formatter.py index 764e8de63..0f27455b9 100644 --- a/dataprep/eda/create_diff_report/diff_formatter.py +++ b/dataprep/eda/create_diff_report/diff_formatter.py @@ -282,6 +282,8 @@ def compute_plot_data( col_dtype = col_dtype[0] orig = [src for src, seq in labeled_cols.items() if col in seq] + if col == target and not is_dtype(col_dtype, Continuous_v1()): + raise ValueError("Sorry, target must be a numerical feature.") if is_dtype(col_dtype, Continuous_v1()): data.append((col, Continuous_v1(), diff_cont_calcs(srs.apply("dropna"), cfg), orig)) diff --git a/dataprep/eda/diff/render.py b/dataprep/eda/diff/render.py index 795d4c85a..66f9ce63e 100644 --- a/dataprep/eda/diff/render.py +++ b/dataprep/eda/diff/render.py @@ -101,6 +101,7 @@ def bar_viz( ("Source", "@orig"), ] + # Used to add y-padding to the graphs col1_min = df[0][col].min() col2_min = df[1][col].min() col1_max = df[0][col].max() @@ -159,6 +160,7 @@ def bar_viz( _format_axis(fig, 0, df[baseline].max(), "y") df1, df2 = df_list[0], df_list[1] + # Feature analysis here if target != col and target and col in df1.columns and col in df2.columns: col1, col2 = df_list[0][col], df_list[1][col] row_avgs_1 = [] @@ -261,7 +263,6 @@ def hist_viz( ) bottom = 0 if yscale == "linear" or df.empty else counts.min() / 2 if y_start is not None and y_end is not None: - # fig.y_range = (y_start * (1 - y_inc), y_end * (1 + y_inc)) fig.extra_y_ranges = { "Counts": Range1d(start=y_start * (1 - y_inc), end=y_end * (1 + y_inc)) } @@ -287,8 +288,6 @@ def hist_viz( fill_color=CATEGORY10[i], line_color=CATEGORY10[i], ) - # if col == 'LotFrontage': - # breakpoint() hover = HoverTool(tooltips=tooltips, attachment="vertical", mode="vline") fig.add_tools(hover) @@ -312,6 +311,7 @@ def hist_viz( fig.xaxis.axis_label = x_axis_label fig.xaxis.axis_label_standoff = 0 + # Feature analysis here if target and target != col and col in df1.columns and col in df2.columns: col1, col2 = df1[col], df2[col] source1, source2 = col1, col2 @@ -347,7 +347,7 @@ def hist_viz( y_range_name="Averages", line_width=4, ) - fig.add_layout(LinearAxis(y_range_name="Averages", axis_label="Bin Averages"), "right") + fig.add_layout(LinearAxis(y_range_name="Averages"), "right") return fig