Skip to content

Commit

Permalink
refactor(eda): validated numerical target dtype, removed comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
Devin Lu committed Apr 22, 2022
1 parent abe5032 commit 0b801fa
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 14 deletions.
10 changes: 0 additions & 10 deletions dataprep/eda/create_diff_report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,6 @@ def create_diff_report(
"legend_labels": components["legend_lables"],
}

# {% for div in value.plots[1] %}
# <div class="vp-plot">
# {{ div }}
# {% if key in context.components.dfs[1].variables %}
# {{ context.components.dfs[1].variables[key].plots[1][loop.index0] }}
# {% endif %}
# </div>

# return context

template_base = ENV_LOADER.get_template("base.html")
report = template_base.render(context=context, zip=zip)
return Report(report)
Expand Down
2 changes: 2 additions & 0 deletions dataprep/eda/create_diff_report/diff_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ def compute_plot_data(
col_dtype = col_dtype[0]

orig = [src for src, seq in labeled_cols.items() if col in seq]
if col == target and not is_dtype(col_dtype, Continuous_v1()):
raise ValueError("Sorry, target must be a numerical feature.")

if is_dtype(col_dtype, Continuous_v1()):
data.append((col, Continuous_v1(), diff_cont_calcs(srs.apply("dropna"), cfg), orig))
Expand Down
8 changes: 4 additions & 4 deletions dataprep/eda/diff/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def bar_viz(
("Source", "@orig"),
]

# Used to add y-padding to the graphs
col1_min = df[0][col].min()
col2_min = df[1][col].min()
col1_max = df[0][col].max()
Expand Down Expand Up @@ -159,6 +160,7 @@ def bar_viz(
_format_axis(fig, 0, df[baseline].max(), "y")

df1, df2 = df_list[0], df_list[1]
# Feature analysis: runs only when a target is set, differs from this column, and the column exists in both dataframes
if target != col and target and col in df1.columns and col in df2.columns:
col1, col2 = df_list[0][col], df_list[1][col]
row_avgs_1 = []
Expand Down Expand Up @@ -261,7 +263,6 @@ def hist_viz(
)
bottom = 0 if yscale == "linear" or df.empty else counts.min() / 2
if y_start is not None and y_end is not None:
# fig.y_range = (y_start * (1 - y_inc), y_end * (1 + y_inc))
fig.extra_y_ranges = {
"Counts": Range1d(start=y_start * (1 - y_inc), end=y_end * (1 + y_inc))
}
Expand All @@ -287,8 +288,6 @@ def hist_viz(
fill_color=CATEGORY10[i],
line_color=CATEGORY10[i],
)
# if col == 'LotFrontage':
# breakpoint()

hover = HoverTool(tooltips=tooltips, attachment="vertical", mode="vline")
fig.add_tools(hover)
Expand All @@ -312,6 +311,7 @@ def hist_viz(
fig.xaxis.axis_label = x_axis_label
fig.xaxis.axis_label_standoff = 0

# Feature analysis: runs only when a target is set, differs from this column, and the column exists in both dataframes
if target and target != col and col in df1.columns and col in df2.columns:
col1, col2 = df1[col], df2[col]
source1, source2 = col1, col2
Expand Down Expand Up @@ -347,7 +347,7 @@ def hist_viz(
y_range_name="Averages",
line_width=4,
)
fig.add_layout(LinearAxis(y_range_name="Averages", axis_label="Bin Averages"), "right")
fig.add_layout(LinearAxis(y_range_name="Averages"), "right")
return fig


Expand Down

0 comments on commit 0b801fa

Please sign in to comment.