From 9e504d574414089feb3fa7f88b63454332a06723 Mon Sep 17 00:00:00 2001 From: Daniel Rizk Date: Wed, 13 Nov 2024 17:59:04 -0500 Subject: [PATCH] fixes `@summary` --- NEWS.md | 3 +++ Project.toml | 2 +- src/docstrings.jl | 3 ++- src/summary.jl | 26 ++++++++++++++------------ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index f221043c..43fe1b3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # TidierData.jl updates +## v0.16.3 +- Bugfix: `@summary` will only act on numeric/integer columns, instead of throwing an error + ## v0.16.2 - 2024-09-03 - Bugfix: `@slice_min` and `@slice_max` respect the `n` argument - Adds `@head` diff --git a/Project.toml b/Project.toml index d7d9e33a..bd366a21 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierData" uuid = "fe2206b3-d496-4ee9-a338-6a095c4ece80" authors = ["Karandeep Singh"] -version = "0.16.2" +version = "0.16.3" [deps] Chain = "8be319e6-bccf-4806-a6f7-6fae938471bc" diff --git a/src/docstrings.jl b/src/docstrings.jl index ae20886f..bf18730a 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -2415,7 +2415,8 @@ For numerical columns, returns a dataframe with the Q1,Q3, min, max, mean, media julia> df = DataFrame(a = [1, 2, 3, 4, 5], b = [missing, 7, 8, 9, 10], c = [11, missing, 13, 14, missing], - d = [16, 17, 18, 19, 20]); + d = [16.1, 17.2, 18.3, 19.4, 20.5], + e = ["a", "a", "a", "a", "a"]); julia> @summary(df); diff --git a/src/summary.jl b/src/summary.jl index 68e0f295..126ba36f 100644 --- a/src/summary.jl +++ b/src/summary.jl @@ -3,18 +3,20 @@ function summary_stats(df::DataFrame) summary_data = [] for column in colnames col = df[:, column] - col_nonmissing = collect(skipmissing(col)) - push!(summary_data, ( - Column = column, - Min = minimum(col_nonmissing), - Q1 = quantile(col_nonmissing, 0.25), - Median = median(col_nonmissing), - Mean = mean(col_nonmissing), - Q3 = quantile(col_nonmissing, 0.75), - Max = maximum(col_nonmissing), - Count = 
length(col_nonmissing), - Missing_Count = count(ismissing, col) - )) + if eltype(col) <: Union{Number, Missing} + col_nonmissing = collect(skipmissing(col)) + push!(summary_data, ( + Column = column, + Min = minimum(col_nonmissing), + Q1 = quantile(col_nonmissing, 0.25), + Median = median(col_nonmissing), + Mean = mean(col_nonmissing), + Q3 = quantile(col_nonmissing, 0.75), + Max = maximum(col_nonmissing), + Count = length(col_nonmissing), + Missing_Count = count(ismissing, col) + )) + end end return DataFrame(summary_data) end