From b0f9ada124956686b213f0c032c7585f49b45a9b Mon Sep 17 00:00:00 2001
From: Daniel Schwabeneder
Date: Fri, 12 Mar 2021 23:31:54 +0100
Subject: [PATCH 1/2] add df macro

---
 Project.toml    | 7 +++++--
 src/Plots.jl    | 7 ++++++-
 src/examples.jl | 2 --
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/Project.toml b/Project.toml
index f879a9aa8..4e00f51f5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,6 +30,8 @@ Showoff = "992d4aef-0814-514b-bc4d-f2e9a6c4116f"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
@@ -51,6 +53,8 @@ Requires = "1"
 Scratch = "1"
 Showoff = "0.3.1"
 StatsBase = "0.32, 0.33"
+TableOperations = "1"
+Tables = "1"
 julia = "1.5"
 
 [extras]
@@ -66,11 +70,10 @@ PGFPlotsX = "8314cec4-20b6-5062-9cdb-752b83310925"
 RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
 UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
 VisualRegressionTests = "34922c18-7c2a-561c-bac1-01e79b2c4c92"
 
 [targets]
-test = ["Distributions", "FileIO", "Gtk", "ImageMagick", "Images", "LibGit2", "OffsetArrays", "PGFPlotsX", "HDF5", "RDatasets", "StableRNGs", "StaticArrays", "StatsPlots", "Test", "TestImages", "UnicodePlots", "VisualRegressionTests"]
+test = ["Distributions", "FileIO", "Gtk", "ImageMagick", "Images", "LibGit2", "OffsetArrays", "PGFPlotsX", "HDF5", "RDatasets", "StableRNGs", "StaticArrays", "Test", "TestImages", "UnicodePlots", "VisualRegressionTests"]
diff --git a/src/Plots.jl b/src/Plots.jl
index 5378fb73f..bad608b23 100644
--- a/src/Plots.jl
+++ b/src/Plots.jl
@@ -22,6 +22,8 @@ using Base.Meta
 import Showoff
 import StatsBase
 import JSON
+import Tables
+import TableOperations
 
 using Requires
 
@@ -116,7 +118,9 @@ export
     plotattr,
     scalefontsize,
     scalefontsizes,
-    resetfontsizes
+    resetfontsizes,
+
+    @df
 
 # ---------------------------------------------------------
 
@@ -212,6 +216,7 @@ include("ijulia.jl")
 include("fileio.jl")
 include("init.jl")
 include("legend.jl")
+include("df.jl")
 
 include("backends/plotly.jl")
 include("backends/gr.jl")
diff --git a/src/examples.jl b/src/examples.jl
index cfc8e96f4..78db59c37 100644
--- a/src/examples.jl
+++ b/src/examples.jl
@@ -515,7 +515,6 @@ const _examples = PlotExample[
         "DataFrames",
         "Plot using DataFrame column symbols.",
         [
-            :(using StatsPlots), # can't be inside begin block because @df gets expanded first
             :(
                 begin
                     import RDatasets
@@ -599,7 +598,6 @@ const _examples = PlotExample[
         "Boxplot and Violin series recipes",
         "",
         [
-            :(using StatsPlots), # can't be inside begin block because @df gets expanded first
             :(
                 begin
                     import RDatasets

From 2d6f43fc7f522aefa40909d9d1f92106ada18d12 Mon Sep 17 00:00:00 2001
From: Daniel Schwabeneder
Date: Fri, 12 Mar 2021 23:38:07 +0100
Subject: [PATCH 2/2] add df.jl

---
 src/df.jl | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 src/df.jl

diff --git a/src/df.jl b/src/df.jl
new file mode 100644
index 000000000..c87bf54dd
--- /dev/null
+++ b/src/df.jl
@@ -0,0 +1,208 @@
+"""
+    `@df d x`
+
+Replace every quoted symbol (e.g. `:a`) in the expression `x` with the respective column in `d` if it exists.
+
+If you want to avoid replacing the symbol, escape it with `^`, e.g. `^(:a)`.
+
+Symbols that do not name a column of `d` are passed through unchanged. Column values are
+used as stored in the table; no conversion of missing values is performed.
+
+`x` can be either a plot command or a block of plot commands.
+"""
+macro df(d, x)
+    esc(Expr(:call, df_helper(x), d))
+end
+
+"""
+    `@df x`
+
+Curried version of `@df d x`. Outputs an anonymous function `d -> @df d x`.
+"""
+macro df(x)
+    esc(df_helper(x))
+end
+
+function df_helper(x) # curried form: build `tbl -> <rewritten x>` with a gensym'd argument
+    i = gensym()
+    Expr(:(->), i, df_helper(i, x))
+end
+
+function df_helper(d, x)
+    if isa(x, Expr) && x.head == :block # meaning that there were multiple plot commands
+        commands = [df_helper(d, xx) for xx in x.args if !(isa(xx, Expr) && xx.head == :line || isa(xx, LineNumberNode))] # apply the helper recursively
+        return Expr(:block, commands...)
+
+    elseif isa(x, Expr) && x.head == :call # each function call is operated on alone
+        syms = Any[]
+        vars = Symbol[]
+        plot_call = parse_table_call!(d, x, syms, vars)
+        names = gensym()
+        compute_vars = Expr(:(=), Expr(:tuple, Expr(:tuple, vars...), names),
+            Expr(:call, :($(@__MODULE__).extract_columns_and_names), d, syms...))
+        argnames = _argnames(names, x)
+        if (length(plot_call.args) >= 2) && isa(plot_call.args[2], Expr) && (plot_call.args[2].head == :parameters)
+            label_plot_call = Expr(:call, :($(@__MODULE__).add_label), plot_call.args[2], argnames,
+                plot_call.args[1], plot_call.args[3:end]...)
+        else
+            label_plot_call = Expr(:call, :($(@__MODULE__).add_label), argnames, plot_call.args...)
+        end
+        return Expr(:block, compute_vars, label_plot_call)
+
+    else
+        error("Second argument ($x) can only be a block or function call")
+    end
+end
+
+parse_table_call!(d, x, syms, vars) = x # anything that is not a QuoteNode or Expr is left untouched
+
+function parse_table_call!(d, x::QuoteNode, syms, vars) # a quoted symbol becomes a gensym'd variable bound to the extracted column
+    new_var = gensym(x.value)
+    push!(syms, x)
+    push!(vars, new_var)
+    return new_var
+end
+
+# Expr method: leave `a.b` alone, unescape `^(x)`, collect `cols(...)`, expand `{...}` into a named tuple.
+function parse_table_call!(d, x::Expr, syms, vars)
+    if x.head == :. && length(x.args) == 2
+        isa(x.args[2], QuoteNode) && return x
+    elseif x.head == :call
+        x.args[1] == :^ && length(x.args) == 2 && return x.args[2]
+        if x.args[1] == :cols
+            if length(x.args) == 1
+                push!(x.args, :($(@__MODULE__).column_names($d)))
+                return parse_table_call!(d, x, syms, vars)
+            end
+            range = x.args[2]
+            new_vars = gensym("range")
+            push!(syms, range)
+            push!(vars, new_vars)
+            return new_vars
+        end
+    elseif x.head==:braces # From Query: use curly brackets to simplify writing named tuples
+        new_ex = Expr(:tuple, x.args...)
+
+        for (j,field_in_NT) in enumerate(new_ex.args)
+            if isa(field_in_NT, Expr) && field_in_NT.head==:(=)
+                new_ex.args[j] = Expr(:(=), field_in_NT.args...)
+            elseif field_in_NT isa QuoteNode
+                new_ex.args[j] = Expr(:(=), field_in_NT.value, field_in_NT)
+            elseif isa(field_in_NT, Expr)
+                new_ex.args[j] = Expr(:(=), Symbol(filter(t -> t != ':', string(field_in_NT))), field_in_NT)
+            elseif isa(field_in_NT, Symbol)
+                new_ex.args[j] = Expr(:(=), field_in_NT, field_in_NT)
+            end
+        end
+        return parse_table_call!(d, new_ex, syms, vars)
+    end
+    return Expr(x.head, (parse_table_call!(d, arg, syms, vars) for arg in x.args)...)
+end
+
+function column_names(t)
+    s = Tables.schema(t)
+    s === nothing ? Tables.columnnames(Tables.columns(t)) : s.names # unknown schema: ask the materialized columns (also works for empty tables)
+end
+
+not_kw(x) = true
+not_kw(x::Expr) = !(x.head in [:kw, :parameters]) # only `:kw`/`:parameters` expressions count as keyword arguments
+
+function insert_kw!(x::Expr, s::Symbol, v) # insert keyword `s = v` right after the function name (and after a `:parameters` block, if present)
+    index = isa(x.args[2], Expr) && x.args[2].head == :parameters ? 3 : 2
+    x.args = vcat(x.args[1:index-1], Expr(:kw, s, v), x.args[index:end])
+end
+
+function _argnames(names, x::Expr) # vector expression holding one label per non-keyword argument
+    Expr(:vect, [_arg2string(names, s) for s in x.args[2:end] if not_kw(s)]...)
+end
+
+_arg2string(names, x) = stringify(x)
+function _arg2string(names, x::Expr)
+    if x.head == :call && x.args[1] == :cols
+        return :($(@__MODULE__).compute_name($names, $(x.args[2])))
+    elseif x.head == :call && x.args[1] == :hcat
+        return hcat(stringify.(x.args[2:end])...)
+    elseif x.head == :hcat
+        return hcat(stringify.(x.args)...)
+    else
+        return stringify(x)
+    end
+end
+
+stringify(x) = filter(t -> t != ':', string(x)) # string form with every ':' removed
+
+compute_name(names, i::Integer) = names[i] # any integer index, consistent with `add_sym!`
+compute_name(names, i::Symbol) = i
+compute_name(names, i) = reshape([compute_name(names, ii) for ii in i], 1, :) # collection of indices/symbols -> 1×n row
+
+"""
+    add_label(argnames, f, args...; kwargs...)
+
+This function ensures that labels are passed to the plotting command, if it accepts them.
+
+If `f` does not accept keyword arguments, and `kwargs` is empty, it will only
+forward `args...`.
+
+If the user has provided keyword arguments, but `f` does not accept them,
+then it will error.
+"""
+function add_label(argnames, f, args...; kwargs...)
+    i = findlast(t -> isa(t, Expr) || isa(t, AbstractArray), argnames) # last argument name that is an expression or array supplies the label
+    try
+        if (i === nothing)
+            return f(args...; kwargs...)
+        else
+            return f(label = stringify.(argnames[i]), args...; kwargs...)
+        end
+    catch e
+        if e isa MethodError ||
+            (e isa ErrorException && occursin("does not accept keyword arguments", e.msg))
+            # check if the user has supplied kwargs, then we need to rethrow the error
+            isempty(kwargs) || rethrow()
+            # transmit only args to `f`
+            return f(args...)
+        else
+            rethrow()
+        end
+    end
+end
+
+get_col(s::Integer, col_nt, names) = col_nt[names[s]] # any integer index, consistent with `add_sym!`
+get_col(s::Symbol, col_nt, names) = get(col_nt, s, s) # not a column: return the symbol itself
+get_col(syms, col_nt, names) = hcat((get_col(s, col_nt, names) for s in syms)...)
+
+# get the appropriate name when passed an Integer
+add_sym!(cols, i::Integer, names) = push!(cols, names[i])
+# check for errors in Symbols
+add_sym!(cols, s::Symbol, names) = s in names ? push!(cols, s) : cols
+# recursively extract column names
+function add_sym!(cols, s, names)
+    for si in s
+        add_sym!(cols, si, names)
+    end
+    cols
+end
+
+"""
+    extract_columns_and_names(df, syms...)
+
+Extracts columns and their names (if the column number is an integer)
+into a slightly complex `Tuple`.
+
+The structure goes as `((columndata...), names)`. This is unpacked by the [`@df`](@ref) macro into `gensym`'ed variables, which are passed to the plotting function.
+
+!!! note
+    If you want to extend the [`@df`](@ref) macro
+    to work with your custom type, this is the
+    function you should overload!
+"""
+function extract_columns_and_names(df, syms...)
+    Tables.istable(df) || error("Only tables are supported")
+    names = column_names(df)
+
+    # extract selected column names
+    selected_cols = add_sym!(Symbol[], syms, names)
+
+    cols = Tables.columntable(TableOperations.select(df, unique(selected_cols)...))
+    return Tuple(get_col(s, cols, names) for s in syms), names
+end