Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move @df macro to Plots #3351

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ Showoff = "992d4aef-0814-514b-bc4d-f2e9a6c4116f"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[compat]
Expand All @@ -51,6 +53,8 @@ Requires = "1"
Scratch = "1"
Showoff = "0.3.1"
StatsBase = "0.32, 0.33"
TableOperations = "1"
Tables = "1"
julia = "1.5"

[extras]
Expand All @@ -66,11 +70,10 @@ PGFPlotsX = "8314cec4-20b6-5062-9cdb-752b83310925"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestImages = "5e47fb64-e119-507b-a336-dd2b206d9990"
UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228"
VisualRegressionTests = "34922c18-7c2a-561c-bac1-01e79b2c4c92"

[targets]
test = ["Distributions", "FileIO", "Gtk", "ImageMagick", "Images", "LibGit2", "OffsetArrays", "PGFPlotsX", "HDF5", "RDatasets", "StableRNGs", "StaticArrays", "StatsPlots", "Test", "TestImages", "UnicodePlots", "VisualRegressionTests"]
test = ["Distributions", "FileIO", "Gtk", "ImageMagick", "Images", "LibGit2", "OffsetArrays", "PGFPlotsX", "HDF5", "RDatasets", "StableRNGs", "StaticArrays", "Test", "TestImages", "UnicodePlots", "VisualRegressionTests"]
7 changes: 6 additions & 1 deletion src/Plots.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ using Base.Meta
import Showoff
import StatsBase
import JSON
import Tables
import TableOperations

using Requires

Expand Down Expand Up @@ -116,7 +118,9 @@ export
plotattr,
scalefontsize,
scalefontsizes,
resetfontsizes
resetfontsizes,

@df


# ---------------------------------------------------------
Expand Down Expand Up @@ -212,6 +216,7 @@ include("ijulia.jl")
include("fileio.jl")
include("init.jl")
include("legend.jl")
include("df.jl")

include("backends/plotly.jl")
include("backends/gr.jl")
Expand Down
208 changes: 208 additions & 0 deletions src/df.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this code should instead go to recipes.jl?

`@df d x`

Replace every symbol in the expression `x` with the respective column in `d`, if it exists.

If you want to avoid replacing the symbol, escape it with `^`.

`NA` values are replaced with `NaN` for columns of `Float64` and `""` or `Symbol()`
for strings and symbols respectively.

`x` can be either a plot command or a block of plot commands.
"""
macro df(d, x)
    # `df_helper(x)` yields an anonymous function expression `tbl -> ...`;
    # applying it to `d` right away gives the two-argument form of the macro.
    table_fn = df_helper(x)
    return esc(Expr(:call, table_fn, d))
end

"""
    @df x

Curried form of `@df d x`: expands to an anonymous function `d -> @df d x`
that can be applied to a table later.
"""
macro df(x)
    curried = df_helper(x)
    return esc(curried)
end

# Wrap the rewritten expression in an anonymous function whose single argument
# is a fresh (gensym'ed) name standing for the table.
function df_helper(x)
    tbl = gensym()
    body = df_helper(tbl, x)
    return Expr(:(->), tbl, body)
end

# Rewrite the expression `x` so that column references are pulled out of the
# table expression `d`.
#
# * A `begin ... end` block is rewritten statement by statement (line-number
#   nodes are dropped).
# * A function call becomes a two-statement block: first the referenced columns
#   (and the table's column names) are extracted into gensym'ed variables, then
#   the call is routed through `add_label` together with the argument labels.
# * Anything else is rejected with an error.
function df_helper(d, x)
    if x isa Expr && x.head == :block
        # several plot commands: handle each one recursively
        stmts = filter(x.args) do stmt
            !(stmt isa LineNumberNode || (stmt isa Expr && stmt.head == :line))
        end
        rewritten = [df_helper(d, stmt) for stmt in stmts]
        return Expr(:block, rewritten...)
    end

    if !(x isa Expr && x.head == :call)
        error("Second argument ($x) can only be a block or function call")
    end

    # a single function call
    column_refs = Any[]
    bound_vars = Symbol[]
    call = parse_table_call!(d, x, column_refs, bound_vars)
    names_var = gensym()

    # ((var1, var2, ...), names) = extract_columns_and_names(d, ref1, ref2, ...)
    extraction = Expr(
        :call, :($(@__MODULE__).extract_columns_and_names), d, column_refs...
    )
    assignment = Expr(
        :(=), Expr(:tuple, Expr(:tuple, bound_vars...), names_var), extraction
    )

    # must run after `parse_table_call!`, which may have filled in `cols()`
    labels = _argnames(names_var, x)

    fargs = call.args
    has_parameters =
        length(fargs) >= 2 && fargs[2] isa Expr && fargs[2].head == :parameters
    labelled_call = if has_parameters
        # keep the `; kw...` parameters block in second position
        Expr(
            :call, :($(@__MODULE__).add_label), fargs[2], labels,
            fargs[1], fargs[3:end]...,
        )
    else
        Expr(:call, :($(@__MODULE__).add_label), labels, fargs...)
    end
    return Expr(:block, assignment, labelled_call)
end

# Fallback: anything that is neither a `QuoteNode` nor an `Expr` (plain symbols,
# literals, ...) is passed through unchanged.
function parse_table_call!(d, x, syms, vars)
    return x
end

# A quoted symbol such as `:a` is a column reference: record it in `syms`,
# allocate a fresh variable for the column data in `vars`, and substitute that
# variable into the expression.
function parse_table_call!(d, x::QuoteNode, syms, vars)
    placeholder = gensym(x.value)
    push!(vars, placeholder)
    push!(syms, x)
    return placeholder
end


# Walk the expression `x`, replacing column references with gensym'ed variables
# while collecting the references in `syms` and the variables in `vars`.
#
# Special forms:
# * `a.b` (second argument a `QuoteNode`) is left untouched.
# * `^(ex)` escapes `ex`: it is returned as-is, without substitution.
# * `cols(sel)` is replaced by a single variable bound to the selection `sel`;
#   a bare `cols()` is first completed in place to `cols(column_names(d))`.
# * `{...}` is turned into a named tuple expression and then processed.
# Every other expression is rebuilt from its recursively processed arguments.
function parse_table_call!(d, x::Expr, syms, vars)
    if x.head == :. && length(x.args) == 2
        x.args[2] isa QuoteNode && return x
    elseif x.head == :call
        callee = x.args[1]
        if callee == :^ && length(x.args) == 2
            return x.args[2]
        end
        if callee == :cols
            # `x` is mutated on purpose: later passes read the completed call
            if length(x.args) == 1
                push!(x.args, :($(@__MODULE__).column_names($d)))
            end
            selection = x.args[2]
            placeholder = gensym("range")
            push!(syms, selection)
            push!(vars, placeholder)
            return placeholder
        end
    elseif x.head == :braces # From Query: curly brackets as named-tuple shorthand
        fields = map(x.args) do field
            if field isa Expr && field.head == :(=)
                Expr(:(=), field.args...)
            elseif field isa QuoteNode
                Expr(:(=), field.value, field)
            elseif field isa Expr
                Expr(:(=), Symbol(filter(t -> t != ':', string(field))), field)
            elseif field isa Symbol
                Expr(:(=), field, field)
            else
                field
            end
        end
        return parse_table_call!(d, Expr(:tuple, fields...), syms, vars)
    end
    rewritten = map(arg -> parse_table_call!(d, arg, syms, vars), x.args)
    return Expr(x.head, rewritten...)
end

"""
    column_names(t)

Return the column names of the table `t` as an indexable collection of `Symbol`s.

The names are read from `Tables.schema(t)` when a schema is available. Otherwise
they are obtained through the Tables.jl column interface, which also works for
tables without rows (peeking at the first row would throw there).
"""
function column_names(t)
    schema = Tables.schema(t)
    schema === nothing || return schema.names
    return Tables.columnnames(Tables.columns(t))
end

# `true` unless `x` is a keyword-argument expression (`a = 1` inside a call, or
# the `; kwargs...` parameters block).
not_kw(x) = true
not_kw(x::Expr) = x.head != :kw && x.head != :parameters

"""
    insert_kw!(x::Expr, s::Symbol, v)

Insert the keyword argument `s = v` into the call expression `x`, in place.

The keyword is placed directly after the callee, or after the `; kwargs...`
parameters block when the call has one. Calls without any argument (`f()`) are
supported. Returns the updated argument vector of `x`.
"""
function insert_kw!(x::Expr, s::Symbol, v)
    # Only inspect the second slot when it exists; `f()` has just the callee.
    has_parameters =
        length(x.args) >= 2 && x.args[2] isa Expr && x.args[2].head == :parameters
    index = has_parameters ? 3 : 2
    return insert!(x.args, index, Expr(:kw, s, v))
end

# Build a vector-literal expression holding one label per positional argument
# of the call `x`; keyword arguments are skipped.
function _argnames(names, x::Expr)
    positional = filter(not_kw, x.args[2:end])
    labels = [_arg2string(names, arg) for arg in positional]
    return Expr(:vect, labels...)
end

# Label for one call argument. Non-expression arguments are simply stringified.
_arg2string(names, x) = stringify(x)

# For expressions:
# * `cols(sel)` defers to `compute_name` at run time, once the names are known;
# * `hcat(a, b, ...)` and `[a b ...]` yield a row matrix with one label per column;
# * everything else is stringified as a whole.
function _arg2string(names, x::Expr)
    head, args = x.head, x.args
    if head == :call && args[1] == :cols
        return :($(@__MODULE__).compute_name($names, $(args[2])))
    end
    if head == :hcat
        return hcat(stringify.(args)...)
    elseif head == :call && args[1] == :hcat
        return hcat(stringify.(args[2:end])...)
    end
    return stringify(x)
end

# Textual form of `x` with every colon removed, so that `:a` becomes "a".
function stringify(x)
    text = string(x)
    return replace(text, ":" => "")
end

# Resolve a column selector to its label.
# An integer selects by position. Any `Integer` is accepted, matching `add_sym!`;
# a non-`Int` integer would otherwise hit the iterable method and recurse forever.
compute_name(names, i::Integer) = names[i]
# A symbol is already a name.
compute_name(names, i::Symbol) = i
# Any other selector is treated as a collection of selectors and yields a
# 1×n row matrix of labels (one per selected column).
compute_name(names, i) = reshape([compute_name(names, ii) for ii in i], 1, :)

"""
    add_label(argnames, f, args...; kwargs...)

Call the plotting command `f`, passing a `label` keyword when it accepts one.

The label is taken from the last entry of `argnames` that is an `Expr` or an
`AbstractArray`. If there is no such entry, `f(args...; kwargs...)` is called
as is and any error propagates unchanged.

If `f` does not accept keyword arguments, and `kwargs` is empty, the call is
retried with `args...` only.

If the user has provided keyword arguments, but `f` does not accept them,
the error is rethrown.
"""
function add_label(argnames, f, args...; kwargs...)
    i = findlast(t -> isa(t, Expr) || isa(t, AbstractArray), argnames)

    # Nothing to inject: a plain forwarding call. Retrying it without the label
    # would be the very same call, so it must not go through the fallback below
    # (that would run `f`, and its side effects, twice).
    i === nothing && return f(args...; kwargs...)

    try
        return f(label = stringify.(argnames[i]), args...; kwargs...)
    catch e
        keywords_rejected =
            e isa MethodError ||
            (e isa ErrorException && occursin("does not accept keyword arguments", e.msg))
        # Fall back only when the injected `label` is the sole keyword; errors
        # involving user-supplied keywords and unrelated errors are rethrown.
        (keywords_rejected && isempty(kwargs)) || rethrow()
        return f(args...)
    end
end

# Fetch the data selected by `s` from the column table `col_nt`.
# An integer selects by position through `names`. Any `Integer` is accepted,
# matching `add_sym!`; a non-`Int` integer would otherwise hit the iterable
# method and recurse forever.
get_col(s::Integer, col_nt, names) = col_nt[names[s]]
# A symbol selects by name; a symbol that is not a column is returned unchanged.
get_col(s::Symbol, col_nt, names) = get(col_nt, s, s)
# A collection of selectors yields the selected columns concatenated side by side.
get_col(syms, col_nt, names) = hcat((get_col(s, col_nt, names) for s in syms)...)

# Collect into `cols` the column names referenced by a selector.

# An integer refers to a column by position.
function add_sym!(cols, i::Integer, names)
    return push!(cols, names[i])
end

# A symbol is kept only if it names a column; other symbols are skipped.
function add_sym!(cols, s::Symbol, names)
    s in names && push!(cols, s)
    return cols
end

# Any other selector is treated as a collection and visited element by element.
function add_sym!(cols, s, names)
    foreach(si -> add_sym!(cols, si, names), s)
    return cols
end

"""
    extract_columns_and_names(df, syms...)

Extract the columns selected by `syms` from the table `df`, together with the
table's column names.

The result has the shape `((columndata...), names)`, with one entry of
`columndata` per element of `syms`. The [`@df`](@ref) macro unpacks it into
`gensym`'ed variables, which are then passed to the plotting function.

Throws an error if `df` is not a table according to `Tables.istable`.

!!! note
    If you want to extend the [`@df`](@ref) macro
    to work with your custom type, this is the
    function you should overload!
"""
function extract_columns_and_names(df, syms...)
    if !Tables.istable(df)
        error("Only tables are supported")
    end
    names = column_names(df)

    # materialise only the columns that are actually referenced, each one once
    wanted = unique(add_sym!(Symbol[], syms, names))
    cols = Tables.columntable(TableOperations.select(df, wanted...))

    columndata = map(s -> get_col(s, cols, names), syms)
    return columndata, names
end
2 changes: 0 additions & 2 deletions src/examples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,6 @@ const _examples = PlotExample[
"DataFrames",
"Plot using DataFrame column symbols.",
[
:(using StatsPlots), # can't be inside begin block because @df gets expanded first
:(
begin
import RDatasets
Expand Down Expand Up @@ -599,7 +598,6 @@ const _examples = PlotExample[
"Boxplot and Violin series recipes",
"",
[
:(using StatsPlots), # can't be inside begin block because @df gets expanded first
:(
begin
import RDatasets
Expand Down