Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add @byrow attempt 2 #250

Merged
merged 25 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/DataFramesMeta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using Reexport
export @with, @where, @orderby, @transform, @by, @combine, @select,
@transform!, @select!,
@eachrow, @eachrow!,
@byrow, @byrow!, @based_on # deprecated
@based_on # deprecated


# `true` when the loaded DataFrames module defines `pretty_table`, `false` otherwise.
# `isdefined` already returns a `Bool`, so no `? true : false` is needed.
# NOTE(review): the name suggests this is a proxy for "DataFrames >= 0.22" — confirm.
global const DATAFRAMES_GEQ_22 = isdefined(DataFrames, :pretty_table)
Expand Down
21 changes: 0 additions & 21 deletions src/eachrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,6 @@ function eachrow_helper(df, body, deprecation_warning)
end
end

"""
@byrow!(d, expr)

Deprecated version of `@eachrow`, see: [`@eachrow`](@ref)

Acts the exact same way. It does not change the input argument `d` in-place.
"""
macro byrow!(df, body)
# Forward to `eachrow_helper`, passing `true` as its third (`deprecation_warning`)
# argument, and escape the result with `esc`.
# NOTE(review): presumably the flag makes the helper emit a deprecation warning — confirm there.
esc(eachrow_helper(df, body, true))
end

"""
@byrow(d, expr)

Deprecated version of `@eachrow`, see: [`@eachrow`](@ref)

Acts the exact same way.
"""
macro byrow(d, body)
# Forward to `eachrow_helper`, passing `true` as its third (`deprecation_warning`)
# argument, and escape the result with `esc`.
# NOTE(review): presumably the flag makes the helper emit a deprecation warning — confirm there.
esc(eachrow_helper(d, body, true))
end

pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
"""
@eachrow(df, body)
Expand Down
95 changes: 69 additions & 26 deletions src/macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ end
##############################################################################

function where_helper(x, args...)
exprs = create_args_vector(args...)
exprs, ByRow = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = true, nolhs = true) for ex in exprs)
quote
$where($x, $(t...))
Expand Down Expand Up @@ -336,8 +336,8 @@ end
##############################################################################

function orderby_helper(x, args...)
exprs = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = true, nolhs = true) for ex in exprs)
exprs, wrap_ByRow = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = true, nolhs = true, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFramesMeta.orderby($x, $(t...))
end
Expand Down Expand Up @@ -463,8 +463,8 @@ end


function transform_helper(x, args...)
exprs = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
exprs, wrap_ByRow = create_args_vector(args...)
t = [fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs]
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
quote
$DataFrames.transform($x, $(t...))
end
Expand Down Expand Up @@ -503,6 +503,28 @@ and
@transform(df, a = :x, b = :y)
```

`@transform` uses the syntax `@byrow` to wrap transformations in
bkamins marked this conversation as resolved.
Show resolved Hide resolved
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
the `ByRow` function wrapper from DataFrames, enabling broadcasting
and more. For example, the call
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved

```
@transform(df, y = @byrow :x == 1 ? "true" : "false")
```

becomes

```
transform(df, :x => ByRow(x -> x == 1 ? "true" : "false") => :y)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
```

a transformation which cannot be conveniently expressed
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
using broadcasting.

To avoid writing `@byrow` multiple times when performing multiple
transformations by row, `@transform` allows `@byrow` at the
beginning of a block of transformations. All transformations
in the block will operate by row.
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved

### Examples

```jldoctest
Expand All @@ -511,17 +533,38 @@ julia> using DataFramesMeta
julia> df = DataFrame(A = 1:3, B = [2, 1, 2]);

julia> @transform df begin
a = 2 * :A
x = :A .+ :B
end
a = 2 * :A
x = :A .+ :B
end
3×4 DataFrame
Row │ A B a x
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────
1 │ 1 2 2 3
2 │ 2 1 4 3
3 │ 3 2 6 5

julia> @transform df z = @byrow :A * :B
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
3×3 DataFrame
Row │ A B z
│ Int64 Int64 Int64
─────┼─────────────────────
1 │ 1 2 2
2 │ 2 1 2
3 │ 3 2 6

julia> @transform df @byrow begin
x = :A * :B
y = :A == 1 ? 100 : 200
end

3×4 DataFrame
Row │ A B │ a │ x
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────
│ 1 1 │ 2 2 │ 3
│ 2 2 │ 1 │ 4 │ 3 │
│ 3 3 │ 2 │ 6 │ 5 │
Row │ A B x y
│ Int64 Int64 Int64 Int64
─────┼────────────────────────────
1 │ 1 2 2 100
2 │ 2 1 2 200
3 │ 3 2 6 200
```
"""
macro transform(x, args...)
Expand All @@ -537,8 +580,8 @@ end


function transform!_helper(x, args...)
exprs = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
exprs, wrap_ByRow = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFrames.transform!($x, $(t...))
end
Expand Down Expand Up @@ -611,8 +654,8 @@ end
##############################################################################

function select_helper(x, args...)
exprs = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
exprs, wrap_ByRow = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFrames.select($x, $(t...))
end
Expand Down Expand Up @@ -704,8 +747,8 @@ end
##############################################################################

function select!_helper(x, args...)
exprs = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
exprs, wrap_ByRow = create_args_vector(args...)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFrames.select!($x, $(t...))
end
Expand Down Expand Up @@ -794,13 +837,13 @@ function combine_helper(x, args...; deprecation_warning = false)
deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead."

# Only allow one argument when returning a Table object
exprs = create_args_vector(args...)
exprs, wrap_ByRow = create_args_vector(args...)
fe = first(exprs)
if length(exprs) == 1 &&
!(fe isa QuoteNode) &&
!(fe.head == :(=) || fe.head == :kw)

t = fun_to_vec(fe; gensym_names = false, nolhs = true)
t = fun_to_vec(fe; gensym_names = false, nolhs = true, wrap_ByRow = wrap_ByRow)

# 0.22: No pair as first arg, needs AsTable in other args to return table
if DATAFRAMES_GEQ_22
Expand All @@ -814,7 +857,7 @@ function combine_helper(x, args...; deprecation_warning = false)
end
end
else
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFrames.combine($x, $(t...))
end
Expand Down Expand Up @@ -922,13 +965,13 @@ end
function by_helper(x, what, args...)
# Only allow one argument when returning a Table object
# Only allow one argument when returning a Table object
exprs = create_args_vector(args...)
exprs, wrap_ByRow = create_args_vector(args...)
fe = first(exprs)
if length(exprs) == 1 &&
!(fe isa QuoteNode) &&
!(fe.head == :(=) || fe.head == :kw)

t = fun_to_vec(fe; gensym_names = false, nolhs = true)
t = fun_to_vec(fe; gensym_names = false, nolhs = true, wrap_ByRow = wrap_ByRow)

# 0.22: No pair as first arg, needs AsTable in other args to return table
if DATAFRAMES_GEQ_22
Expand All @@ -942,7 +985,7 @@ function by_helper(x, what, args...)
end
end
else
t = (fun_to_vec(ex; gensym_names = false, nolhs = false) for ex in exprs)
t = (fun_to_vec(ex; gensym_names = false, nolhs = false, wrap_ByRow = wrap_ByRow) for ex in exprs)
quote
$DataFrames.combine($groupby($x, $what), $(t...))
end
Expand Down
62 changes: 46 additions & 16 deletions src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,11 @@ function args_to_selectors(v)
:(DataFramesMeta.make_source_concrete($(Expr(:vect, t...))))
end

"""
    is_macro_head(ex, name)

Return `true` if `ex` is an `Expr` with head `:macrocall` whose first
argument equals `Symbol(name)` (for example `name = "@byrow"`).
Return `false` otherwise, including for any `ex` that is not an `Expr`.
"""
function is_macro_head(ex, name)
    # Anything that is not an `Expr` cannot be a macro call.
    return false
end

function is_macro_head(ex::Expr, name)
    ex.head == :macrocall || return false
    # The first argument of a `:macrocall` expression holds the macro's name.
    macro_name = ex.args[1]
    return macro_name == Symbol(name)
end

"""
get_source_fun(function_expr)
get_source_fun(function_expr; wrap_ByRow=false)

Given an expression that may contain `QuoteNode`s (`:x`)
and items wrapped in `cols`, return a function
Expand All @@ -83,11 +85,16 @@ representing the vector of inputs that will be
used as the `src` in the `src => fun => dest`
call later on.

If `wrap_ByRow=true` then the function gets wrapped
in `ByRow`. If the expression begins with `@byrow`,
then `get_source_fun` is recursively called on the
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved
expression that `@byrow` acts on, with `wrap_ByRow=true`.

### Examples

julia> using MacroTools

julia> ex = :(:x + :y)
julia> ex = :(:x + :y);

julia> DataFramesMeta.get_source_fun(ex)
(:(DataFramesMeta.make_source_concrete([:x, :y])), :+)
Expand All @@ -101,10 +108,20 @@ julia> src, fun = DataFramesMeta.get_source_fun(ex);
julia> MacroTools.prettify(fun)
:((mammoth, goat)->mammoth .+ 1 .* goat)

julia> ex = :(@byrow :x * :y);

julia> src, fun = DataFramesMeta.get_source_fun(ex);

julia> MacroTools.prettify(fun)
:(ByRow(*))
```

"""
function get_source_fun(function_expr)
function get_source_fun(function_expr; wrap_ByRow=false)
# recursive step for begin :a + :b end
if function_expr isa Expr &&
if is_macro_head(function_expr, "@byrow")
return get_source_fun(function_expr.args[3], wrap_ByRow=true)
elseif function_expr isa Expr &&
function_expr.head == :block &&
length(function_expr.args) == 1

Expand All @@ -120,15 +137,11 @@ function get_source_fun(function_expr)
else
fun = fun_t
end

return source, fun
elseif is_simple_broadcast_call(function_expr)
# extract source symbols from quotenodes
source = args_to_selectors(function_expr.args[2].args)
fun_t = function_expr.args[1]
fun = :(DataFrames.ByRow($fun_t))

return source, fun
else
membernames = Dict{Any, Symbol}()

Expand All @@ -142,15 +155,20 @@ function get_source_fun(function_expr)
$body
end
end
return source, fun
end

if wrap_ByRow
fun = :(ByRow($fun))
end

return source, fun
end

# `nolhs` needs to be `true` when we have syntax of the form
# `@combine(gd, fun(:x, :y))` where `fun` returns a `table` object.
# We don't create the "new name" pair because new names are
# given by the table.
function fun_to_vec(ex::Expr; nolhs::Bool = false, gensym_names::Bool = false)
function fun_to_vec(ex::Expr; nolhs::Bool = false, gensym_names::Bool = false, wrap_ByRow=false)
# classify the type of expression
# :x # handled via dispatch
# cols(:x) # handled as though above
Expand Down Expand Up @@ -258,8 +276,8 @@ function fun_to_vec(ex::Expr; nolhs::Bool = false, gensym_names::Bool = false)
# y = f(cols(:x))
# y = :x + 1
# y = cols(:x) + 1
source, fun = get_source_fun(rhs; wrap_ByRow = wrap_ByRow)
if lhs isa Symbol
source, fun = get_source_fun(rhs)
dest = QuoteNode(lhs)

return quote
Expand All @@ -269,7 +287,6 @@ function fun_to_vec(ex::Expr; nolhs::Bool = false, gensym_names::Bool = false)

# cols(:y) = f(:x)
if onearg(lhs, :cols)
source, fun = get_source_fun(rhs)
dest = lhs.args[2]

return quote
Expand All @@ -279,7 +296,7 @@ function fun_to_vec(ex::Expr; nolhs::Bool = false, gensym_names::Bool = false)

throw(ArgumentError("This path should not be reached"))
end
fun_to_vec(ex::QuoteNode; nolhs::Bool = false, gensym_names::Bool = false) = ex
fun_to_vec(ex::QuoteNode; nolhs::Bool = false, gensym_names::Bool = false, wrap_ByRow = false) = ex

function make_source_concrete(x::AbstractVector)
if isempty(x) || isconcretetype(eltype(x))
Expand Down Expand Up @@ -315,22 +332,35 @@ of expression-like object (`Expr`, `QuoteNode`, etc.),
puts them into a single array, removing line numbers.
"""
function create_args_vector(args...)
Any[Base.remove_linenums!(arg) for arg in args]
Any[Base.remove_linenums!(arg) for arg in args], false
end

"""
create_args_vector(arg)
pdeffebach marked this conversation as resolved.
Show resolved Hide resolved

Normalize a single input to a vector of expressions.
Normalize a single input to a vector of expressions,
with a `wrap_ByRow` flag indicating that the
expressions should operate by row.

If `arg` is a single `:block`, it is unnested.
Otherwise, return a single-element array.
Also removes line numbers.

If `arg` is of the form `@byrow ...`, then
`wrap_ByRow` is returned as `true`.
"""
function create_args_vector(arg)
if arg isa Expr && is_macro_head(arg, "@byrow")
wrap_ByRow = true
arg = arg.args[3]
else
wrap_ByRow = false
end

if arg isa Expr && arg.head == :block
x = Base.remove_linenums!(arg).args
else
x = Any[Base.remove_linenums!(arg)]
end
return x
return x, wrap_ByRow
end
Loading