diff --git a/docs/src/index.md b/docs/src/index.md index ef9759a5..24984dd4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -400,6 +400,93 @@ julia> @rtransform df @passmissing :x = parse(Int, :x_str) 3 │ missing missing ``` +## Passing keyword arguments to underlying DataFrames.jl functions + +All DataFramesMeta.jl macros allow passing of keyword arguments to their DataFrames.jl +function equivalents. The table below describes the correspondence between DataFramesMeta.jl +macros and the function that is actually called by the macro. + +| Macro | Base DataFrames.jl function called | +|-------|---------------------------| +| @subset | `subset` | +| @subset! | `subset!` | +| @rsubset | `subset` | +| @rsubset! | `subset!` | +| @orderby | None (no keyword arguments supported) | +| @rorderby | None (no keyword arguments supported) | +| @by | `combine` | +| @combine | `combine` | +| @transform | `transform` | +| @transform! | `transform!` | +| @rtransform | `transform` | +| @rtransform! | `transform!` | +| @select | `select` | +| @select! | `select!` | +| @rselect | `select` | +| @rselect! | `select!` | + +This can be done in two ways. When inputs are given as multiple +arguments, they are added at the end after a semi-colon `;`, as in + +```julia +julia> df = DataFrame(x = [1, 1, 2, 2], b = [5, 6, 7, 8]); + +julia> @rsubset(df, :x .== 1 ; view = true) +2×2 SubDataFrame + Row │ x b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 5 + 2 │ 1 6 + +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `subset` function. + +``` +julia> df = DataFrame(x = [1, 1, 2, 2], b = [5, 6, 7, 8]); + +julia> @rsubset df begin + :x == 1 + @kwarg view = true + end +2×2 SubDataFrame + Row │ x b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 5 + 2 │ 1 6 +``` + +Just as with Julia functions, it is possible to pass keyword arguments as `Pair`s +programmatically to DataFramesMeta.jl macros.
+ +``` +julia> df = DataFrame(x = [1, 1, 2, 2], b = [5, 6, 7, 8]); + +julia> my_kwargs = [:view => true, :skipmissing => false]; + +julia> @rsubset(df, :x .== 1; my_kwargs...) +2×2 SubDataFrame + Row │ x b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 5 + 2 │ 1 6 + +julia> @rsubset df begin + :x .== 1 + @kwarg my_kwargs... + end +2×2 SubDataFrame + Row │ x b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 5 + 2 │ 1 6 +``` + ## Creating multiple columns at once with `@astable` Often new variables may depend on the same intermediate calculations. `@astable` makes it easy to create multiple diff --git a/src/DataFramesMeta.jl b/src/DataFramesMeta.jl index 56914e42..4854507e 100644 --- a/src/DataFramesMeta.jl +++ b/src/DataFramesMeta.jl @@ -18,7 +18,7 @@ export @with, @transform, @select, @transform!, @select!, @rtransform, @rselect, @rtransform!, @rselect!, @eachrow, @eachrow!, - @byrow, @passmissing, @astable, + @byrow, @passmissing, @astable, @kwarg, @based_on, @where # deprecated const DOLLAR = raw"$" diff --git a/src/macros.jl b/src/macros.jl index 9d8712be..3e2410ae 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -499,6 +499,36 @@ macro astable(args...) throw(ArgumentError("@astable only works inside DataFramesMeta macros.")) end +""" + @kwarg(args...) + +Inside of DataFramesMeta.jl macros, pass keyword arguments to the underlying +DataFrames.jl function when arguments are written in "block" format. + +``` +julia> df = DataFrame(x = [1, 1, 2, 2], b = [5, 6, 7, 8]); + +julia> @rsubset df begin + :x == 1 + @kwarg view = true + end +2×2 SubDataFrame + Row │ x b + │ Int64 Int64 +─────┼────────────── + 1 │ 1 5 + 2 │ 1 6 +``` + +!!! note + This only has meaning inside DataFramesMeta.jl macros. It does not work outside + of DataFramesMeta.jl macros. + +""" +macro kwarg(args...)
+ throw(ArgumentError("@kwarg only works inside DataFramesMeta macros.")) +end + ############################################################################## ## ## @with @@ -728,23 +758,24 @@ write ############################################################################## function subset_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) quote - $subset($x, $(t...); skipmissing=true) + $subset($x, $(t...); (skipmissing = true,)..., $(kw...)) end end function where_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) quote - $subset($x, $(t...); skipmissing=true) + $subset($x, $(t...); (skipmissing = true,)..., $(kw...)) end end """ - @subset(d, i...) + @subset(d, i...; kwargs...) Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s. @@ -752,6 +783,9 @@ Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s. * `d` : an AbstractDataFrame or GroupedDataFrame * `i...` : expression for selecting rows +* `kwargs...` : keyword arguments passed to `DataFrames.subset` + +### Details Multiple `i` expressions are "and-ed" together. @@ -805,6 +839,25 @@ end $ASTABLE_RHS_SUBSET_DOCS +`@subset` accepts the same keyword arguments as `DataFrames.subset` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@subset(df, :a; skipmissing = false, view = true) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `subset` function.
+ +``` +@subset df begin + :a .== 1 + @kwarg skipmissing = false + @kwarg view = true +end +``` + ### Examples ```jldoctest @@ -879,6 +932,25 @@ julia> @subset(df, :a .== 1) │ Int64? String? ─────┼───────────────── 1 │ 1 x + +julia> @subset(df, :a .< 3; view = true) +2×2 SubDataFrame + Row │ a b + │ Int64? String? +─────┼───────────────── + 1 │ 1 x + 2 │ 2 y + +julia> @subset df begin + :a .< 3 + @kwarg view = true + end +2×2 SubDataFrame + Row │ a b + │ Int64? String? +─────┼───────────────── + 1 │ 1 x + 2 │ 2 y ``` """ macro subset(x, args...) @@ -886,16 +958,17 @@ macro subset(x, args...) end function rsubset_helper(x, args...) - exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) quote - $subset($x, $(t...); skipmissing=true) + $subset($x, $(t...); (skipmissing = true,)..., $(kw...)) end end """ - @rsubset(d, i...) + @rsubset(d, i...; kwargs...) Row-wise version of `@subset`, i.e. all operations use `@byrow` by default. See [`@subset`](@ref) for details. @@ -906,7 +979,7 @@ end """ - @subset(x, args...) + @where(x, args...) Deprecated version of `@subset`, see `?@subset` for details. """ @@ -916,24 +989,26 @@ macro where(x, args...) end function subset!_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) quote - $subset!($x, $(t...); skipmissing=true) + $subset!($x, $(t...); (;skipmissing = true,)..., $(kw...)) end end function rsubset!_helper(x, args...) 
- exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) + t = (fun_to_vec(ex; no_dest=true, outer_flags=outer_flags) for ex in exprs) quote - $subset!($x, $(t...); skipmissing=true) + $subset!($x, $(t...); (skipmissing = true,)..., $(kw...)) end end """ - @subset!(d, i...) + @subset!(d, i...; kwargs...) Select row subsets in `AbstractDataFrame`s and `GroupedDataFrame`s, mutating the underlying data-frame in-place. @@ -942,6 +1017,9 @@ mutating the underlying data-frame in-place. * `d` : an AbstractDataFrame or GroupedDataFrame * `i...` : expression for selecting rows +* `kwargs` : keyword arguments passed to `DataFrames.subset!` + +### Details Multiple `i` expressions are "and-ed" together. @@ -995,6 +1073,24 @@ end $ASTABLE_RHS_SUBSET_DOCS +`@subset!` accepts the same keyword arguments as `DataFrames.subset!` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@subset!(df, :a; skipmissing = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `subset!` function. + +``` +@subset! df begin + :a .== 1 + @kwarg skipmissing = false +end +``` + ### Examples ```jldoctest @@ -1094,12 +1190,11 @@ end ## @orderby ## ############################################################################## - function orderby_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) t = (fun_to_vec(ex; gensym_names = true, outer_flags = outer_flags) for ex in exprs) quote - $DataFramesMeta.orderby($x, $(t...)) + $orderby($x, $(t...); $(kw...)) end end @@ -1123,6 +1218,13 @@ end Sort rows by values in one of several columns or a transformation of columns. Always returns a fresh `DataFrame`. 
Does not accept a `GroupedDataFrame`. +### Arguments + +* `d`: a `DataFrame` or `GroupedDataFrame` +* `i...`: arguments on which to sort the object + +### Details + When given a `DataFrame`, `@orderby` applies the transformation given by its arguments (but does not create new columns) and sorts the given `DataFrame` on the result, returning a new `DataFrame`. @@ -1254,10 +1356,10 @@ macro orderby(d, args...) end function rorderby_helper(x, args...) - exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) t = (fun_to_vec(ex; gensym_names=true, outer_flags=outer_flags) for ex in exprs) quote - $DataFramesMeta.orderby($x, $(t...)) + $orderby($x, $(t...); $(kw...)) end end @@ -1280,27 +1382,31 @@ end function transform_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.transform($x, $(t...)) + $transform($x, $(t...); $(kw...)) end end """ - @transform(d, i...) + @transform(d, i...; kwargs...) Add additional columns or keys based on keyword-like arguments. ### Arguments -* `d` : an `AbstractDataFrame`, or `GroupedDataFrame` -* `i...` : keyword-like arguments defining new columns or keys, of the form `:y = f(:x)` +* `d`: an `AbstractDataFrame`, or `GroupedDataFrame` +* `i...`: transformations defining new columns or keys, of the form `:y = f(:x)` +* `kwargs...`: keyword arguments passed to `DataFrames.transform` ### Returns * `::AbstractDataFrame` or `::GroupedDataFrame` +### Details + Inputs to `@transform` can come in two formats: a `begin ... 
end` block, in which case each line in the block is a separate transformation, (`:y = f(:x)`), or as a series of @@ -1346,6 +1452,24 @@ $ASTABLE_MACRO_FLAG_DOCS $ASTABLE_RHS_SELECT_TRANSFORM_DOCS +`@transform` accepts the same keyword arguments as `DataFrames.transform` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@transform(gd, :x = :a .- 1; ungroup = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `transform` function. + +``` +@transform gd begin + :x = :a .- 1 + @kwarg ungroup = false +end +``` + ### Examples ```jldoctest @@ -1392,16 +1516,16 @@ macro transform(x, args...) end function rtransform_helper(x, args...) - exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs) quote - $DataFrames.transform($x, $(t...)) + $transform($x, $(t...); $(kw...)) end end """ - @rtransform(x, args...) + @rtransform(x, args...; kwargs...) Row-wise version of `@transform`, i.e. all operations use `@byrow` by default. See [`@transform`](@ref) for details. @@ -1418,15 +1542,16 @@ end function transform!_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.transform!($x, $(t...)) + $transform!($x, $(t...); $(kw...)) end end """ - @transform!(d, i...) + @transform!(d, i...; kwargs...) Mutate `d` inplace to add additional columns or keys based on keyword-like arguments and return it. No copies of existing columns are made. @@ -1434,12 +1559,14 @@ arguments and return it.
No copies of existing columns are made. ### Arguments * `d` : an `AbstractDataFrame`, or `GroupedDataFrame` -* `i...` : keyword-like arguments, of the form `:y = f(:x)` defining -new columns or keys +* `i...` : transformations of the form `:y = f(:x)` defining new columns or keys +* `kwargs...`: keyword arguments passed to `DataFrames.transform!` ### Returns -* `::DataFrame` +* `::DataFrame` or a `GroupedDataFrame` + +### Details Inputs to `@transform!` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate @@ -1486,6 +1613,24 @@ $ASTABLE_MACRO_FLAG_DOCS $ASTABLE_RHS_SELECT_TRANSFORM_DOCS +`@transform!` accepts the same keyword arguments as `DataFrames.transform!` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@transform!(gd, :x = :a .- 1; ungroup = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `transform!` function. + +``` +@transform! gd begin + :x = :a .- 1 + @kwarg ungroup = false +end +``` + ### Examples ```jldoctest @@ -1511,16 +1656,16 @@ macro transform!(x, args...) end function rtransform!_helper(x, args...) - exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs) quote - $DataFrames.transform!($x, $(t...)) + $transform!($x, $(t...); $(kw...)) end end """ - @rtransform!(x, args...) + @rtransform!(x, args...; kwargs...) Row-wise version of `@transform!`, i.e. all operations use `@byrow` by default. See [`@transform!`](@ref) for details.""" @@ -1535,27 +1680,31 @@ end ############################################################################## function select_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) 
+ x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.select($x, $(t...)) + $select($x, $(t...); $(kw...)) end end """ - @select(d, e...) + @select(d, i...; kwargs...) Select and transform columns. ### Arguments * `d` : an `AbstractDataFrame` or `GroupedDataFrame` -* `e` : keyword-like arguments, of the form `:y = f(:x)` specifying +* `i` : transformations of the form `:y = f(:x)` specifying new columns in terms of existing columns or symbols to specify existing columns +* `kwargs` : keyword arguments passed to `DataFrames.select` ### Returns -* `::AbstractDataFrame` +* `::AbstractDataFrame` or a `GroupedDataFrame` + +### Details Inputs to `@select` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate @@ -1602,6 +1751,24 @@ $ASTABLE_MACRO_FLAG_DOCS $ASTABLE_RHS_SELECT_TRANSFORM_DOCS +`@select` accepts the same keyword arguments as `DataFrames.select` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@select(df, :a; copycols = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `select` function. + +``` +@select gd begin + :a + @kwarg copycols = false +end +``` + ### Examples ```jldoctest @@ -1646,16 +1813,16 @@ macro select(x, args...) end function rselect_helper(x, args...) - exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs) quote - $DataFrames.select($x, $(t...)) + $select($x, $(t...); $(kw...)) end end """ - @rselect(x, args...) + @rselect(x, args...; kwargs...) Row-wise version of `@select`, i.e.
all operations use `@byrow` by default. See [`@select`](@ref) for details. @@ -1672,28 +1839,32 @@ end ############################################################################## function select!_helper(x, args...) - exprs, outer_flags = create_args_vector(args...) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) + t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.select!($x, $(t...)) + $select!($x, $(t...); $(kw...)) end end """ - @select!(d, e...) + @select!(d, i...; kwargs...) Mutate `d` in-place to retain only columns or transformations specified by `e` and return it. No copies of existing columns are made. ### Arguments * `d` : an AbstractDataFrame -* `e` : keyword-like arguments, of the form `:y = f(:x)` specifying +* `i` : transformations of the form `:y = f(:x)` specifying new columns in terms of existing columns or symbols to specify existing columns +* `kwargs` : keyword arguments passed to `DataFrames.select!` ### Returns * `::DataFrame` +### Details + Inputs to `@select!` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate transformation or selector, or as a series of @@ -1726,6 +1897,24 @@ $ASTABLE_MACRO_FLAG_DOCS $ASTABLE_RHS_SELECT_TRANSFORM_DOCS +`@select!` accepts the same keyword arguments as `DataFrames.select!` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@select!(gd, :a; ungroup = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `select!` function. + +``` +@select! gd begin + :a + @kwarg ungroup = false +end +``` + ### Examples ```jldoctest @@ -1778,16 +1967,16 @@ macro select!(x, args...) end function rselect!_helper(x, args...) 
- exprs, outer_flags = create_args_vector(args...; wrap_byrow=true) + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = true) t = (fun_to_vec(ex; gensym_names=false, outer_flags=outer_flags) for ex in exprs) quote - $DataFrames.select!($x, $(t...)) + $select!($x, $(t...); $(kw...)) end end """ - @rselect!(x, args...) + @rselect!(x, args...; kwargs...) Row-wise version of `@select!`, i.e. all operations use `@byrow` by default. See [`@select!`](@ref) for details. @@ -1803,26 +1992,33 @@ end ############################################################################## function combine_helper(x, args...; deprecation_warning = false) - deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead." + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) - exprs, outer_flags = create_args_vector(args...) + deprecation_warning && @warn "`@based_on` is deprecated. Use `@combine` instead." t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.combine($x, $(t...)) + $combine($x, $(t...); $(kw...)) end end """ - @combine(x, args...) + @combine(x, args...; kwargs...) Summarize a grouping operation ### Arguments * `x` : a `GroupedDataFrame` or `AbstractDataFrame` -* `args...` : keyword-like arguments defining new columns, of the form `:y = f(:x)` +* `args...` : transformations defining new columns, of the form `:y = f(:x)` +* `kwargs...` : keyword arguments passed to `DataFrames.combine` + +### Returns + +* A `DataFrame` or a `GroupedDataFrame` + +### Details Inputs to `@combine` can come in two formats: a `begin ... end` block, in which case each line in the block is a separate @@ -1844,6 +2040,24 @@ and $ASTABLE_MACRO_FLAG_DOCS +`@combine` accepts the same keyword arguments as `DataFrames.combine` and can be added in +two ways.
When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@combine(gd, :x = first(:a); ungroup = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `combine` function. + +``` +@combine gd begin + :x = first(:a) + @kwarg ungroup = false +end +``` + ### Examples ```julia @@ -1916,18 +2130,26 @@ end ############################################################################## function by_helper(x, what, args...) - # Only allow one argument when returning a Table object - exprs, outer_flags = create_args_vector(args...) + # Handle keyword arguments first because of the grouping instruction, `what` + if x isa Expr && x.head === :parameters + # with keyword arguments, everything is shifted to + # the right + new_what = args[1] + args = (what, args[2:end]...) + what = new_what + end + + x, exprs, outer_flags, kw = get_df_args_kwargs(x, args...; wrap_byrow = false) t = (fun_to_vec(ex; gensym_names = false, outer_flags = outer_flags) for ex in exprs) quote - $DataFrames.combine($groupby($x, $what), $(t...)) + $combine($groupby($x, $what), $(t...); $(kw...)) end end """ - @by(d::AbstractDataFrame, cols, e...) + @by(d::AbstractDataFrame, cols, e...; kwargs...) Split-apply-combine in one step. @@ -1937,10 +2159,13 @@ Split-apply-combine in one step. * `cols` : a column indicator (Symbol, Int, Vector{Symbol}, etc.) * `e` : keyword-like arguments, of the form `:y = f(:x)` specifying new columns in terms of column groupings +* `kwargs` : keyword arguments passed to `DataFrames.combine` ### Returns -* `::DataFrame` +* `::DataFrame` or a `GroupedDataFrame` + +### Details Transformation inputs to `@by` can come in two formats: a `begin ...
end` block, in which case each line in the block is a separate @@ -1962,6 +2187,28 @@ and $ASTABLE_MACRO_FLAG_DOCS +`@by` accepts the same keyword arguments as `DataFrames.combine` and can be added in +two ways. When inputs are given as multiple arguments, they are added at the end after +a semi-colon `;`, as in + +``` +@by(ds, :g, :x = first(:a); ungroup = false) +``` + +When inputs are given in "block" format, the last lines may be written +`@kwarg key = value`, which indicates keyword arguments to be passed to `combine` function. + +``` +@by df :a begin + :x = first(:a) + @kwarg ungroup = false +end +``` + +Though `@by` performs both `groupby` and `combine`, `@by` only forwards keyword arguments +to `combine`, and not `groupby`. To pass keyword arguments to `groupby`, perform the +`groupby` and `@combine` steps separately. + ### Examples ```julia diff --git a/src/parsing.jl b/src/parsing.jl index 2a5adbd0..50f6a673 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -400,8 +400,28 @@ function make_source_concrete(x::AbstractVector) end end -function create_args_vector(args...; wrap_byrow::Bool=false) - create_args_vector(Expr(:block, args...); wrap_byrow = wrap_byrow) + +function get_df_args_kwargs(x, args...; wrap_byrow = false) + kw = [] + if x isa Expr && x.head === :parameters + append!(kw, x.args) + x = first(args) + args = args[2:end] + end + + transforms, outer_flags, kw = create_args_vector!(kw, args...; wrap_byrow = wrap_byrow) + + return (x, transforms, outer_flags, kw) +end + +function create_args_vector!(kw, args...; wrap_byrow::Bool=false) + create_args_vector!(kw, Expr(:block, args...); wrap_byrow = wrap_byrow) +end + +function get_kw_from_macro_call(e::Expr) + nv = e.args[3] + + return nv +end """ @@ -415,7 +435,9 @@ If a `:block` expression, return the `args` of the block as an array. If a simple expression, wrap the expression in a one-element vector.
""" -function create_args_vector(arg; wrap_byrow::Bool=false) +function create_args_vector!(kw, arg; wrap_byrow::Bool=false) + # `kw` already holds any keyword arguments written after `;`. `@kwarg` lines + # found in a block are appended to it, so both sources are forwarded. arg, outer_flags = extract_macro_flags(MacroTools.unblock(arg)) if wrap_byrow @@ -426,11 +448,26 @@ function create_args_vector(arg; wrap_byrow::Bool=false) outer_flags[BYROW_SYM][] = true end + # @astable means the whole block is one transformation if arg isa Expr && arg.head == :block && !outer_flags[ASTABLE_SYM][] x = MacroTools.rmlines(arg).args + transforms = [] + seen_kw = false + for xi in x + if is_macro_head(xi, "@kwarg") + kw_item = get_kw_from_macro_call(xi) + push!(kw, kw_item) + seen_kw = true + else + if seen_kw + throw(ArgumentError("@kwarg calls must be at end of block")) + end + push!(transforms, xi) + end + end else - x = Any[arg] + transforms = Any[arg] end - return x, outer_flags + return transforms, outer_flags, kw end diff --git a/test/dataframes.jl b/test/dataframes.jl index 1b032c30..5393b25e 100644 --- a/test/dataframes.jl +++ b/test/dataframes.jl @@ -553,11 +553,6 @@ cr = "c" @test_throws MethodError @eval @select(df, :n = sum(Between(:i, :t))) end -@testset "Keyword arguments failure" begin - @test_throws LoadError @eval @transform(df; :n = :i) - @test_throws LoadError @eval @select(df; :n = :i) -end - @testset "with" begin df = DataFrame(A = 1:3, B = [2, 1, 2]) diff --git a/test/grouping.jl b/test/grouping.jl index e4ea60b0..35ea8df2 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -149,7 +149,6 @@ newvar = :n @testset "Limits of @combine" begin @test_throws MethodError @eval @combine(gd, :n = sum(Between(:i, :t))) - @test_throws LoadError @eval @combine(gd; :n = mean(:i)) @test_throws ArgumentError @eval @combine(gd, :n = mean(:i) + mean(cols(1))) end diff --git a/test/keyword_arguments.jl b/test/keyword_arguments.jl new file mode 100644 index 00000000..2d7dd149 --- /dev/null +++
b/test/keyword_arguments.jl @@ -0,0 +1,404 @@ +module TestKW + +using Test +using DataFrames +using DataFramesMeta +using Statistics + +const ≅ = isequal + +df = DataFrame(a = [1, 1, 2, 2], b = [3, 4, 5, missing]) +gd = groupby(df, :a) + +# @subset +# skipmissing, view, ungroup +@testset "@subset keyword" begin + correct = view(df, df.a .== 1, :) + df2 = @subset(df, :a .== 1; view = true) + @test df2 ≅ correct + + df2 = @subset df begin + :a .== 1 + @kwarg view = true + end + @test df2 ≅ correct + + @test_throws ArgumentError @subset(df, :b .== 3; skipmissing = false) + + correct = gd + gd2 = @subset(gd, fill(true, length(:a)); ungroup = false) + @test gd2 ≅ correct + + gd2 = @subset gd begin + fill(true, length(:a)) + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @rsubset +# skipmissing, view, ungroup +@testset "@rsubset keyword" begin + correct = view(df, df.a .== 1, :) + + df2 = @rsubset(df, :a == 1; view = true) + @test df2 ≅ view(df, df.a .== 1, :) + + df2 = @rsubset df begin + :a == 1 + @kwarg view = true + end + @test df2 ≅ correct + + @test_throws ArgumentError @rsubset(df, :b == 3; skipmissing = false) + + correct = gd + gd2 = @rsubset(gd, first(true); ungroup = false) + @test gd2 ≅ correct + + gd2 = @rsubset gd begin + first(true); + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @subset! +# skipmissing, ungroup +@testset "@subset! keyword" begin + @test_throws ArgumentError @subset!(copy(df), :b .== 3; skipmissing = false) + + correct = gd + + gd2 = @subset!(deepcopy(gd), [true, true]; ungroup = false) + @test gd2 ≅ correct + + gd2 = @subset! deepcopy(gd) begin + [true, true] + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @rsubset! +# skipmissing, ungroup +@testset "@rsubset! keyword" begin + @test_throws ArgumentError @rsubset!(copy(df), :b == 3; skipmissing = false) + + correct = gd + + gd2 = @rsubset!(deepcopy(gd), first(true); ungroup = false) + @test gd2 ≅ correct + + gd2 = @rsubset!
deepcopy(gd) begin + first(true) + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @orderby # Not added +@test_throws MethodError @orderby(df, :a; view = true) + +# @rorderby # Not added +@test_throws MethodError @rorderby(df, :a; view = true) + +# @select +# copycols, renamecols (not relevant) +# keepkeys, ungroup +@testset "@select keyword" begin + correct = DataFrame(a = df.a; copycols = false) + + df2 = @select(df, :a; copycols = false) + @test (df2 ≅ correct && (df2.a === correct.a)) + + df2 = @select df begin + :a + @kwarg copycols = false + end + @test (df2 ≅ correct && (df2.a === correct.a)) + + correct = df + df2 = @select(gd, :b; keepkeys = true) + @test df2 ≅ correct + + df2 = @select gd begin + :b + @kwarg keepkeys = true + end + @test df2 ≅ correct + + correct = gd + + gd2 = @select(gd, :b; ungroup = false) + gd2 = @select gd begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @rselect +# copycols, renamecols (not relevant) +# keepkeys, ungroup +@testset "@rselect keyword" begin + correct = DataFrame(a = df.a; copycols = false) + + df2 = @rselect(df, :a; copycols = false) + @test (df2 ≅ correct && (df2.a === correct.a)) + + df2 = @rselect df begin + :a + @kwarg copycols = false + end + @test (df2 ≅ correct && (df2.a === correct.a)) + + correct = df + df2 = @rselect(gd, :b; keepkeys = true) + @test df2 ≅ correct + + df2 = @rselect gd begin + :b + @kwarg keepkeys = true + end + @test df2 ≅ correct + + correct = gd + + gd2 = @rselect(gd, :b; ungroup = false) + gd2 = @rselect gd begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @select! +# renamecols (not relevant), ungroup +@testset "@select! keyword" begin + correct = gd + + gd2 = @select!(deepcopy(gd), :b; ungroup = false) + @test gd2 ≅ correct + + @select! deepcopy(gd) begin + :b + @kwarg ungroup = false + end + + @test gd2 ≅ correct +end + +# @rselect! +# renamecols (not relevant), ungroup +@testset "@rselect! 
keyword" begin + correct = gd + + gd2 = @rselect!(deepcopy(gd), :b; ungroup = false) + @test gd2 ≅ correct + + @rselect! deepcopy(gd) begin + :b + @kwarg ungroup = false + end + + @test gd2 ≅ correct +end + +# @transform +# copycols, renamecols (not relevant) +# ungroup +@testset "@transform keyword" begin + correct = df.b + + df2 = @transform(df, :a; copycols = false) + @test df2 ≅ df + # The :a above counts as a transformation, and + # is thus copied + @test df2.b === correct + + df2 = @transform df begin + :a + @kwarg copycols = false + end + @test df2.b === correct + + correct = gd + + gd2 = @transform(gd, :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @transform gd begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @rtransform +# copycols, renamecols (not relevant) +# ungroup +@testset "@rtransform keyword" begin + correct = df.b + + df2 = @rtransform(df, :a; copycols = false) + @test df2.b === correct + + df2 = @rtransform df begin + :a + @kwarg copycols = false + end + @test df2.b === correct + + correct = gd + + gd2 = @rtransform(gd, :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @rtransform gd begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @transform! +# renamecols (not relevant), ungroup +@testset "@transform! keyword" begin + correct = df.a + + correct = gd + + gd2 = @transform(deepcopy(gd), :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @transform deepcopy(gd) begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + + +# @rtransform! +# renamecols (not relevant), ungroup +@testset "@rtransform! 
keyword" begin + correct = df.a + + correct = gd + + gd2 = @rtransform(deepcopy(gd), :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @rtransform deepcopy(gd) begin + :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +# @combine +# renamecols (not relevant), keepkeys, +# ungroup +@testset "@combine keyword" begin + correct = DataFrame(a = [1, 2], b_f = [3, 5]) + + df2 = @combine(gd, :b_f = first(:b); keepkeys = true) + @test sort(df2, :a) ≅ correct + + df2 = @combine gd begin + :b_f = first(:b) + @kwarg keepkeys = true + end + @test sort(df2, :a) ≅ correct + + correct = gd + + gd2 = @combine(gd, :b = :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @combine gd begin + :b = :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + + +# @by +# renamecols (not relevant), keepkeys, +# ungroup +@testset "@combine keyword" begin + correct = DataFrame(a = [1, 2], b_f = [3, 5]) + + df2 = @by(df, :a, :b_f = first(:b); keepkeys = true) + @test sort(df2, :a) ≅ correct + + df2 = @by df :a begin + :b_f = first(:b) + @kwarg keepkeys = true + end + @test sort(df2, :a) ≅ correct + + correct = gd + + gd2 = @by(df, :a, :b = :b; ungroup = false) + @test gd2 ≅ correct + + gd2 = @by df :a begin + :b = :b + @kwarg ungroup = false + end + @test gd2 ≅ correct +end + +@testset "Pairs and keyword arguments" begin + correct = @view df[df.a .== 1, :] + + t = [:view => true] + ts = [:skipmissing => true, :view => true] + + df2 = @rsubset(df, :a == 1; :view => true) + @test df2 == correct + + df2 = @rsubset(df, :a == 1; :view => true, :skipmissing => false) + @test df2 == correct + + df2 = @rsubset(df, :a == 1; t...) + @test df2 == correct + + df2 = @rsubset(df, :a == 1; ts...) + @test df2 == correct + + df2 = @rsubset df begin + :a == 1 + @kwarg :view => true + end + @test df2 == correct + + df2 = @rsubset df begin + :a == 1 + @kwarg [:view => true]... + end + @test df2 == correct + + df2 = @rsubset df begin + :a == 1 + @kwarg [:view => true, :skipmissing => false]... 
+ end + @test df2 == correct + + df2 = @rsubset df begin + :a == 1 + @kwarg t... + end + @test df2 == correct + + df2 = @rsubset df begin + :a == 1 + @kwarg ts... + end + @test df2 == correct +end + +end # module \ No newline at end of file