From 758b1d6fe3263d2f9ab2005087105b95a26697a7 Mon Sep 17 00:00:00 2001
From: Peter Deffebach
Date: Sat, 31 Jul 2021 08:02:21 -0400
Subject: [PATCH] initial commit
---
src/macros.jl | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)
diff --git a/src/macros.jl b/src/macros.jl
index 6da2ad53..eb292f2b 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -283,6 +283,101 @@ macro byrow(args...)
end
+struct SpreadMissings{F} <: Function
+ f::F
+end
+
+function (f::SpreadMissings{F})(vecs::AbstractVector...) where {F}
+ if any(x -> x isa AbstractVector{>:Missing}, vecs)
+
+ firstindices = eachindex(first(vecs))
+ if !all(x -> eachindex(x) == firstindices, vecs)
+ err_str = "Indices of arguments do not match. Indices are " *
+ join(string.eachindex.(vecs), ", ", " and ") * "."
+
+ throw(ArgumentError(err_str))
+ end
+
+ nonmissingmask = fill(true, length(vecs[1]))
+ for v in vecs
+ nonmissingmask .&= .!ismissing.(v)
+ end
+
+ nonmissinginds = findall(nonmissingmask)
+
+ vecs_counter = 1
+ newargs = ntuple(i -> view(vecs[i], nonmissinginds), length(vecs))
+
+ res = f.f(newargs...)
+
+ if res isa AbstractVector && length(res) == length(nonmissinginds)
+ out = similar(res, Union{eltype(res), Missing}, length(vecs[1]))
+ fill!(out, missing)
+ out[nonmissinginds] .= res
+
+ return out
+ else
+ return res
+ end
+ else
+ return f.f(vecs...)
+ end
+end
+
+"""
+ spreadmissings(f)
+
+Given a function `f`, wraps `f` so that a `view` selecting non-missing
+values of `AbstractVector` argument is applied before calling `f`. After `f`
+is called, if the result is a `AbstractVector` of the appropriate length,
+is is "spread" across the length of the original inputs, imputing `missing` where
+indices of inputs contain `missing` values.
+
+Consider `spreadmissing(f)(x1, x2)`, where `x1` and `x2` are defined as
+
+```
+x = [1, missing, 3, 4]
+y = [5, 6, 7, missing]
+```
+
+and the function
+
+```
+no_missing(x::Int, y::Int) = x + y
+f(x::AbstractVector, y::AbstractVector) = no_missing.(last(x), y)
+```
+
+Then the indices for which both `x` and `y` are non-missing are `[1, 2]`.
+`spreadmissings(f)` then calls `f(view(x, [1, 3]), view(y, [1, 3]))`. This will return
+`[8, 10]`.
+
+Finally, `spreadmissings(f)` will "spread" the vector `[7, 8]` across the non-missing
+indices of the inputs, filling in `missing`, otherwise. As a result we have
+
+```
+julia> DataFramesMeta.spreadmissings(f)(x, y)
+4-element Vector{Union{Missing, Int64}}:
+ 8
+ missing
+ 10
+ missing
+```
+
+If the result of `f(view()...)` is not a vector, the it is returned as-is.
+
+### Examples
+
+```
+julia> x = [1, 2, 3, 4, missing, 6]; y = [11, 5, missing, 8, 15, 12];
+
+julia> DataFramesMeta.spreadmissings(cor)(x, y)
+0.3803061508332227
+```
+
+
+"""
+spreadmissings(f) = SpreadMissings{Core.Typeof(f)}(f)
+
##############################################################################
##
## @with