From a3a0cf5c371529d59fbe051921a3a23f498cec84 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Sat, 31 Oct 2020 13:07:07 -0400 Subject: [PATCH 01/26] initial commit --- src/Missings.jl | 90 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 8 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 77482f7..2c68930 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -2,7 +2,7 @@ module Missings export allowmissing, disallowmissing, ismissing, missing, missings, Missing, MissingException, levels, coalesce, passmissing, nonmissingtype, - skipmissings + skipmissings, spreadmissings using Base: ismissing, missing, Missing, MissingException using Base: @deprecate @@ -208,6 +208,80 @@ missing """ passmissing(f) = PassMissing{Core.Typeof(f)}(f) +struct SpreadMissings{F} <: Function + f::F +end + +function (f::SpreadMissings{F})(x) where {F} + @assert x isa AbstractVector + + if x isa AbstractVector{>:Missing} + nonmissinginds = collect(eachindex(skipmissing(x))) + res = f.f(view(x, nonmissinginds)) + + out = Vector{Union{Missing, eltype(res)}}(missing, length(x)) + out[nonmissinginds] .= res + + return out + else + return f.f(x) + end +end + +function (f::SpreadMissings{F})(xs...) where {F} + @assert all(x -> x isa AbstractVector, xs) + + x1 = first(xs) + @assert all(x -> length(x) == length(x1), xs) + + if any(x -> x isa AbstractVector{>:Missing}, xs) + nonmissinginds = collect(eachindex(skipmissings(xs...))) + # TODO: narrow the type of the `view` to exclude `Missing` + res = f.f((view(x, nonmissinginds) for x in xs)...) + + out = Vector{Union{Missing, eltype(res)}}(missing, length(x1)) + out[nonmissinginds] .= res + + return out + else + return f.f(xs...) + end +end + +""" + spreadmissings(f) + +Return a function that performs the following transformation given the input `args...` + +1. Assumes all `args` are `AbstractArray`s +2. Determines the indicies of `args` for which all elements are non-missing +3. Applies `f` on the `view`s or `args` for the non-missing indices, returning `res` +4. Returns a result the same length as the elements in `args`, filling in `missing` + as needed + +# Examples +``` +julia> x = [0, 1, 2, missing]; y = [-1, 0, missing, 2]; + +julia> function restricted_fun(x, y) map(x, y) do xi, yi + if xi < 1 || yi < 1 # will error on missings + return 1 + else + return 2 + end + end + end; + +julia> spreadmissings(restricted_fun)(x, y) +4-element Vector{Union{Missing, Int64}}: + 1 + 1 + missing + missing +``` +""" +spreadmissings(f) = SpreadMissings{Core.Typeof(f)}(f) + """ skipmissings(args...) @@ -258,7 +332,7 @@ struct SkipMissings{V, T} others::T end -Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i) +Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i) for oth in others oth[i] === missing && return true end @@ -267,7 +341,7 @@ Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{Abstract end @inline function _anymissingiterate(others::Tuple, state) - for oth in others + for oth in others y = iterate(oth, state) y !== nothing && first(y) === missing && return true end @@ -278,7 +352,7 @@ end const SkipMissingsofArrays = SkipMissings{V, T} where {V <: AbstractArray, T <: Tuple{Vararg{AbstractArray}}} -function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V +function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V print(io, SkipMissings, '{', V, '}', '(', itr.x, ')', " comprised of " * "$(length(itr.others) + 1) iterators") end @@ -335,7 +409,7 @@ end @inline function Base.getindex(itr::SkipMissingsofArrays, i) @boundscheck checkbounds(itr.x, i) @inbounds xi = itr.x[i] - if xi === missing || @inbounds _anymissingindex(itr.others, i) + if xi === missing || @inbounds _anymissingindex(itr.others, i) throw(MissingException("the value at index $i is missing for some element")) end return xi @@ -380,9 +454,9 @@ Base.mapreduce_impl(f, op, A::SkipMissingsofArrays, ifirst::Integer, ilast::Inte A = itr.x if ifirst == ilast @inbounds a1 = A[ifirst] - if a1 === missing + if a1 === missing return nothing - elseif _anymissingindex(itr.others, ifirst) + elseif _anymissingindex(itr.others, ifirst) return nothing else return Some(Base.mapreduce_first(f, op, a1)) @@ -436,7 +510,7 @@ end Return a vector similar to the array wrapped by the given `SkipMissings` iterator but skipping all elements with a `missing` value in one of the iterators passed to `skipmissing` and elements for which `f` returns `false`. This method -only applies when all iterators passed to `skipmissings` are arrays. +only applies when all iterators passed to `skipmissings` are arrays. # Examples ``` From 2ed4b85731e606f78bef23c976bc2edf7137991d Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Wed, 4 Nov 2020 20:05:45 -0500 Subject: [PATCH 02/26] refactor --- src/Missings.jl | 58 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 2c68930..cf7c7ff 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -229,18 +229,29 @@ function (f::SpreadMissings{F})(x) where {F} end function (f::SpreadMissings{F})(xs...) where {F} - @assert all(x -> x isa AbstractVector, xs) - - x1 = first(xs) - @assert all(x -> length(x) == length(x1), xs) - if any(x -> x isa AbstractVector{>:Missing}, xs) - nonmissinginds = collect(eachindex(skipmissings(xs...))) - # TODO: narrow the type of the `view` to exclude `Missing` - res = f.f((view(x, nonmissinginds) for x in xs)...) + vecs = Base.filter(x -> x isa AbstractVector, xs) + s = skipmissings(vecs...) + vecs_counter = 1 + newargs = ntuple(length(xs)) do i + if xs[i] isa AbstractVector + t = s[vecs_counter] + vecs_counter += 1 + else + t = xs[i] + end + t + end - out = Vector{Union{Missing, eltype(res)}}(missing, length(x1)) - out[nonmissinginds] .= res + res = f.f(newargs...) + + if res isa AbstractVector + out = Vector{Union{Missing, eltype(res)}}(missing, length(first(vecs))) + out[collect(eachindex(first(s)))] .= res + else + out = Vector{Union{Missing, typeof(res)}}(missing, length(first(vecs))) + out[collect(eachindex(first(s)))] .= Ref(res) + end return out else @@ -251,19 +262,32 @@ end """ spreadmissings(f) -Return a function that performs the following transformation given the input `args...` +Given a function `f`, wraps a function `f` but performs a transformation +on arguments before executing. Given the call + +``` +spreadmissings(f)(x::AbstractVector, y::Integer, z::AbstractVector) +``` + +will construct the intermedaite variables + +``` +sx, sy = skipmissings(x, y) +``` -1. Assumes all `args` are `AbstractArray`s -2. Determines the indicies of `args` for which all elements are non-missing -3. Applies `f` on the `view`s or `args` for the non-missing indices, returning `res` -4. Returns a result the same length as the elements in `args`, filling in `missing` - as needed +and call + +``` +f(sx, y, sy) + +``` # Examples ``` julia> x = [0, 1, 2, missing]; y = [-1, 0, missing, 2]; -julia> function restricted_fun(x, y) map(x, y) do xi, yi +julia> function restricted_fun(x, y) + map(x, y) do xi, yi if xi < 1 || yi < 1 # will error on missings return 1 else From 298a5df463534fe53a9fb8a312cf8b8eeffa34e1 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Sun, 29 Nov 2020 20:35:39 -0500 Subject: [PATCH 03/26] respont to Milan --- src/Missings.jl | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index cf7c7ff..0936d75 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -212,30 +212,15 @@ struct SpreadMissings{F} <: Function f::F end -function (f::SpreadMissings{F})(x) where {F} - @assert x isa AbstractVector - - if x isa AbstractVector{>:Missing} - nonmissinginds = collect(eachindex(skipmissing(x))) - res = f.f(view(x, nonmissinginds)) - - out = Vector{Union{Missing, eltype(res)}}(missing, length(x)) - out[nonmissinginds] .= res - - return out - else - return f.f(x) - end -end - -function (f::SpreadMissings{F})(xs...) where {F} +function (f::SpreadMissings{F})(xs...; kwargs...) where {F} if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) s = skipmissings(vecs...) + nonmissinginds = collect(eachindex(first(s))) vecs_counter = 1 newargs = ntuple(length(xs)) do i if xs[i] isa AbstractVector - t = s[vecs_counter] + t = view(xs[i], nonmissinginds) vecs_counter += 1 else t = xs[i] @@ -243,19 +228,20 @@ function (f::SpreadMissings{F})(xs...) where {F} t end - res = f.f(newargs...) + res = f.f(newargs...; kwargs...) if res isa AbstractVector - out = Vector{Union{Missing, eltype(res)}}(missing, length(first(vecs))) - out[collect(eachindex(first(s)))] .= res + out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) + fill!(out, missing) + out[nonmissinginds] .= res else - out = Vector{Union{Missing, typeof(res)}}(missing, length(first(vecs))) - out[collect(eachindex(first(s)))] .= Ref(res) + out = similar(vecs[1], Union{typeof(res), Missing}) + out[nonmissinginds] .= Ref(res) end return out else - return f.f(xs...) + return f.f(xs...; kwargs...) end end From b3163ce973e268a8a8b5b8431e8944492ff99770 Mon Sep 17 00:00:00 2001 From: pdeffebach Date: Sun, 29 Nov 2020 20:40:26 -0500 Subject: [PATCH 04/26] add missing fill --- src/Missings.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Missings.jl b/src/Missings.jl index 0936d75..bfda71a 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -236,6 +236,7 @@ function (f::SpreadMissings{F})(xs...; kwargs...) where {F} out[nonmissinginds] .= res else out = similar(vecs[1], Union{typeof(res), Missing}) + fill!(out, missing) out[nonmissinginds] .= Ref(res) end From 3b6621f1185ebcf24c92f2b6e245a0f2f84ebf1c Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Mon, 28 Jun 2021 19:00:17 -0400 Subject: [PATCH 05/26] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/Missings.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index bfda71a..09ac5ae 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -215,8 +215,11 @@ end function (f::SpreadMissings{F})(xs...; kwargs...) where {F} if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) - s = skipmissings(vecs...) - nonmissinginds = collect(eachindex(first(s))) + nonmissingmask = fill(true, length(vecs[1])) + for v in vecs + nonmissingmask .&= .!ismissing(v) + end + nonmissinginds = findall(nonmissingmask) vecs_counter = 1 newargs = ntuple(length(xs)) do i if xs[i] isa AbstractVector From fdecc8b2914be2a8e6619540789fea2c0aab85db Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Mon, 28 Jun 2021 15:08:45 -0400 Subject: [PATCH 06/26] respond to a few comments --- src/Missings.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 09ac5ae..34e034c 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -215,11 +215,17 @@ end function (f::SpreadMissings{F})(xs...; kwargs...) where {F} if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) - nonmissingmask = fill(true, length(vecs[1])) - for v in vecs - nonmissingmask .&= .!ismissing(v) - end - nonmissinginds = findall(nonmissingmask) + + findex = eachindex(first(vecs)) + @assert all(x -> eachindex(x) == findex, vecs) + + + nonmissingmask = fill(true, length(vecs[1])) + for v in vecs + nonmissingmask .&= .!ismissing(v) + end + nonmissinginds = findall(nonmissingmask) + vecs_counter = 1 newargs = ntuple(length(xs)) do i if xs[i] isa AbstractVector From 60910e843245cf18c41c8931c373563526196211 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Mon, 28 Jun 2021 15:15:42 -0400 Subject: [PATCH 07/26] fix mask --- src/Missings.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 34e034c..4f880c3 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -219,17 +219,15 @@ function (f::SpreadMissings{F})(xs...; kwargs...) where {F} findex = eachindex(first(vecs)) @assert all(x -> eachindex(x) == findex, vecs) - nonmissingmask = fill(true, length(vecs[1])) for v in vecs - nonmissingmask .&= .!ismissing(v) + nonmissingmask .&= .!ismissing.(v) end - nonmissinginds = findall(nonmissingmask) vecs_counter = 1 newargs = ntuple(length(xs)) do i if xs[i] isa AbstractVector - t = view(xs[i], nonmissinginds) + t = view(xs[i], nonmissingmask) vecs_counter += 1 else t = xs[i] @@ -242,11 +240,11 @@ function (f::SpreadMissings{F})(xs...; kwargs...) where {F} if res isa AbstractVector out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) fill!(out, missing) - out[nonmissinginds] .= res + out[nonmissingmask] .= res else out = similar(vecs[1], Union{typeof(res), Missing}) fill!(out, missing) - out[nonmissinginds] .= Ref(res) + out[nonmissingmask] .= Ref(res) end return out From 4e77fa8bbe7150920a196e998f6bbd454c1a814d Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sun, 5 Dec 2021 19:27:53 -0500 Subject: [PATCH 08/26] add complicated error --- src/Missings.jl | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Missings.jl b/src/Missings.jl index 4f880c3..8b55f4e 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -213,11 +213,32 @@ struct SpreadMissings{F} <: Function end function (f::SpreadMissings{F})(xs...; kwargs...) where {F} + if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) findex = eachindex(first(vecs)) - @assert all(x -> eachindex(x) == findex, vecs) + if !(all(x -> eachindex(x) == findex, vecs[2:end])) + d = Dict() + for i in 1:length(vecs) + e = eachindex(vecs[i]) + if eachindex(e) in keys(d) + push!(d[i], i) + else + d[e] = [i] + end + end + s = "The indices of vector-input arguments are not all " * + "the same.\n" + + for k in keys(d) + inds = join(d[k], ", ", " and ") + ind_msg = "Vector inputs $inds have indices $k \n" + s = s * ind_msg + end + + throw(ArgumentError(s)) + end nonmissingmask = fill(true, length(vecs[1])) for v in vecs From 08045efb983605683c78e693576217d2734f192e Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sun, 5 Dec 2021 20:02:10 -0500 Subject: [PATCH 09/26] add keyword argument funcitonlaity --- Manifest.toml | 9 +++++++++ src/Missings.jl | 29 +++++++++++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..a996a96 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,9 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.7.0" +manifest_format = "2.0" + +[[deps.DataAPI]] +git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.9.0" diff --git a/src/Missings.jl b/src/Missings.jl index 8b55f4e..e359bd6 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -212,8 +212,9 @@ struct SpreadMissings{F} <: Function f::F end -function (f::SpreadMissings{F})(xs...; kwargs...) where {F} - +function (f::SpreadMissings{F})(args...; kwargs...) where {F} + kwargs_vals = values(values(kwargs)) + xs = tuple(args..., kwargs_vals...) if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) @@ -245,18 +246,26 @@ function (f::SpreadMissings{F})(xs...; kwargs...) where {F} nonmissingmask .&= .!ismissing.(v) end - vecs_counter = 1 - newargs = ntuple(length(xs)) do i - if xs[i] isa AbstractVector - t = view(xs[i], nonmissingmask) - vecs_counter += 1 + newargs = ntuple(length(args)) do i + a = args[i] + if a isa AbstractVector + view(a, nonmissingmask) + else + a + end + end + + new_kwargs_vals = ntuple(length(kwargs_vals)) do i + a = kwargs_vals[i] + if a isa AbstractVector + view(a, nonmissingmask) else - t = xs[i] + a end - t end + new_kwargs = NamedTuple{keys(kwargs)}(new_kwargs_vals) - res = f.f(newargs...; kwargs...) + res = f.f(newargs...; new_kwargs...) if res isa AbstractVector out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) From 6897e2643488d849470f742eb830800d88b59046 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 9 Dec 2021 16:20:31 -0500 Subject: [PATCH 10/26] no manifest --- .gitignore | 1 + Manifest.toml | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) delete mode 100644 Manifest.toml diff --git a/.gitignore b/.gitignore index 8c960ec..3f02ca7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.jl.cov *.jl.*.cov *.jl.mem +Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index a996a96..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,9 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -julia_version = "1.7.0" -manifest_format = "2.0" - -[[deps.DataAPI]] -git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.9.0" From 67be697d591f8d95faff39271acc7a219ffc61a0 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Thu, 9 Dec 2021 16:53:03 -0500 Subject: [PATCH 11/26] add check for bad types --- src/Missings.jl | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index e359bd6..b868ab1 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -212,9 +212,19 @@ struct SpreadMissings{F} <: Function f::F end +function non_spreadabble_check(t::Union{AbstractDict, NamedTuple, Tuple}) + T = typeof(t) + s = "Spreadmissings on $T is reserved. Please wrap in Ref to be " * + "treated as a scalar." + throw(ArgumentError(s)) +end +non_spreadabble_check(x) = nothing + function (f::SpreadMissings{F})(args...; kwargs...) where {F} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) + foreach(non_spreadabble_check, xs) + if any(x -> x isa AbstractVector{>:Missing}, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) @@ -234,7 +244,7 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} for k in keys(d) inds = join(d[k], ", ", " and ") - ind_msg = "Vector inputs $inds have indices $k \n" + ind_msg = "Vector inputs $inds have indices $k\n" s = s * ind_msg end @@ -245,11 +255,12 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} for v in vecs nonmissingmask .&= .!ismissing.(v) end + nonmissinginds = findall(nonmissingmask) newargs = ntuple(length(args)) do i a = args[i] if a isa AbstractVector - view(a, nonmissingmask) + view(a, nonmissinginds) else a end @@ -258,7 +269,7 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} new_kwargs_vals = ntuple(length(kwargs_vals)) do i a = kwargs_vals[i] if a isa AbstractVector - view(a, nonmissingmask) + view(a, nonmissinginds) else a end @@ -293,17 +304,14 @@ on arguments before executing. Given the call spreadmissings(f)(x::AbstractVector, y::Integer, z::AbstractVector) ``` -will construct the intermedaite variables - -``` -sx, sy = skipmissings(x, y) -``` - -and call +will find the indices which corresond to `missing` values in *both* +`x` and `z`. Then apply `f` on the `view`s of `x` and `z` which +contain non-missing values. In essense: ``` +inds = .!missing.(x) .& .!missing.(z) +sx = view(x, inds); sy = view(y, inds) f(sx, y, sy) - ``` # Examples From 29d95c7fa9527dcb1746bf5e984413d43168504d Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 27 Oct 2023 10:37:52 -0400 Subject: [PATCH 12/26] document behavior of spread --- src/Missings.jl | 83 +++++-- .../.ipynb_checkpoints/runtests-checkpoint.jl | 227 ++++++++++++++++++ 2 files changed, 290 insertions(+), 20 deletions(-) create mode 100644 test/.ipynb_checkpoints/runtests-checkpoint.jl diff --git a/src/Missings.jl b/src/Missings.jl index b868ab1..1f75741 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -210,6 +210,13 @@ passmissing(f) = PassMissing{Core.Typeof(f)}(f) struct SpreadMissings{F} <: Function f::F + spread::Symbol + function SpreadMissings(f, spread) + if !(spread in (:default, :nonmissing, :none)) + throw(ArgumentError("spread must be either :default, :nonmissing, or :none")) + end + new{Core.Typeof(f)}(f, spread) + end end function non_spreadabble_check(t::Union{AbstractDict, NamedTuple, Tuple}) @@ -220,14 +227,44 @@ function non_spreadabble_check(t::Union{AbstractDict, NamedTuple, Tuple}) end non_spreadabble_check(x) = nothing +""" +Given an input vector `a` where `nonmissinginds` is guaranteed +to not include any missing values, return a `SubArray` referencing +the `nonmissinginds`. The element type of the returned output +does not include `missing`. +""" +nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) = + SubArray{nonmissingtype(eltype(a)), 1, typeof(a), Tuple{typeof(nonmissinginds)}, true}(a, (nonmissinginds,), 0, 1) + +function maybespread(res, vecs, spread) + res = f.f(newargs...; new_kwargs...) + + if spread == :default || spread == :nonmissinginds + out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) + fill!(out, missing) + out[nonmissingmask] .= res + + return out + else + return res + end + + return out +end + function (f::SpreadMissings{F})(args...; kwargs...) where {F} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) foreach(non_spreadabble_check, xs) + # Detect vector inputs which contain missing in + # either the main arguments or keyword arguments if any(x -> x isa AbstractVector{>:Missing}, xs) + # Check that all vector inputs have the + # same indices. + # + # TODO: Allow users to protect vector inputs vecs = Base.filter(x -> x isa AbstractVector, xs) - findex = eachindex(first(vecs)) if !(all(x -> eachindex(x) == findex, vecs[2:end])) d = Dict() @@ -256,11 +293,10 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} nonmissingmask .&= .!ismissing.(v) end nonmissinginds = findall(nonmissingmask) - newargs = ntuple(length(args)) do i a = args[i] if a isa AbstractVector - view(a, nonmissinginds) + nomissing_subarray(a, nonmissinginds) else a end @@ -269,33 +305,20 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} new_kwargs_vals = ntuple(length(kwargs_vals)) do i a = kwargs_vals[i] if a isa AbstractVector - view(a, nonmissinginds) + nomissing_subarray(a, nonmissinginds) else a end end - new_kwargs = NamedTuple{keys(kwargs)}(new_kwargs_vals) - - res = f.f(newargs...; new_kwargs...) - if res isa AbstractVector - out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) - fill!(out, missing) - out[nonmissingmask] .= res - else - out = similar(vecs[1], Union{typeof(res), Missing}) - fill!(out, missing) - out[nonmissingmask] .= Ref(res) - end - - return out + new_kwargs = NamedTuple{keys(kwargs)}(new_kwargs_vals) else return f.f(xs...; kwargs...) end end """ - spreadmissings(f) + spreadmissings(f; spread = :default) Given a function `f`, wraps a function `f` but performs a transformation on arguments before executing. Given the call @@ -335,8 +358,28 @@ julia> spreadmissings(restricted_fun)(x, y) missing missing ``` + +`spreadmissings` allows control over how the output from `f` is "spread" +along with respect to missing values. + +* `:default`: + * If `output` is a `Vector` with the same length as the number of + jointly non-missing elements of the inputs `output` is "spread" + to match the non-missing elements of the inputs. + * If the `output` is a `Vector` whose length is not the same + as the length of number of non-missing elements of the inputs, + a `DimensionMismatch` error is thrown. + * If the output is not a `Vector`, `output` is simply returned directly +* `:nonmissing`: + * If `output` is a `Vector`, behavior is the same as `:default` + * If `output` is not a `Vector`, `output` is spread along non-missing + elements of the inputs. +* `:none`: `output` is returned directly, whether a `Vector` or not. + +To spread a vector across all non-missing indices of inputs, wrap the +result in `Ref`. """ -spreadmissings(f) = SpreadMissings{Core.Typeof(f)}(f) +spreadmissings(f; spread = :default) = SpreadMissings(f, spread) """ skipmissings(args...) diff --git a/test/.ipynb_checkpoints/runtests-checkpoint.jl b/test/.ipynb_checkpoints/runtests-checkpoint.jl new file mode 100644 index 0000000..7c18225 --- /dev/null +++ b/test/.ipynb_checkpoints/runtests-checkpoint.jl @@ -0,0 +1,227 @@ +using Test, SparseArrays, Missings + +# Must be defined outside testset on v1.0 +struct CubeRooter end +(::CubeRooter)(x) = cbrt(x) + +@testset "Missings" begin + x = Missings.replace([1, 2, missing, 4], 3) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == collect(1:4) + @test collect(x) isa Vector{Int} + x = Missings.replace([1, 2, missing, 4], 3.0) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == collect(1:4) + @test collect(x) isa Vector{Int} + x = Missings.replace([1 2; missing 4], 3) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (2, 2) + @test collect(x) == [1 2; 3 4] + @test collect(x) isa Matrix{Int} + x = Missings.replace((v for v in [missing, 1, missing, 2, 4]), 0) + @test length(x) == 5 + @test size(x) == (5,) + @test eltype(x) === Any + @test collect(x) == [0, 1, 0, 2, 4] + @test collect(x) isa Vector{Int} + + x = Missings.fail([1, 2, 3, 4]) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == [1, 2, 3, 4] + @test collect(x) isa Vector{Int} + x = Missings.fail([1 2; 3 4]) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (2, 2) + @test collect(x) == [1 2; 3 4] + @test collect(x) isa Matrix{Int} + @test_throws MissingException collect(Missings.fail([1, 2, missing, 4])) + x = Missings.fail(v for v in [1, 2, 4]) + @test eltype(x) === Any + @test length(x) == 3 + @test size(x) == (3,) + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + + x = skipmissing([1, 2, missing, 4]) + @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = skipmissing([1 2; missing 4]) + @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = collect(skipmissing([missing])) + @test eltype(x) === Union{} + @test isempty(collect(x)) + @test collect(x) isa Vector{Union{}} + x = collect(skipmissing(Union{Int, Missing}[])) + @test eltype(x) === Int + @test isempty(collect(x)) + @test collect(x) isa Vector{Int} + x = skipmissing([missing, missing, 1, 2, missing, 4, missing, missing]) + @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = skipmissing(v for v in [missing, 1, missing, 2, 4]) + @test eltype(x) === Any + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + + x = [1, 2, missing, 4] + y = ["a", "b", "c", missing] + z = [missing, missing, 3.1, 4.5] + l = [1, 2, 3, 4, 5] + @test_throws ArgumentError skipmissings(x, l) + mx, my = skipmissings(x, y) + iobuf = IOBuffer() + show(iobuf, MIME("text/plain"), mx) + s = String(take!(iobuf)) + @test s == "Missings.SkipMissings{Array{Union{Missing, $Int},1}}(" * + "Union{Missing, $Int}[1, 2, missing, 4]) comprised of 2 iterators" + @test collect(mx) == [1, 2] + @test collect(mx) isa Vector{Int} + @test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) === + mapreduce(identity, +, mx) === 3 + @test mapreduce(x -> x^2, +, mx) === mapreduce(x -> x^2, +, collect(mx)) === 5 + mx, my, mz = skipmissings(x, y, z) + @test eltype(mx) == Int + @test eltype(my) == String + @test eltype(mz) == Float64 + @test isempty(collect(mx)) + @test sum(mx) === 0 + x = [missing 4; 2 5; 3 6] + y = [1 4; missing 5; 3 6] + mx, my = skipmissings(x, y) + @test collect(mx) == [3, 4, 5, 6] + @test mx[3] == 3 + @test_throws MissingException mx[1] + @test reduce(+, mx) === 18 + @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx)) + @static if VERSION >= v"1.4.0-DEV" + @inferred Union{Float64, Missing} mapreduce(cos, *, mx) + @inferred Union{Float64, Missing} sum(mx) + @inferred Union{Float64, Missing} reduce(+, mx) + end + + x = [missing missing missing] + y = [1, 2, 3] + mx, my = skipmissings(x, y) + @test_throws ArgumentError reduce(x -> x/2, mx) + @test_throws ArgumentError mapreduce(x -> x/2, +, mx) + @test_throws MethodError length(mx) + @test IndexStyle(typeof(mx)) == IndexStyle(typeof(x)) + x = [isodd(i) ? missing : i for i in 1:64] + y = [isodd(i) ? missing : i for i in 65:128] + mx, my = skipmissings(x, y) + @test sum(mx) === 1056 + @static if VERSION >= v"1.4.0-DEV" + @inferred Union{Missing, Int} sum(mx) + @inferred Union{Missing, Int} reduce(+, mx) + end + + @test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1] + @test levels(2:-1:1) == levels([2, 1]) == levels([2, missing, 1]) == [1, 2] + @test levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"] + @test levels([Complex(0, 1), Complex(1, 0), missing]) == [Complex(0, 1), Complex(1, 0)] + @test levels(sparse([0 3 2])) == [0, 2, 3] + @test typeof(levels([1])) === typeof(levels([1, missing])) === Vector{Int} + @test typeof(levels(["a"])) === typeof(levels(["a", missing])) === Vector{String} + @test typeof(levels(sparse([1]))) === Vector{Int} + @test isempty(levels([missing])) + @test isempty(levels([])) + + @test nonmissingtype(Union{Int, Missing}) == Int + @test nonmissingtype(Any) == Any + @test nonmissingtype(Missing) == Union{} + @test nonmissingtype(Union{Array{Int}, Missing}) == Array{Int} + + @test isequal(missings(1), [missing]) + @test isequal(missings(Any, 1), [missing]) + @test isequal(missings(Int, 1), [missing]) + @test missings(Int, 1) isa Vector{Union{Int, Missing}} + @test missings(Any, 1) isa Vector{Union{Any, Missing}} + @test isequal(missings(Union{Int, Missing}, 1, 2), [missing missing]) + @test missings(Union{Int, Missing}, 1, 2) isa Matrix{Union{Int, Missing}} + @test Union{Int, Missing}[1,2,3] == (Union{Int, Missing})[1,2,3] + x = missings(Int, (1, 2)) + @test isa(x, Matrix{Union{Int, Missing}}) + @test isequal(x, [missing missing]) + x = missings((1, 2)) + @test isa(x, Matrix{Missing}) + @test isequal(x, [missing missing]) + + @test allowmissing([1]) == [1] + @test allowmissing([1]) isa AbstractVector{Union{Int, Missing}} + @test allowmissing(Any[:a]) == [:a] + @test allowmissing(Any[:a]) isa AbstractVector{Any} + @test isequal(allowmissing([1, missing]), [1, missing]) + @test allowmissing([1, missing]) isa AbstractVector{Union{Int, Missing}} + @test isequal(allowmissing([missing]), [missing]) + @test allowmissing([missing]) isa AbstractVector{Missing} + + @test allowmissing([1 1]) == [1 1] + @test allowmissing([1 1]) isa AbstractArray{Union{Int, Missing}, 2} + @test allowmissing([:a 1]) == [:a 1] + @test allowmissing([:a 1]) isa AbstractArray{Any, 2} + @test isequal(allowmissing([1 missing]), [1 missing]) + @test allowmissing([1 missing]) isa AbstractArray{Union{Int, Missing}, 2} + @test isequal(allowmissing([missing missing]), [missing missing]) + @test allowmissing([missing missing]) isa AbstractArray{Missing, 2} + + @test disallowmissing(Union{Int, Missing}[1]) == [1] + @test disallowmissing(Union{Int, Missing}[1]) isa AbstractVector{Int} + @test disallowmissing([1]) == [1] + @test disallowmissing([1]) isa AbstractVector{Int} + @test disallowmissing(Any[:a]) == [:a] + @test disallowmissing(Any[:a]) isa AbstractVector{Any} + @test_throws MethodError disallowmissing([1, missing]) + @test_throws MethodError disallowmissing([missing]) + + @test disallowmissing(Union{Int, Missing}[1 1]) == [1 1] + @test disallowmissing(Union{Int, Missing}[1 1]) isa AbstractArray{Int, 2} + @test disallowmissing([1 1]) == [1 1] + @test disallowmissing([1 1]) isa AbstractArray{Int, 2} + @test disallowmissing([:a 1]) == [:a 1] + @test disallowmissing([:a 1]) isa AbstractArray{Any, 2} + @test_throws MethodError disallowmissing([1 missing]) + @test_throws MethodError disallowmissing([missing missing]) + + # Lifting + ## functor + cuberoot = CubeRooter() # defined at top of file + @test passmissing(cuberoot)(27) == 3.0 + @test isequal(passmissing(cuberoot)(missing), missing) + ## type + @test passmissing(Int)(1.0) == 1 + @test isequal(passmissing(Int)(missing), missing) + ## function + @test passmissing(sqrt)(4) == 2.0 + @test isequal(passmissing(sqrt)(missing), missing) + @test isequal(passmissing(sqrt).([missing, 4]), [missing, 2.0]) + @test passmissing((x,y)->"$x $y")(1, 2) == "1 2" + @test isequal(passmissing((x,y)->"$x $y")(missing), missing) + if VERSION >= v"1.4.0-DEV" + @test_throws MethodError passmissing(string)(missing, base=2) + else + @test_throws ErrorException passmissing(string)(missing, base=2) + end + + @test passmissing(sin) === Missings.PassMissing{typeof(sin)}(sin) + @test passmissing(Int) === Missings.PassMissing{Type{Int}}(Int) + @test passmissing(cuberoot) === Missings.PassMissing{CubeRooter}(cuberoot) + + @testset "deprecated" begin + # The (unexported) `Missings.T` was deprecated to `Missings.nonmissingtype` + for x in (Union{Int, Missing}, Any, Missing, Union{Array{Int}, Missing}) + @test Missings.T(x) == Missings.nonmissingtype(x) + end + end +end From 48ab861073d1a3a481e66363089c68a16d2f1183 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Fri, 27 Oct 2023 15:04:12 -0400 Subject: [PATCH 13/26] tests --- src/Missings.jl | 287 ++++++++++++++++++++++++++++------------- test/runtests.jl | 2 + test/spreadmissings.jl | 148 +++++++++++++++++++++ 3 files changed, 349 insertions(+), 88 deletions(-) create mode 100644 test/spreadmissings.jl diff --git a/src/Missings.jl b/src/Missings.jl index 1f75741..e69bdfb 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -233,25 +233,128 @@ to not include any missing values, return a `SubArray` referencing the `nonmissinginds`. The element type of the returned output does not include `missing`. """ -nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) = - SubArray{nonmissingtype(eltype(a)), 1, typeof(a), Tuple{typeof(nonmissinginds)}, true}(a, (nonmissinginds,), 0, 1) +function nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) + T = nonmissingtype(eltype(a)) # Element type + N = 1 # Dimension of view + P = typeof(a) # Type of parent array + I = Tuple{typeof(nonmissinginds)} # Type of the non-missing indices + L = Base.IndexStyle(a) == IndexLinear # If the type supports fast linear indexing (assumed true) + SubArray{T, N, P, I, L}(a, (nonmissinginds,), 0, 1) +end + +function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) + newargs = ntuple(length(args)) do i + a = args[i] + if a isa AbstractVector + nomissing_subarray(a, nonmissinginds) + else + a + end + end +end -function maybespread(res, vecs, spread) +function maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) + spread = f.spread res = f.f(newargs...; new_kwargs...) - if spread == :default || spread == :nonmissinginds - out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) - fill!(out, missing) - out[nonmissingmask] .= res + if res isa AbstractVector + # Default and spread have the same behavior if + # output is a vector + if spread === :default || spread === :nonmissing + if length(res) != length(nonmissinginds) + s = "When spreading a vector result, " * + "length of output must match number of jointly non-" + "missing indices in inputs. Currently spread = :$(spread)." + throw(DimensionMismatch(s)) + end + out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) + fill!(out, missing) + out[nonmissingmask] .= res + elseif spread === :none + out = res + else + throw(ArgumentError("Should not reach 1")) + end + else + if spread === :nonmissing + out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) + fill!(out, missing) + for ind in nonmissinginds + out[ind] = res + end + elseif spread === :default || spread === :none + out = res + else + throw(ArgumentError("Should not reach 2")) + end + end - return out + return out +end + +function maybespread_nomissing(f, args, kwargs, vecs) + spread = f.spread + res = f.f(args...; kwargs...) + + if res isa AbstractVector + # Default and spread have the same behavior if + # output is a vector + if spread === :default || spread === :nonmissing + if length(res) != length(first(vecs)) + s = "When spreading a vector result, " * + "length of output must match number of jointly non-" + "missing indices in inputs. Currently spread = :$(spread)." + throw(DimensionMismatch(s)) + end + out = res + elseif spread === :none + out = res + else + throw(ArgumentError("Should not reach 1")) + end else - return res + if spread === :nonmissing + out = Vector{typeof(res)}(undef, length(vecs[1])) + fill!(out, res) + elseif spread === :default || spread === :none + out = res + else + throw(ArgumentError("Should not reach 2")) + end end return out end +function check_indices_match(vecs...) + Base.require_one_based_indexing(vecs...) + findex = eachindex(first(vecs)) + # If vectors don't have the same indices, throw a + # nice error for the user. + if !(all(x -> eachindex(x) == findex, vecs[2:end])) + d = Dict() + for i in 1:length(vecs) + e = eachindex(vecs[i]) + if eachindex(e) in keys(d) + push!(d[i], i) + else + d[e] = [i] + end + end + s = "The indices of vector-input arguments are not all " * + "the same.\n" + + for k in keys(d) + inds = join(d[k], ", ", " and ") + ind_msg = "Vector inputs $inds have indices $k\n" + s = s * ind_msg + end + + throw(DimensionMismatch(s)) + end +end + + function (f::SpreadMissings{F})(args...; kwargs...) where {F} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) @@ -261,74 +364,69 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} # either the main arguments or keyword arguments if any(x -> x isa AbstractVector{>:Missing}, xs) # Check that all vector inputs have the - # same indices. + # same indices. Collect these vector inputs + # into a single object. # # TODO: Allow users to protect vector inputs vecs = Base.filter(x -> x isa AbstractVector, xs) - findex = eachindex(first(vecs)) - if !(all(x -> eachindex(x) == findex, vecs[2:end])) - d = Dict() - for i in 1:length(vecs) - e = eachindex(vecs[i]) - if eachindex(e) in keys(d) - push!(d[i], i) - else - d[e] = [i] - end - end - s = "The indices of vector-input arguments are not all " * - "the same.\n" - - for k in keys(d) - inds = join(d[k], ", ", " and ") - ind_msg = "Vector inputs $inds have indices $k\n" - s = s * ind_msg - end - - throw(ArgumentError(s)) - end - + check_indices_match(vecs...) + # Determine which indices in our collection of + # vector inputs have no missing values in + # all our inputs. nonmissingmask = fill(true, length(vecs[1])) + for v in vecs nonmissingmask .&= .!ismissing.(v) end nonmissinginds = findall(nonmissingmask) - newargs = ntuple(length(args)) do i - a = args[i] - if a isa AbstractVector - nomissing_subarray(a, nonmissinginds) - else - a - end - end - - new_kwargs_vals = ntuple(length(kwargs_vals)) do i - a = kwargs_vals[i] - if a isa AbstractVector - nomissing_subarray(a, nonmissinginds) - else - a - end - end + # Construct new versions of arguments + # with no Vector{Union{T, Missing}} + newargs = new_args_subarray(args, nonmissinginds) + new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) new_kwargs = NamedTuple{keys(kwargs)}(new_kwargs_vals) + maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) + # There is at least one vector, but none of the vectors can contain missing + elseif any(x -> x isa AbstractVector, xs) + vecs = Base.filter(x -> x isa AbstractVector, xs) + check_indices_match(vecs...) + maybespread_nomissing(f, args, kwargs, vecs) else - return f.f(xs...; kwargs...) + f.f(args...; kwargs...) end end """ spreadmissings(f; spread = :default) -Given a function `f`, wraps a function `f` but performs a transformation -on arguments before executing. Given the call +Given a function `f`, function `f` but performs a transformation +on arguments to remove missing values before executing. + +### Initial example + +```julia-repl +julia> using Statistics; + +julia> xmiss = [1, 2, 3, missing]; + +julia> ymiss = [missing, 200, 300, 400]; + +julia> summeans(x, y) = mean(x) + mean(y); + +julia> spreadmissings(summeans)(xmiss, ymiss) +252.5 +``` + +### Details + +Given the call ``` spreadmissings(f)(x::AbstractVector, y::Integer, z::AbstractVector) ``` -will find the indices which corresond to `missing` values in *both* -`x` and `z`. Then apply `f` on the `view`s of `x` and `z` which +finds the indices which corresond to `missing` values in *both* +`x` and `z`. Then apply `f` on the `SubArray`s of `x` and `z` which contain non-missing values. In essense: ``` @@ -337,47 +435,60 @@ sx = view(x, inds); sy = view(y, inds) f(sx, y, sy) ``` -# Examples -``` -julia> x = [0, 1, 2, missing]; y = [-1, 0, missing, 2]; - -julia> function restricted_fun(x, y) - map(x, y) do xi, yi - if xi < 1 || yi < 1 # will error on missings - return 1 - else - return 2 - end - end - end; - -julia> spreadmissings(restricted_fun)(x, y) -4-element Vector{Union{Missing, Int64}}: - 1 - 1 - missing - missing -``` +!!! note + `spreadmissings` does not use the default `view` behavior. Rather, + it constructs a `SubArray` directly such that the eltype of the new + inputs do not include `Missing`. + +### `spread` keyword argument -`spreadmissings` allows control over how the output from `f` is "spread" +Control over how the output from `f` is "spread" along with respect to missing values. * `:default`: - * If `output` is a `Vector` with the same length as the number of - jointly non-missing elements of the inputs `output` is "spread" - to match the non-missing elements of the inputs. - * If the `output` is a `Vector` whose length is not the same - as the length of number of non-missing elements of the inputs, - a `DimensionMismatch` error is thrown. - * If the output is not a `Vector`, `output` is simply returned directly + * If `output` is a `Vector` with the same length as the number of + jointly non-missing elements of the inputs `output` is "spread" + to match the non-missing elements of the inputs. + * If the `output` is a `Vector` whose length is not the same + as the length of number of non-missing elements of the inputs, + a `DimensionMismatch` error is thrown. + * If the output is not a `Vector`, `output` is simply returned directly * `:nonmissing`: - * If `output` is a `Vector`, behavior is the same as `:default` - * If `output` is not a `Vector`, `output` is spread along non-missing + * If `output` is a `Vector`, behavior is the same as `:default` + * If `output` is not a `Vector`, `output` is spread along non-missing elements of the inputs. * `:none`: `output` is returned directly, whether a `Vector` or not. -To spread a vector across all non-missing indices of inputs, wrap the -result in `Ref`. +A summary of the behavior is given in the table below: + +| spread \\ output type | Vector | Non-Vector | +|:---------------------- |:---------------- |:---------------- | +| :default | spread and match | return | +| :nonmissing | spread and match | spread and match | +| :none | return | return | + +If there are `AbstractVector` inputs but none of these inputs +`AbstractVector{>:Missing}`, behavior of `spread` is the same as +with inputs which allows for missing values. However the returned +vectors will not allow for `missing`. + +If none of the argumets are `AbstractVector`s, `spreadmissings(f)` +behaves the same as `f` regardpess of `spread`. + +### Limitations + +`spreadmissings` currently does not support: + +* Different length vector inputs. For + +``` +spreadmissings(f)([1, 2], [100, 200, 300]) +``` + +will error. + +* Full spreading of scalar outputs across the *full* length of the +input vector. That is, there is no `spread = :all` option. """ spreadmissings(f; spread = :default) = SpreadMissings(f, spread) diff --git a/test/runtests.jl b/test/runtests.jl index 7c18225..f642921 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,8 @@ using Test, SparseArrays, Missings struct CubeRooter end (::CubeRooter)(x) = cbrt(x) +include("spreadmissings.jl") + @testset "Missings" begin x = Missings.replace([1, 2, missing, 4], 3) @test eltype(x) === Int diff --git a/test/spreadmissings.jl b/test/spreadmissings.jl new file mode 100644 index 0000000..b45b7eb --- /dev/null +++ b/test/spreadmissings.jl @@ -0,0 +1,148 @@ +module SpreadMissingTests + +using Test, Missings + +const ≈ = isequal + +function small_vec(args...; kwargs...) + [1, 2] +end + +function right_vec(args...; kwargs...) + kwargs_vals = values(values(kwargs)) + xs = tuple(args..., kwargs_vals...) + vecs = Base.filter(x -> x isa AbstractVector, xs) + if !isempty(vecs) + collect(1:length(first(vecs))) + else + [-1] + end +end + +function scalar(args...; kwargs...) + 1 +end + +xmiss = [1, 2, 3, missing] +x = [1, 2, 3, 4] + +ymiss = [missing, 200, 300, 400] +y = [100, 200, 300, 400] + +s = [1000, 2000] + +## vector, :default +### missings in main arg only +t = spreadmissings(right_vec)(xmiss) +@test t ≈ [1, 2, 3, missing] +### missing in keyword arg only +t = spreadmissings(right_vec)(; z = ymiss) +@test t ≈ [missing, 1, 2, 3] +### missing in both main arg and keyword arg +t = spreadmissings(right_vec)(x, xmiss; z = ymiss) +@test t ≈ [missing, 1, 2, missing] +### missings nowhere +t = spreadmissings(right_vec)(x; z = y) +@test t ≈ [1, 2, 3, 4] +@test t isa Vector{Int} +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(right_vec)(x, s) +### no vectors +t = spreadmissings(right_vec)(1, 2; z = 9) +@test t == [-1] + +## vector, :nonmissing +### missings in main arg only +t = spreadmissings(right_vec; spread = :nonmissing)(xmiss) +@test t ≈ [1, 2, 3, missing] +### missing in keyword arg only +t = spreadmissings(right_vec; spread = :nonmissing)(; z = ymiss) +@test t ≈ [missing, 1, 2, 3] +### missing in both main arg and keyword arg +t = spreadmissings(right_vec; spread = :nonmissing)(x, xmiss; z = ymiss) +@test t ≈ [missing, 1, 2, missing] +### missings nowhere +t = spreadmissings(right_vec; spread = :nonmissing)(x; z = y) +@test t ≈ [1, 2, 3, 4] +@test t isa Vector{Int} +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(right_vec; spread = :nonmissing)(x, s) +### no vectors +t = spreadmissings(right_vec)(1, 2; z = 9) +@test t == [-1] + +## vector, :none +t = spreadmissings(right_vec; spread = :none)(xmiss) +@test t ≈ [1, 2, 3] +### missing in keyword arg only +t = spreadmissings(right_vec; spread = :none)(; z = ymiss) +@test t ≈ [1, 2, 3] +### missing in both main arg and keyword arg +t = spreadmissings(right_vec; spread = :none)(x, xmiss; z = ymiss) +@test t ≈ [1, 2] +### missings nowhere +t = spreadmissings(right_vec; spread = :none)(x; z = y) +@test t ≈ [1, 2, 3, 4] +@test t isa Vector{Int} +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(right_vec; spread = :none)(x, s) + +## non-vector, :default +### missings in main arg only +t = spreadmissings(scalar; spread = :default)(xmiss) +@test t ≈ 1 +### missing in keyword arg only +t = spreadmissings(scalar; spread = :default)(; z = ymiss) +@test t ≈ 1 +### missing in both main arg and keyword arg +t = spreadmissings(scalar; spread = :default)(x, xmiss; z = ymiss) +@test t ≈ 1 +### missings nowhere +t = spreadmissings(scalar; spread = :default)(x; z = y) +@test t ≈ 1 +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(scalar; spread = :default)(x, s) +### no vectors +t = spreadmissings(scalar; spread = :default)(1, 2; z = 9) +@test t == 1 + +## non-vector, :nonmissing +### missings in main arg only +t = spreadmissings(scalar; spread = :nonmissing)(xmiss) +@test t ≈ [1, 1, 1, missing] +### missing in keyword arg only +t = spreadmissings(scalar; spread = :nonmissing)(; z = ymiss) +@test t ≈ [missing, 1, 1, 1] +### missing in both main arg and keyword arg +t = spreadmissings(scalar; spread = :nonmissing)(x, xmiss; z = ymiss) +@test t ≈ [missing, 1, 1, missing] +### missings nowhere +t = spreadmissings(scalar; spread = :nonmissing)(x; z = y) +@test t ≈ [1, 1, 1, 1] +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(scalar; spread = :nonmissing)(x, s) +### no vectors +t = spreadmissings(scalar; spread = :nonmissing)(1, 2; z = 9) +@test t == 1 + +## non-vector, :none +### missings in main arg only +t = spreadmissings(scalar; spread = :none)(xmiss) +@test t ≈ 1 +### missing in keyword arg only +t = spreadmissings(scalar; spread = :none)(; z = ymiss) +@test t ≈ 1 +### missing in both main arg and keyword arg +t = spreadmissings(scalar; spread = :none)(x, xmiss; z = ymiss) +@test t ≈ 1 +### missings nowhere +t = spreadmissings(scalar; spread = :none)(x; z = y) +@test t ≈ 1 +### Mis-matched vector lengths +@test_throws DimensionMismatch spreadmissings(scalar; spread = :none)(x, s) +### no vectors +t = spreadmissings(scalar; spread = :none)(1, 2; z = 9) +@test t == 1 + + +end # module \ No newline at end of file From 4debddcd8e3461ee497bf010811476908665daf6 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sat, 9 Dec 2023 14:48:11 -0500 Subject: [PATCH 14/26] Update src/Missings.jl Co-authored-by: Milan Bouchet-Valat --- src/Missings.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Missings.jl b/src/Missings.jl index e69bdfb..84f2e5c 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -238,7 +238,7 @@ function nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) N = 1 # Dimension of view P = typeof(a) # Type of parent array I = Tuple{typeof(nonmissinginds)} # Type of the non-missing indices - L = Base.IndexStyle(a) == IndexLinear # If the type supports fast linear indexing (assumed true) + L = Base.IndexStyle(a) === IndexLinear # If the type supports fast linear indexing (assumed true) SubArray{T, N, P, I, L}(a, (nonmissinginds,), 0, 1) end From 7db9f6812bc45bbb022e224d3e724702390133a6 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sat, 9 Dec 2023 14:49:00 -0500 Subject: [PATCH 15/26] Update src/Missings.jl Co-authored-by: Milan Bouchet-Valat --- src/Missings.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Missings.jl b/src/Missings.jl index 84f2e5c..41ee506 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -228,6 +228,8 @@ end non_spreadabble_check(x) = nothing """ + nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) + Given an input vector `a` where `nonmissinginds` is guaranteed to not include any missing values, return a `SubArray` referencing the `nonmissinginds`. The element type of the returned output From 8bfb6421cb6c1a68ae54dcd64096b2ac33497525 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sat, 9 Dec 2023 15:10:36 -0500 Subject: [PATCH 16/26] Update src/Missings.jl Co-authored-by: Milan Bouchet-Valat --- src/Missings.jl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 41ee506..386b658 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -448,13 +448,12 @@ Control over how the output from `f` is "spread" along with respect to missing values. * `:default`: - * If `output` is a `Vector` with the same length as the number of - jointly non-missing elements of the inputs `output` is "spread" + * If output is not a vector, it is returned directly. + * If output is a vector with the same length as the number of + jointly non-missing elements of the inputs, it is "spread" to match the non-missing elements of the inputs. - * If the `output` is a `Vector` whose length is not the same - as the length of number of non-missing elements of the inputs, - a `DimensionMismatch` error is thrown. - * If the output is not a `Vector`, `output` is simply returned directly + * Otherwise a `DimensionMismatch` error is thrown. + * `:nonmissing`: * If `output` is a `Vector`, behavior is the same as `:default` * If `output` is not a `Vector`, `output` is spread along non-missing From 27a26f733b545b82cbd1062780ca55984fb0cd28 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 9 Dec 2023 15:35:59 -0500 Subject: [PATCH 17/26] respond to comments --- src/Missings.jl | 204 ++++++++++++++++++++++++++++-------------------- 1 file changed, 119 insertions(+), 85 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index e69bdfb..c42702b 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -211,7 +211,7 @@ passmissing(f) = PassMissing{Core.Typeof(f)}(f) struct SpreadMissings{F} <: Function f::F spread::Symbol - function SpreadMissings(f, spread) + function SpreadMissings(f, spread::Symbol) if !(spread in (:default, :nonmissing, :none)) throw(ArgumentError("spread must be either :default, :nonmissing, or :none")) end @@ -219,13 +219,12 @@ struct SpreadMissings{F} <: Function end end -function non_spreadabble_check(t::Union{AbstractDict, NamedTuple, Tuple}) +function non_spreadable_check(t::Union{AbstractDict, NamedTuple, Tuple}) T = typeof(t) - s = "Spreadmissings on $T is reserved. Please wrap in Ref to be " * - "treated as a scalar." + s = "spreadmissings on $T is reserved." throw(ArgumentError(s)) end -non_spreadabble_check(x) = nothing +non_spreadable_check(x) = nothing """ Given an input vector `a` where `nonmissinginds` is guaranteed @@ -243,8 +242,7 @@ function nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) end function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) - newargs = ntuple(length(args)) do i - a = args[i] + newargs = map(args) do a if a isa AbstractVector nomissing_subarray(a, nonmissinginds) else @@ -253,7 +251,9 @@ function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) end end -function maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) +function maybespread_missing(f, newargs::Tuple, new_kwargs::NamedTuple, vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, nonmissingmask::AbstractVector{<:Bool}) + spread = f.spread res = f.f(newargs...; new_kwargs...) @@ -262,9 +262,12 @@ function maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmi # output is a vector if spread === :default || spread === :nonmissing if length(res) != length(nonmissinginds) - s = "When spreading a vector result, " * + + s = "When spreading a vector result with `spread=$(spread)`, " * "length of output must match number of jointly non-" - "missing indices in inputs. Currently spread = :$(spread)." + "missing values in inputs " + "(got $(length(res)) and $(length(nonmissinginds))).". + throw(DimensionMismatch(s)) end out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) @@ -279,9 +282,7 @@ function maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmi if spread === :nonmissing out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) fill!(out, missing) - for ind in nonmissinginds - out[ind] = res - end + out[nonmissinginds] .= Ref(res) elseif spread === :default || spread === :none out = res else @@ -301,9 +302,10 @@ function maybespread_nomissing(f, args, kwargs, vecs) # output is a vector if spread === :default || spread === :nonmissing if length(res) != length(first(vecs)) - s = "When spreading a vector result, " * + s = "When spreading a vector result with `spread=$(spread)`, " * "length of output must match number of jointly non-" - "missing indices in inputs. Currently spread = :$(spread)." + "missing values in inputs " + "(got $(length(res)) and $(length(nonmissinginds))).". throw(DimensionMismatch(s)) end out = res @@ -328,37 +330,13 @@ end function check_indices_match(vecs...) Base.require_one_based_indexing(vecs...) - findex = eachindex(first(vecs)) - # If vectors don't have the same indices, throw a - # nice error for the user. - if !(all(x -> eachindex(x) == findex, vecs[2:end])) - d = Dict() - for i in 1:length(vecs) - e = eachindex(vecs[i]) - if eachindex(e) in keys(d) - push!(d[i], i) - else - d[e] = [i] - end - end - s = "The indices of vector-input arguments are not all " * - "the same.\n" - - for k in keys(d) - inds = join(d[k], ", ", " and ") - ind_msg = "Vector inputs $inds have indices $k\n" - s = s * ind_msg - end - - throw(DimensionMismatch(s)) - end + findex = eachindex(vecs...) end - function (f::SpreadMissings{F})(args...; kwargs...) where {F} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) - foreach(non_spreadabble_check, xs) + foreach(non_spreadable_check, xs) # Detect vector inputs which contain missing in # either the main arguments or keyword arguments @@ -374,13 +352,12 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} # vector inputs have no missing values in # all our inputs. nonmissingmask = fill(true, length(vecs[1])) - for v in vecs nonmissingmask .&= .!ismissing.(v) end nonmissinginds = findall(nonmissingmask) # Construct new versions of arguments - # with no Vector{Union{T, Missing}} + # with SubArrays whose eltypes do not allow Missing newargs = new_args_subarray(args, nonmissinginds) new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) @@ -399,10 +376,26 @@ end """ spreadmissings(f; spread = :default) -Given a function `f`, function `f` but performs a transformation -on arguments to remove missing values before executing. +Return a function which calls function `f` after skipping entries +corresponding to missing values in `AbstractVector` arguments. + +All input vectors must have the same length. Non-`AbstractVector` +arguments are left untouched. + +If `spread` is `:default` or `:nonmissing` and `f` returns a vector, +its length must be equal to the number of jointly non-missing +entries in the vector inputs. A vector of the same length as +vector inputs is returned, filling positions corresponding +to missing values with `missing`. + +If `spread` is `:none`, or if `f` returns a value other than a vector, +it is returned as-is. + +For each vector argument, `f` is passed a `SubArray` +view with an element type equal to `nonmissingtype(T)`, +with `T` the element type of the original argument. -### Initial example +### Examples ```julia-repl julia> using Statistics; @@ -415,19 +408,43 @@ julia> summeans(x, y) = mean(x) + mean(y); julia> spreadmissings(summeans)(xmiss, ymiss) 252.5 + +julia> xmiss = [10, 20, 30, missing]; + +julia> ymiss = [missing, 500, 400, 300]; + +julia> cor(xmiss, ymiss) +missing + +julia> spreadmissings(cor)(xmiss, ymiss) +-1.0 + +julia> standardize(xmiss) +4-element Vector{Missing}: + missing + missing + missing + missing + +julia> spreadmissings(standardize)(xmiss) +4-element Vector{Union{Missing, Float64}}: + -10.0 + 0.0 + 10.0 + missing ``` -### Details +# Extended help -Given the call +The behavior of `spreadmissing` can be illustrated using an example. The call ``` spreadmissings(f)(x::AbstractVector, y::Integer, z::AbstractVector) ``` -finds the indices which corresond to `missing` values in *both* -`x` and `z`. Then apply `f` on the `SubArray`s of `x` and `z` which -contain non-missing values. In essense: +finds the indices which correspond to `missing` values in *both* +`x` and `z`. Then `f` is applied on the `SubArray`s of `x` and `z` which +contain non-missing values. This is essentially equivalent to: ``` inds = .!missing.(x) .& .!missing.(z) @@ -435,15 +452,15 @@ sx = view(x, inds); sy = view(y, inds) f(sx, y, sy) ``` -!!! note - `spreadmissings` does not use the default `view` behavior. Rather, - it constructs a `SubArray` directly such that the eltype of the new - inputs do not include `Missing`. -### `spread` keyword argument +`spreadmissings` does not use the default `view` behavior. Rather, +it constructs a `SubArray` directly such that the eltype of the new +inputs do not include `Missing`. -Control over how the output from `f` is "spread" -along with respect to missing values. +# `spread` keyword argument + +The `spread` keyword argument controls whether the output from +`f` is "spread" over non-missing values. * `:default`: * If `output` is a `Vector` with the same length as the number of @@ -454,41 +471,58 @@ along with respect to missing values. a `DimensionMismatch` error is thrown. * If the output is not a `Vector`, `output` is simply returned directly * `:nonmissing`: - * If `output` is a `Vector`, behavior is the same as `:default` + * If output is not a vector, it is is spread over non-missing + elements of the inputs. + * If output is a vector, behavior is the same as `:default`. * If `output` is not a `Vector`, `output` is spread along non-missing elements of the inputs. -* `:none`: `output` is returned directly, whether a `Vector` or not. +* `:none`: output is returned directly, whether a vector or not. A summary of the behavior is given in the table below: -| spread \\ output type | Vector | Non-Vector | -|:---------------------- |:---------------- |:---------------- | -| :default | spread and match | return | -| :nonmissing | spread and match | spread and match | -| :none | return | return | +| spread \\ output type | Vector | Non-vector | +|:---------------------- |:------ |:-----------| +| :default | spread | return | +| :nonmissing | spread | spread | +| :none | return | return | -If there are `AbstractVector` inputs but none of these inputs -`AbstractVector{>:Missing}`, behavior of `spread` is the same as -with inputs which allows for missing values. However the returned -vectors will not allow for `missing`. +If there are `AbstractVector` inputs but none of these inputs are +`AbstractVector{>:Missing}`, the returned vectors will not allow +for `missing`. -If none of the argumets are `AbstractVector`s, `spreadmissings(f)` -behaves the same as `f` regardpess of `spread`. +If none of the arguments are `AbstractVector`s, `spreadmissings(f)` +behaves the same as `f` regardless of `spread`. -### Limitations - -`spreadmissings` currently does not support: - -* Different length vector inputs. For - -``` -spreadmissings(f)([1, 2], [100, 200, 300]) -``` - -will error. - -* Full spreading of scalar outputs across the *full* length of the -input vector. That is, there is no `spread = :all` option. +!!! note + `spreadmissings` has a subtly different behavior than common uses of + `skipmissing`. Compare the two functions below + + ```julia-repl + julia> function fillmean_skip(x) + m = mean(skipmissing(x)) + fill(m, length(x)) + end; + + julia> fillmean(x) = fill(mean(x), length(x)); + + julia> x = [2, missing]; + + julia> fillmean_skip(x) + 2-element Vector{Float64}: + 2.0 + 2.0 + + julia> spreadmissings(fillmean)(x) + 2-element Vector{Union{Missing, Float64}}: + 2.0 + missing + ``` + + 1. `fillmean_skip` fills all entries of the original vector `x` with the mean, + excluding `missing`s. By contrast, `spreadmissings(fillmean)` only fills non-missing + elements of the original `x`. + 2. `fillmean_skip` returns a vector which does not allow for `missing`, while + `spreadmissings(fillmean)` does. """ spreadmissings(f; spread = :default) = SpreadMissings(f, spread) From 00b951a0d9e15845ad957168064118026246b035 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 9 Dec 2023 15:42:42 -0500 Subject: [PATCH 18/26] remove non-spreading behavior --- src/Missings.jl | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 4b4583f..3cb99b4 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -219,13 +219,6 @@ struct SpreadMissings{F} <: Function end end -function non_spreadable_check(t::Union{AbstractDict, NamedTuple, Tuple}) - T = typeof(t) - s = "spreadmissings on $T is reserved." - throw(ArgumentError(s)) -end -non_spreadable_check(x) = nothing - """ nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) @@ -338,7 +331,6 @@ end function (f::SpreadMissings{F})(args...; kwargs...) where {F} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) - foreach(non_spreadable_check, xs) # Detect vector inputs which contain missing in # either the main arguments or keyword arguments From c85a4c859c57415d7f28609e2d01e3454ee052f0 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 9 Dec 2023 16:12:51 -0500 Subject: [PATCH 19/26] fix keyword argument stuff --- src/Missings.jl | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 3cb99b4..2b2f6a3 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -232,7 +232,7 @@ function nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) N = 1 # Dimension of view P = typeof(a) # Type of parent array I = Tuple{typeof(nonmissinginds)} # Type of the non-missing indices - L = Base.IndexStyle(a) === IndexLinear # If the type supports fast linear indexing (assumed true) + L = Base.IndexStyle(a) === IndexLinear # If the type supports fast linear indexing SubArray{T, N, P, I, L}(a, (nonmissinginds,), 0, 1) end @@ -246,8 +246,25 @@ function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) end end -function maybespread_missing(f, newargs::Tuple, new_kwargs::NamedTuple, vecs::Tuple, - nonmissinginds::AbstractVector{<:Integer}, nonmissingmask::AbstractVector{<:Bool}) +""" + maybespread_missing( + f::SpreadMissings, + newargs::Tuple, + new_kwargs, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool}) + +Applied when `spreadmissing(f)(args...; kwargs...)` is called and +`args` or `kwargs` contain a `Vector{Union{T, Missing}}`. +""" +function maybespread_missing( + f::SpreadMissings, + newargs::Tuple, + new_kwargs, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool}) spread = f.spread res = f.f(newargs...; new_kwargs...) @@ -257,7 +274,6 @@ function maybespread_missing(f, newargs::Tuple, new_kwargs::NamedTuple, vecs::Tu # output is a vector if spread === :default || spread === :nonmissing if length(res) != length(nonmissinginds) - s = "When spreading a vector result with `spread=$(spread)`, " * "length of output must match number of jointly non-" "missing values in inputs " @@ -288,7 +304,22 @@ function maybespread_missing(f, newargs::Tuple, new_kwargs::NamedTuple, vecs::Tu return out end -function maybespread_nomissing(f, args, kwargs, vecs) +""" + maybespread_nomissing( + f::SpreadMissings, + args::Tuple, + kwargs, + vecs::Tuple) + +Applied when `spreadmissing(f)(args...; kwargs...)` is called and *neither* +`args` nor `kwargs` contain a `Vector{Union{T, Missing}}`. +""" +function maybespread_nomissing( + f::SpreadMissings, + args::Tuple, + kwargs, + vecs::Tuple) + spread = f.spread res = f.f(args...; kwargs...) @@ -355,7 +386,7 @@ function (f::SpreadMissings{F})(args...; kwargs...) where {F} newargs = new_args_subarray(args, nonmissinginds) new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) - new_kwargs = NamedTuple{keys(kwargs)}(new_kwargs_vals) + new_kwargs = (k => v for (k, v) in zip(keys(kwargs), new_kwargs_vals)) maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) # There is at least one vector, but none of the vectors can contain missing elseif any(x -> x isa AbstractVector, xs) From ea39189d5330cae18a86320b05b52f15ca6dd302 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Sat, 9 Dec 2023 17:14:54 -0500 Subject: [PATCH 20/26] get closer to type stability --- src/Missings.jl | 67 +++++++++++++++++++++++++++--------------- test/spreadmissings.jl | 7 +++++ 2 files changed, 50 insertions(+), 24 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 2b2f6a3..7371898 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -208,14 +208,19 @@ missing """ passmissing(f) = PassMissing{Core.Typeof(f)}(f) -struct SpreadMissings{F} <: Function +abstract type AbstractSpread end +struct SpreadDefault <: AbstractSpread end +struct SpreadNonMissing <: AbstractSpread end +struct SpreadNone <: AbstractSpread end + +struct SpreadMissings{F, S <: AbstractSpread} <: Function f::F - spread::Symbol - function SpreadMissings(f, spread::Symbol) - if !(spread in (:default, :nonmissing, :none)) - throw(ArgumentError("spread must be either :default, :nonmissing, or :none")) + spread::S + function SpreadMissings(f, spread::AbstractSpread) + if !(spread isa AbstractSpread) + throw(ArgumentError("spread must be either SpreadDefault(), SpreadNonMissing(), or SpreadNone()")) end - new{Core.Typeof(f)}(f, spread) + new{Core.Typeof(f), typeof(spread)}(f, spread) end end @@ -259,22 +264,23 @@ Applied when `spreadmissing(f)(args...; kwargs...)` is called and `args` or `kwargs` contain a `Vector{Union{T, Missing}}`. """ function maybespread_missing( - f::SpreadMissings, + f::SpreadMissings{F, S}, newargs::Tuple, new_kwargs, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool}) + nonmissingmask::AbstractVector{<:Bool}) where {F, S} - spread = f.spread res = f.f(newargs...; new_kwargs...) + spread = f.spread + if res isa AbstractVector # Default and spread have the same behavior if # output is a vector - if spread === :default || spread === :nonmissing + if spread === SpreadDefault() || spread === SpreadNonMissing() if length(res) != length(nonmissinginds) - s = "When spreading a vector result with `spread=$(spread)`, " * + s = "When spreading a vector result with `spread=$(S)`, " * "length of output must match number of jointly non-" "missing values in inputs " "(got $(length(res)) and $(length(nonmissinginds))).". @@ -284,17 +290,17 @@ function maybespread_missing( out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) fill!(out, missing) out[nonmissingmask] .= res - elseif spread === :none + elseif spread === SpreadNone() out = res else throw(ArgumentError("Should not reach 1")) end else - if spread === :nonmissing + if spread === SpreadNonMissing() out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) fill!(out, missing) out[nonmissinginds] .= Ref(res) - elseif spread === :default || spread === :none + elseif spread === SpreadDefault() || spread === SpreadNone() out = res else throw(ArgumentError("Should not reach 2")) @@ -315,36 +321,36 @@ Applied when `spreadmissing(f)(args...; kwargs...)` is called and *neither* `args` nor `kwargs` contain a `Vector{Union{T, Missing}}`. """ function maybespread_nomissing( - f::SpreadMissings, + f::SpreadMissings{F, S}, args::Tuple, kwargs, - vecs::Tuple) + vecs::Tuple) where {F, S} - spread = f.spread res = f.f(args...; kwargs...) + spread = f.spread if res isa AbstractVector # Default and spread have the same behavior if # output is a vector - if spread === :default || spread === :nonmissing + if spread === SpreadDefault() || spread === SpreadNonMissing() if length(res) != length(first(vecs)) - s = "When spreading a vector result with `spread=$(spread)`, " * + s = "When spreading a vector result with `spread=$(S)`, " * "length of output must match number of jointly non-" "missing values in inputs " "(got $(length(res)) and $(length(nonmissinginds))).". throw(DimensionMismatch(s)) end out = res - elseif spread === :none + elseif spread === SpreadNone() out = res else throw(ArgumentError("Should not reach 1")) end else - if spread === :nonmissing + if spread === SpreadNonMissing() out = Vector{typeof(res)}(undef, length(vecs[1])) fill!(out, res) - elseif spread === :default || spread === :none + elseif spread === SpreadDefault() || spread === SpreadNone() out = res else throw(ArgumentError("Should not reach 2")) @@ -359,10 +365,16 @@ function check_indices_match(vecs...) findex = eachindex(vecs...) end -function (f::SpreadMissings{F})(args...; kwargs...) where {F} +function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S} kwargs_vals = values(values(kwargs)) xs = tuple(args..., kwargs_vals...) + if any(ismissing, xs) + s = "Using `spreadmissings` with a positional or keyword argumet" * + " that is `missing` is reserved" + throw(ArgumentError(s)) + end + # Detect vector inputs which contain missing in # either the main arguments or keyword arguments if any(x -> x isa AbstractVector{>:Missing}, xs) @@ -548,8 +560,15 @@ behaves the same as `f` regardless of `spread`. 2. `fillmean_skip` returns a vector which does not allow for `missing`, while `spreadmissings(fillmean)` does. """ -spreadmissings(f; spread = :default) = SpreadMissings(f, spread) +SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault()) +SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing()) +SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone()) + +function spreadmissings(f; spread::Symbol = :default) + SpreadMissings(f, Val(spread)) +# throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) +end """ skipmissings(args...) diff --git a/test/spreadmissings.jl b/test/spreadmissings.jl index b45b7eb..669ec6b 100644 --- a/test/spreadmissings.jl +++ b/test/spreadmissings.jl @@ -145,4 +145,11 @@ t = spreadmissings(scalar; spread = :none)(1, 2; z = 9) @test t == 1 +# Error on missing +alwaysone(args...; kwargs...) = 1 +@test_throws ArgumentError spreadmissings(alwaysone)(missing) +@test_throws ArgumentError spreadmissings(alwaysone)(; a = missing) +@test_throws ArgumentError spreadmissings(alwaysone)([1, 2], missing) +@test_throws ArgumentError spreadmissings(alwaysone)([1, 2]; a = missing) + end # module \ No newline at end of file From e653c5a55d1fcc451359f455b4b7c1b25a9c7ef8 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 12 Dec 2023 11:54:39 -0500 Subject: [PATCH 21/26] restore type stability --- src/Missings.jl | 152 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 12 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 7371898..d45e151 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -251,6 +251,135 @@ function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) end end +function spread_missing( + res::AbstractVector{T}, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + + if length(res) != length(nonmissinginds) + s = "When spreading a vector result with `spread=$(S)`, " * + "length of output must match number of jointly non-" + "missing values in inputs " + "(got $(length(res)) and $(length(nonmissinginds))).". + + throw(DimensionMismatch(s)) + end + out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) + fill!(out, missing) + out[nonmissingmask] .= res + out +end + +function maybespread_missing( + res::T, + spread::SpreadDefault, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::T where{T} + + res +end + +function maybespread_missing( + res::AbstractVector{T}, + spread::SpreadDefault, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + + spread_missing(res, vecs, nonmissinginds, nonmissingmask) +end + +function maybespread_missing( + res::T, + spread::SpreadNonMissing, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where{T} + + out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) + fill!(out, missing) + out[nonmissinginds] .= Ref(res) + out +end + +function maybespread_missing( + res::AbstractVector{T}, + spread::SpreadNonMissing, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + + spread_missing(res, vecs, nonmissinginds, nonmissingmask) +end + +function maybespread_missing( + res::T, + spread::SpreadNone, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::T where {T} + + res +end + +function spread_nomissing( + res::AbstractVector{T}, + vecs::Tuple)::typeof(res) where {T} + + if length(res) != length(first(vecs)) + s = "When spreading a vector result with `spread=$(S)`, " * + "length of output must match number of jointly non-" + "missing values in inputs " + "(got $(length(res)) and $(length(first(vecs)))).". + throw(DimensionMismatch(s)) + end + res +end + +function maybespread_nomissing( + res::T, + spread::SpreadDefault, + vecs::Tuple)::T where{T} + + res +end + +function maybespread_nomissing( + res::AbstractVector{T}, + spread::SpreadDefault, + vecs::Tuple)::typeof(res) where {T} + + spread_nomissing(res, vecs) +end + +function maybespread_nomissing( + res::T, + spread::SpreadNonMissing, + vecs::Tuple)::Vector{T} where{T} + + out = Vector{typeof(res)}(undef, length(vecs[1])) + fill!(out, res) + out +end + +function maybespread_nomissing( + res::AbstractVector{T}, + spread::SpreadNonMissing, + vecs::Tuple)::typeof(res) where {T} + + spread_nomissing(res, vecs) +end + +function maybespread_nomissing( + res::T, + spread::SpreadNone, + vecs::Tuple)::T where {T} + + res +end + """ maybespread_missing( f::SpreadMissings, @@ -263,17 +392,12 @@ end Applied when `spreadmissing(f)(args...; kwargs...)` is called and `args` or `kwargs` contain a `Vector{Union{T, Missing}}`. """ -function maybespread_missing( - f::SpreadMissings{F, S}, - newargs::Tuple, - new_kwargs, +#=function maybespread_missing( + res, + spread::AbstractSpread, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool}) where {F, S} - - res = f.f(newargs...; new_kwargs...) - - spread = f.spread + nonmissingmask::AbstractVector{<:Bool}) if res isa AbstractVector # Default and spread have the same behavior if @@ -308,7 +432,7 @@ function maybespread_missing( end return out -end +end=# """ maybespread_nomissing( @@ -320,6 +444,7 @@ end Applied when `spreadmissing(f)(args...; kwargs...)` is called and *neither* `args` nor `kwargs` contain a `Vector{Union{T, Missing}}`. """ +#= function maybespread_nomissing( f::SpreadMissings{F, S}, args::Tuple, @@ -359,6 +484,7 @@ function maybespread_nomissing( return out end +=# function check_indices_match(vecs...) Base.require_one_based_indexing(vecs...) @@ -399,12 +525,14 @@ function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S} new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) new_kwargs = (k => v for (k, v) in zip(keys(kwargs), new_kwargs_vals)) - maybespread_missing(f, newargs, new_kwargs, vecs, nonmissinginds, nonmissingmask) + res = f.f(newargs...; new_kwargs...) + maybespread_missing(res, f.spread, vecs, nonmissinginds, nonmissingmask) # There is at least one vector, but none of the vectors can contain missing elseif any(x -> x isa AbstractVector, xs) vecs = Base.filter(x -> x isa AbstractVector, xs) check_indices_match(vecs...) - maybespread_nomissing(f, args, kwargs, vecs) + res = f.f(args...; kwargs...) + maybespread_nomissing(res, f.spread, vecs) else f.f(args...; kwargs...) end From 788b9e9be252849d13cdcaca4b29eb5beb74067c Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 12 Dec 2023 12:05:58 -0500 Subject: [PATCH 22/26] relax type hints to allow categorical --- src/Missings.jl | 8 +- test/spreadmissings.jl | 243 ++++++++++++++++++++++------------------- 2 files changed, 134 insertions(+), 117 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index d45e151..b2f3090 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -255,7 +255,7 @@ function spread_missing( res::AbstractVector{T}, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} if length(res) != length(nonmissinginds) s = "When spreading a vector result with `spread=$(S)`, " * @@ -286,7 +286,7 @@ function maybespread_missing( spread::SpreadDefault, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} spread_missing(res, vecs, nonmissinginds, nonmissingmask) end @@ -296,7 +296,7 @@ function maybespread_missing( spread::SpreadNonMissing, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where{T} + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where{T} out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) fill!(out, missing) @@ -309,7 +309,7 @@ function maybespread_missing( spread::SpreadNonMissing, vecs::Tuple, nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool})::Vector{Union{Missing, T}} where {T} + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} spread_missing(res, vecs, nonmissinginds, nonmissingmask) end diff --git a/test/spreadmissings.jl b/test/spreadmissings.jl index 669ec6b..191c0ff 100644 --- a/test/spreadmissings.jl +++ b/test/spreadmissings.jl @@ -1,6 +1,6 @@ module SpreadMissingTests -using Test, Missings +using Test, Missings, CategoricalArrays const ≈ = isequal @@ -31,119 +31,136 @@ y = [100, 200, 300, 400] s = [1000, 2000] -## vector, :default -### missings in main arg only -t = spreadmissings(right_vec)(xmiss) -@test t ≈ [1, 2, 3, missing] -### missing in keyword arg only -t = spreadmissings(right_vec)(; z = ymiss) -@test t ≈ [missing, 1, 2, 3] -### missing in both main arg and keyword arg -t = spreadmissings(right_vec)(x, xmiss; z = ymiss) -@test t ≈ [missing, 1, 2, missing] -### missings nowhere -t = spreadmissings(right_vec)(x; z = y) -@test t ≈ [1, 2, 3, 4] -@test t isa Vector{Int} -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(right_vec)(x, s) -### no vectors -t = spreadmissings(right_vec)(1, 2; z = 9) -@test t == [-1] - -## vector, :nonmissing -### missings in main arg only -t = spreadmissings(right_vec; spread = :nonmissing)(xmiss) -@test t ≈ [1, 2, 3, missing] -### missing in keyword arg only -t = spreadmissings(right_vec; spread = :nonmissing)(; z = ymiss) -@test t ≈ [missing, 1, 2, 3] -### missing in both main arg and keyword arg -t = spreadmissings(right_vec; spread = :nonmissing)(x, xmiss; z = ymiss) -@test t ≈ [missing, 1, 2, missing] -### missings nowhere -t = spreadmissings(right_vec; spread = :nonmissing)(x; z = y) -@test t ≈ [1, 2, 3, 4] -@test t isa Vector{Int} -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(right_vec; spread = :nonmissing)(x, s) -### no vectors -t = spreadmissings(right_vec)(1, 2; z = 9) -@test t == [-1] - -## vector, :none -t = spreadmissings(right_vec; spread = :none)(xmiss) -@test t ≈ [1, 2, 3] -### missing in keyword arg only -t = spreadmissings(right_vec; spread = :none)(; z = ymiss) -@test t ≈ [1, 2, 3] -### missing in both main arg and keyword arg -t = spreadmissings(right_vec; spread = :none)(x, xmiss; z = ymiss) -@test t ≈ [1, 2] -### missings nowhere -t = spreadmissings(right_vec; spread = :none)(x; z = y) -@test t ≈ [1, 2, 3, 4] -@test t isa Vector{Int} -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(right_vec; spread = :none)(x, s) - -## non-vector, :default -### missings in main arg only -t = spreadmissings(scalar; spread = :default)(xmiss) -@test t ≈ 1 -### missing in keyword arg only -t = spreadmissings(scalar; spread = :default)(; z = ymiss) -@test t ≈ 1 -### missing in both main arg and keyword arg -t = spreadmissings(scalar; spread = :default)(x, xmiss; z = ymiss) -@test t ≈ 1 -### missings nowhere -t = spreadmissings(scalar; spread = :default)(x; z = y) -@test t ≈ 1 -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(scalar; spread = :default)(x, s) -### no vectors -t = spreadmissings(scalar; spread = :default)(1, 2; z = 9) -@test t == 1 - -## non-vector, :nonmissing -### missings in main arg only -t = spreadmissings(scalar; spread = :nonmissing)(xmiss) -@test t ≈ [1, 1, 1, missing] -### missing in keyword arg only -t = spreadmissings(scalar; spread = :nonmissing)(; z = ymiss) -@test t ≈ [missing, 1, 1, 1] -### missing in both main arg and keyword arg -t = spreadmissings(scalar; spread = :nonmissing)(x, xmiss; z = ymiss) -@test t ≈ [missing, 1, 1, missing] -### missings nowhere -t = spreadmissings(scalar; spread = :nonmissing)(x; z = y) -@test t ≈ [1, 1, 1, 1] -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(scalar; spread = :nonmissing)(x, s) -### no vectors -t = spreadmissings(scalar; spread = :nonmissing)(1, 2; z = 9) -@test t == 1 - -## non-vector, :none -### missings in main arg only -t = spreadmissings(scalar; spread = :none)(xmiss) -@test t ≈ 1 -### missing in keyword arg only -t = spreadmissings(scalar; spread = :none)(; z = ymiss) -@test t ≈ 1 -### missing in both main arg and keyword arg -t = spreadmissings(scalar; spread = :none)(x, xmiss; z = ymiss) -@test t ≈ 1 -### missings nowhere -t = spreadmissings(scalar; spread = :none)(x; z = y) -@test t ≈ 1 -### Mis-matched vector lengths -@test_throws DimensionMismatch spreadmissings(scalar; spread = :none)(x, s) -### no vectors -t = spreadmissings(scalar; spread = :none)(1, 2; z = 9) -@test t == 1 +@testset "vector, :default" begin + ### missings in main arg only + t = spreadmissings(right_vec)(xmiss) + @test t ≈ [1, 2, 3, missing] + ### missing in keyword arg only + t = spreadmissings(right_vec)(; z = ymiss) + @test t ≈ [missing, 1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 2, missing] + ### missings nowhere + t = spreadmissings(right_vec)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + +@testset "vector, :nonmissing" begin + ### missings in main arg only + t = spreadmissings(right_vec; spread = :nonmissing)(xmiss) + @test t ≈ [1, 2, 3, missing] + ### missing in keyword arg only + t = spreadmissings(right_vec; spread = :nonmissing)(; z = ymiss) + @test t ≈ [missing, 1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec; spread = :nonmissing)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 2, missing] + ### missings nowhere + t = spreadmissings(right_vec; spread = :nonmissing)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec; spread = :nonmissing)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + +@testset "vector, :none" begin + t = spreadmissings(right_vec; spread = :none)(xmiss) + @test t ≈ [1, 2, 3] + ### missing in keyword arg only + t = spreadmissings(right_vec; spread = :none)(; z = ymiss) + @test t ≈ [1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec; spread = :none)(x, xmiss; z = ymiss) + @test t ≈ [1, 2] + ### missings nowhere + t = spreadmissings(right_vec; spread = :none)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec; spread = :none)(x, s) +end + +@testset "non-vector, :default" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :default)(xmiss) + @test t ≈ 1 + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :default)(; z = ymiss) + @test t ≈ 1 + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :default)(x, xmiss; z = ymiss) + @test t ≈ 1 + ### missings nowhere + t = spreadmissings(scalar; spread = :default)(x; z = y) + @test t ≈ 1 + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :default)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :default)(1, 2; z = 9) + @test t == 1 +end +@testset "non-vector, :nonmissing" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :nonmissing)(xmiss) + @test t ≈ [1, 1, 1, missing] + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :nonmissing)(; z = ymiss) + @test t ≈ [missing, 1, 1, 1] + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :nonmissing)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 1, missing] + ### missings nowhere + t = spreadmissings(scalar; spread = :nonmissing)(x; z = y) + @test t ≈ [1, 1, 1, 1] + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :nonmissing)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :nonmissing)(1, 2; z = 9) + @test t == 1 +end + +@testset "non-vector, :none" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :none)(xmiss) + @test t ≈ 1 + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :none)(; z = ymiss) + @test t ≈ 1 + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :none)(x, xmiss; z = ymiss) + @test t ≈ 1 + ### missings nowhere + t = spreadmissings(scalar; spread = :none)(x; z = y) + @test t ≈ 1 + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :none)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :none)(1, 2; z = 9) + @test t == 1 +end + +@testset "categorical" begin + x = [1, 2, 3] + t = spreadmissings(categorical)(x) + @test t == categorical([1, 2, 3]) + @test typeof(t) == typeof(categorical([1, 2, 3])) + + x = [1, 2, 3, missing] + t = spreadmissings(categorical)(x) + @test t ≈ categorical([1, 2, 3, missing]) + @test typeof(t) == typeof(categorical([1, 2, 3, missing])) +end # Error on missing alwaysone(args...; kwargs...) = 1 From 4c0a7449766329d680407cf570ae4644f044f099 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 12 Dec 2023 13:36:56 -0500 Subject: [PATCH 23/26] add :all --- src/Missings.jl | 169 ++++++++++++++--------------------------- test/spreadmissings.jl | 40 ++++++++++ 2 files changed, 98 insertions(+), 111 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index b2f3090..5484c3b 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -212,6 +212,7 @@ abstract type AbstractSpread end struct SpreadDefault <: AbstractSpread end struct SpreadNonMissing <: AbstractSpread end struct SpreadNone <: AbstractSpread end +struct SpreadAll <: AbstractSpread end struct SpreadMissings{F, S <: AbstractSpread} <: Function f::F @@ -224,6 +225,11 @@ struct SpreadMissings{F, S <: AbstractSpread} <: Function end end +SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault()) +SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing()) +SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone()) +SpreadMissings(f, spread::Val{:all}) = SpreadMissings(f, SpreadAll()) + """ nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) @@ -324,6 +330,28 @@ function maybespread_missing( res end +function maybespread_missing( + res::T, + spread::SpreadAll, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{T} where {T} + + out = Vector{typeof(res)}(undef, length(first(vecs))) + out .= Ref(res) + out +end + +function maybespread_missing( + res::AbstractVector, + spread::SpreadAll, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool}) + + throw(ArgumentError("spreadmissings with :all on vector output is reserved")) +end + function spread_nomissing( res::AbstractVector{T}, vecs::Tuple)::typeof(res) where {T} @@ -380,111 +408,23 @@ function maybespread_nomissing( res end -""" - maybespread_missing( - f::SpreadMissings, - newargs::Tuple, - new_kwargs, - vecs::Tuple, - nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool}) - -Applied when `spreadmissing(f)(args...; kwargs...)` is called and -`args` or `kwargs` contain a `Vector{Union{T, Missing}}`. -""" -#=function maybespread_missing( - res, - spread::AbstractSpread, - vecs::Tuple, - nonmissinginds::AbstractVector{<:Integer}, - nonmissingmask::AbstractVector{<:Bool}) - - if res isa AbstractVector - # Default and spread have the same behavior if - # output is a vector - if spread === SpreadDefault() || spread === SpreadNonMissing() - if length(res) != length(nonmissinginds) - s = "When spreading a vector result with `spread=$(S)`, " * - "length of output must match number of jointly non-" - "missing values in inputs " - "(got $(length(res)) and $(length(nonmissinginds))).". - - throw(DimensionMismatch(s)) - end - out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) - fill!(out, missing) - out[nonmissingmask] .= res - elseif spread === SpreadNone() - out = res - else - throw(ArgumentError("Should not reach 1")) - end - else - if spread === SpreadNonMissing() - out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) - fill!(out, missing) - out[nonmissinginds] .= Ref(res) - elseif spread === SpreadDefault() || spread === SpreadNone() - out = res - else - throw(ArgumentError("Should not reach 2")) - end - end +function maybespread_nomissing( + res::T, + spread::SpreadAll, + vecs::Tuple)::AbstractVector{T} where {T} - return out -end=# + out = Vector{typeof(res)}(undef, length(first(vecs))) + out .= Ref(res) + out +end -""" - maybespread_nomissing( - f::SpreadMissings, - args::Tuple, - kwargs, - vecs::Tuple) - -Applied when `spreadmissing(f)(args...; kwargs...)` is called and *neither* -`args` nor `kwargs` contain a `Vector{Union{T, Missing}}`. -""" -#= function maybespread_nomissing( - f::SpreadMissings{F, S}, - args::Tuple, - kwargs, - vecs::Tuple) where {F, S} - - res = f.f(args...; kwargs...) - spread = f.spread - - if res isa AbstractVector - # Default and spread have the same behavior if - # output is a vector - if spread === SpreadDefault() || spread === SpreadNonMissing() - if length(res) != length(first(vecs)) - s = "When spreading a vector result with `spread=$(S)`, " * - "length of output must match number of jointly non-" - "missing values in inputs " - "(got $(length(res)) and $(length(nonmissinginds))).". - throw(DimensionMismatch(s)) - end - out = res - elseif spread === SpreadNone() - out = res - else - throw(ArgumentError("Should not reach 1")) - end - else - if spread === SpreadNonMissing() - out = Vector{typeof(res)}(undef, length(vecs[1])) - fill!(out, res) - elseif spread === SpreadDefault() || spread === SpreadNone() - out = res - else - throw(ArgumentError("Should not reach 2")) - end - end + res::AbstractVector, + spread::SpreadAll, + vecs::Tuple) - return out + throw(ArgumentError("spreadmissings with :all on vector output is reserved")) end -=# function check_indices_match(vecs...) Base.require_one_based_indexing(vecs...) @@ -560,6 +500,10 @@ For each vector argument, `f` is passed a `SubArray` view with an element type equal to `nonmissingtype(T)`, with `T` the element type of the original argument. +If none of the input arguments are vectors of any kind, +`spreadmissings(f)` behaves exactly the same as `f`. No +pre or post-processing is done. + ### Examples ```julia-repl @@ -641,14 +585,20 @@ The `spread` keyword argument controls whether the output from * If `output` is not a `Vector`, `output` is spread along non-missing elements of the inputs. * `:none`: output is returned directly, whether a vector or not. +* `:all`: + * If output is not a vector, it is spread over the full + length of the input vectors, not only the indices with + missing values with inputs. + * If the output is a vector, an error is thrown. A summary of the behavior is given in the table below: -| spread \\ output type | Vector | Non-vector | -|:---------------------- |:------ |:-----------| -| :default | spread | return | -| :nonmissing | spread | spread | -| :none | return | return | +| spread \\ output type | Vector | Non-vector | +|:---------------------- |:------------------------------- |:------------------------------------| +| :default | spread over non-missing indices | return | +| :nonmissing | spread over non-missing indices | spread over non-missing indices | +| :none | return | return | +| :all | error | spread over all indices | If there are `AbstractVector` inputs but none of these inputs are `AbstractVector{>:Missing}`, the returned vectors will not allow @@ -687,16 +637,14 @@ behaves the same as `f` regardless of `spread`. elements of the original `x`. 2. `fillmean_skip` returns a vector which does not allow for `missing`, while `spreadmissings(fillmean)` does. -""" - -SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault()) -SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing()) -SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone()) + Use the keyword `spread = :all` to emulate the `skipmissing` behavior. +""" function spreadmissings(f; spread::Symbol = :default) SpreadMissings(f, Val(spread)) # throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) end + """ skipmissings(args...) @@ -718,7 +666,6 @@ julia> collect(tx) 2-element Array{Int64,1}: 1 2 - ``` """ function skipmissings(args...) diff --git a/test/spreadmissings.jl b/test/spreadmissings.jl index 191c0ff..3733c9b 100644 --- a/test/spreadmissings.jl +++ b/test/spreadmissings.jl @@ -90,6 +90,26 @@ end @test_throws DimensionMismatch spreadmissings(right_vec; spread = :none)(x, s) end +@testset "vector, :all" begin + ### missings in main arg only + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(xmiss) + + ### missing in keyword arg only + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(; z = ymiss) + + ### missing in both main arg and keyword arg + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(x, xmiss; z = ymiss) + + ### missings nowhere + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(x; z = y) + + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + @testset "non-vector, :default" begin ### missings in main arg only t = spreadmissings(scalar; spread = :default)(xmiss) @@ -150,6 +170,26 @@ end @test t == 1 end +@testset "non-vector, :all" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :all)(xmiss) + @test t ≈ [1, 1, 1, 1] + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :all)(; z = ymiss) + @test t ≈ [1, 1, 1, 1] + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :all)(x, xmiss; z = ymiss) + @test t ≈ [1, 1, 1, 1] + ### missings nowhere + t = spreadmissings(scalar; spread = :all)(x; z = y) + @test t ≈ [1, 1, 1, 1] + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :all)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :all)(1, 2; z = 9) + @test t == 1 +end + @testset "categorical" begin x = [1, 2, 3] t = spreadmissings(categorical)(x) From 50d8ffa4739916facc0d0e8d2df464374ac91f15 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 12 Dec 2023 13:44:05 -0500 Subject: [PATCH 24/26] remove val --- src/Missings.jl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Missings.jl b/src/Missings.jl index 5484c3b..1431f2a 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -225,11 +225,6 @@ struct SpreadMissings{F, S <: AbstractSpread} <: Function end end -SpreadMissings(f, spread::Val{:default}) = SpreadMissings(f, SpreadDefault()) -SpreadMissings(f, spread::Val{:nonmissing}) = SpreadMissings(f, SpreadNonMissing()) -SpreadMissings(f, spread::Val{:none}) = SpreadMissings(f, SpreadNone()) -SpreadMissings(f, spread::Val{:all}) = SpreadMissings(f, SpreadAll()) - """ nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) @@ -641,8 +636,17 @@ behaves the same as `f` regardless of `spread`. Use the keyword `spread = :all` to emulate the `skipmissing` behavior. """ function spreadmissings(f; spread::Symbol = :default) - SpreadMissings(f, Val(spread)) -# throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) + if spread === :default + SpreadMissings(f, SpreadDefault()) + elseif spread === :nonmissing + SpreadMissings(f, SpreadNonMissing()) + elseif spread === :none + SpreadMissings(f, SpreadNone()) + elseif spread === :all + SpreadMissings(f, SpreadAll()) + else + throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) + end end """ From bfef9888c39a0f0d12247f0ea8b8bee9c586e608 Mon Sep 17 00:00:00 2001 From: Peter Deffebach Date: Tue, 12 Dec 2023 13:51:37 -0500 Subject: [PATCH 25/26] fix error message --- src/Missings.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Missings.jl b/src/Missings.jl index 1431f2a..26c4c24 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -645,7 +645,7 @@ function spreadmissings(f; spread::Symbol = :default) elseif spread === :all SpreadMissings(f, SpreadAll()) else - throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, or `:none`")) + throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, `:none`, or `:all`")) end end From c667ed8199e855098b248dcee423154c1fc057f2 Mon Sep 17 00:00:00 2001 From: pdeffebach <23196228+pdeffebach@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:59:31 -0500 Subject: [PATCH 26/26] Update src/Missings.jl Co-authored-by: Milan Bouchet-Valat --- src/Missings.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Missings.jl b/src/Missings.jl index 26c4c24..fda0525 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -423,7 +423,8 @@ end function check_indices_match(vecs...) Base.require_one_based_indexing(vecs...) - findex = eachindex(vecs...) + eachindex(vecs...) + nothing end function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S}