From 81c6a6908ed6144409894aaddc909e587fbbc139 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 11 Oct 2022 23:28:54 -0500 Subject: [PATCH 01/10] allow predicting from a single observation --- src/grouping.jl | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ test/predict.jl | 7 +++++++ 2 files changed, 62 insertions(+) diff --git a/src/grouping.jl b/src/grouping.jl index 98bcd4367..5b06fb1fe 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -32,3 +32,58 @@ Base.getproperty(g::Grouping, prop::Symbol) = prop == :levels ? nothing : getfie function StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) return error("can't create model columns directly from a Grouping term") end + +# copied from StatsModels@463eb0a +function StatsModels.ContrastsMatrix(contrasts::Grouping, levels::AbstractVector{T}) where {T} + + # if levels are defined on contrasts, use those, validating that they line up. + # what does that mean? either: + # + # 1. DataAPI.levels(contrasts) == levels (best case) + # 2. data levels missing from contrast: would generate empty/undefined rows. + # better to filter data frame first + # 3. contrast levels missing from data: would have empty columns, generate a + # rank-deficient model matrix. + c_levels = something(DataAPI.levels(contrasts), levels) + + mismatched_levels = symdiff(c_levels, levels) + if !isempty(mismatched_levels) + throw(ArgumentError("contrasts levels not found in data or vice-versa: " * + "$mismatched_levels." * + "\n Data levels ($(eltype(levels))): $levels." 
* + "\n Contrast levels ($(eltype(c_levels))): $c_levels")) + end + + + # do conversion AFTER checking for levels so users get a nice error message + # when they've made a mistake with the level types + c_levels = convert(Vector{T}, c_levels) + + + + n = length(c_levels) + # not validating this allows for prediction of only a single level of the grouping factor + # if n == 0 + # throw(ArgumentError("empty set of levels found (need at least two to compute " * + # "contrasts).")) + # elseif n == 1 + # throw(ArgumentError("only one level found: $(c_levels[1]) (need at least two to " * + # "compute contrasts).")) + # end + # find index of base level. use baselevel(contrasts), then default (1). + # base_level = baselevel(contrasts) + # baseind = base_level === nothing ? + # 1 : + # findfirst(isequal(base_level), c_levels) + # if baseind === nothing + # throw(ArgumentError("base level $(base_level) not found in levels " * + # "$c_levels.")) + # end + + base_level = nothing + baseind = 1 + + tnames = StatsModels.termnames(contrasts, c_levels, baseind) + mat = StatsModels.contrasts_matrix(contrasts, baseind, n) + StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) +end diff --git a/test/predict.jl b/test/predict.jl index 466828dd2..fce35b982 100644 --- a/test/predict.jl +++ b/test/predict.jl @@ -47,6 +47,13 @@ include("modelcache.jl") end @testset "predict" begin + + @testset "single obs" begin + kb07 = DataFrame(dataset(:kb07)) + m = models(:kb07)[1] + @test only(predict(m, kb07[1:1, :])) ≈ first(fitted(m)) + end + slp = DataFrame(dataset(:sleepstudy)) slp2 = transform(slp, :subj => ByRow(x -> (x == "S308" ? 
"NEW" : x)) => :subj) slpm = allowmissing(slp, :reaction) From e3b19cef744e20ac7c0cf8e802fb6f4ae1832f83 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 11 Oct 2022 23:31:31 -0500 Subject: [PATCH 02/10] NEWS + patch bump --- NEWS.md | 5 +++++ Project.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e87202392..206b1feca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +MixedModels v4.7.4 Release Notes +============================== +* Allow predicting from a single observation [#653] + MixedModels v4.7.3 Release Notes ============================== * More informative error message for formulae lacking random effects [#651] @@ -375,3 +379,4 @@ Package dependencies [#637]: https://github.com/JuliaStats/MixedModels.jl/issues/637 [#648]: https://github.com/JuliaStats/MixedModels.jl/issues/648 [#651]: https://github.com/JuliaStats/MixedModels.jl/issues/651 +[#653]: https://github.com/JuliaStats/MixedModels.jl/issues/653 diff --git a/Project.toml b/Project.toml index 192d882aa..abc2ac4b8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "4.7.3" +version = "4.7.4" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" From 857208a2afe3fa9240b6ed91385b2851251db72a Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 12 Oct 2022 04:33:12 +0000 Subject: [PATCH 03/10] JuliaFormatter Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/grouping.jl | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index 5b06fb1fe..bcc4ff3ab 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -34,7 +34,9 @@ function StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) end # copied from StatsModels@463eb0a -function 
StatsModels.ContrastsMatrix(contrasts::Grouping, levels::AbstractVector{T}) where {T} +function StatsModels.ContrastsMatrix( + contrasts::Grouping, levels::AbstractVector{T} +) where {T} # if levels are defined on contrasts, use those, validating that they line up. # what does that mean? either: @@ -48,19 +50,20 @@ function StatsModels.ContrastsMatrix(contrasts::Grouping, levels::AbstractVector mismatched_levels = symdiff(c_levels, levels) if !isempty(mismatched_levels) - throw(ArgumentError("contrasts levels not found in data or vice-versa: " * - "$mismatched_levels." * - "\n Data levels ($(eltype(levels))): $levels." * - "\n Contrast levels ($(eltype(c_levels))): $c_levels")) + throw( + ArgumentError( + "contrasts levels not found in data or vice-versa: " * + "$mismatched_levels." * + "\n Data levels ($(eltype(levels))): $levels." * + "\n Contrast levels ($(eltype(c_levels))): $c_levels", + ), + ) end - # do conversion AFTER checking for levels so users get a nice error message # when they've made a mistake with the level types c_levels = convert(Vector{T}, c_levels) - - n = length(c_levels) # not validating this allows for prediction of only a single level of the grouping factor # if n == 0 @@ -85,5 +88,5 @@ function StatsModels.ContrastsMatrix(contrasts::Grouping, levels::AbstractVector tnames = StatsModels.termnames(contrasts, c_levels, baseind) mat = StatsModels.contrasts_matrix(contrasts, baseind, n) - StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) + return StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) end From 67cdea89a62e50c4b29f130abd2808681cd0c4ab Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Tue, 11 Oct 2022 23:34:25 -0500 Subject: [PATCH 04/10] cruft --- src/grouping.jl | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index bcc4ff3ab..7ce56224c 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -33,7 +33,7 @@ function 
StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) return error("can't create model columns directly from a Grouping term") end -# copied from StatsModels@463eb0a +# copied and then adapted from StatsModels@463eb0a function StatsModels.ContrastsMatrix( contrasts::Grouping, levels::AbstractVector{T} ) where {T} @@ -66,26 +66,8 @@ function StatsModels.ContrastsMatrix( n = length(c_levels) # not validating this allows for prediction of only a single level of the grouping factor - # if n == 0 - # throw(ArgumentError("empty set of levels found (need at least two to compute " * - # "contrasts).")) - # elseif n == 1 - # throw(ArgumentError("only one level found: $(c_levels[1]) (need at least two to " * - # "compute contrasts).")) - # end - # find index of base level. use baselevel(contrasts), then default (1). - # base_level = baselevel(contrasts) - # baseind = base_level === nothing ? - # 1 : - # findfirst(isequal(base_level), c_levels) - # if baseind === nothing - # throw(ArgumentError("base level $(base_level) not found in levels " * - # "$c_levels.")) - # end - base_level = nothing baseind = 1 - tnames = StatsModels.termnames(contrasts, c_levels, baseind) mat = StatsModels.contrasts_matrix(contrasts, baseind, n) return StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) From 8e158df2868392337c0f63df9631971dec0c2168 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 12 Oct 2022 16:05:16 -0500 Subject: [PATCH 05/10] Remove cruft Co-authored-by: Dave Kleinschmidt --- src/grouping.jl | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index 7ce56224c..98ea98c26 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -37,38 +37,8 @@ end function StatsModels.ContrastsMatrix( contrasts::Grouping, levels::AbstractVector{T} ) where {T} - - # if levels are defined on contrasts, use those, validating that they line up. - # what does that mean? either: - # - # 1. 
DataAPI.levels(contrasts) == levels (best case) - # 2. data levels missing from contrast: would generate empty/undefined rows. - # better to filter data frame first - # 3. contrast levels missing from data: would have empty columns, generate a - # rank-deficient model matrix. - c_levels = something(DataAPI.levels(contrasts), levels) - - mismatched_levels = symdiff(c_levels, levels) - if !isempty(mismatched_levels) - throw( - ArgumentError( - "contrasts levels not found in data or vice-versa: " * - "$mismatched_levels." * - "\n Data levels ($(eltype(levels))): $levels." * - "\n Contrast levels ($(eltype(c_levels))): $c_levels", - ), - ) - end - - # do conversion AFTER checking for levels so users get a nice error message - # when they've made a mistake with the level types - c_levels = convert(Vector{T}, c_levels) - - n = length(c_levels) - # not validating this allows for prediction of only a single level of the grouping factor - baseind = 1 tnames = StatsModels.termnames(contrasts, c_levels, baseind) - mat = StatsModels.contrasts_matrix(contrasts, baseind, n) + mat = StatsModels.contrasts_matrix(contrasts, baseind, 0) return StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) end From 5b517443a02741f187aa94e86eb3dd6085451261 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 12 Oct 2022 16:20:03 -0500 Subject: [PATCH 06/10] cut even more methods --- src/grouping.jl | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index 98ea98c26..09e3174cb 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -22,23 +22,18 @@ julia> schema((; grp = string.(1:100_000)), Dict(:grp => Grouping())) struct Grouping <: StatsModels.AbstractContrasts end # return an empty matrix -StatsModels.contrasts_matrix(::Grouping, baseind, n) = zeros(0, 0) -StatsModels.termnames(::Grouping, levels::AbstractVector, baseind::Integer) = levels +# StatsModels.contrasts_matrix(::Grouping, baseind, n) = error("Grouping 
terms don't have associated contrasts") +# StatsModels.termnames(::Grouping, levels::AbstractVector, baseind::Integer) = levels # this is needed until StatsModels stops assuming all contrasts have a .levels field Base.getproperty(g::Grouping, prop::Symbol) = prop == :levels ? nothing : getfield(g, prop) # special-case categorical terms with Grouping contrasts. -function StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) - return error("can't create model columns directly from a Grouping term") -end +StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) = + error("can't create model columns directly from a Grouping term") -# copied and then adapted from StatsModels@463eb0a function StatsModels.ContrastsMatrix( contrasts::Grouping, levels::AbstractVector{T} ) where {T} - baseind = 1 - tnames = StatsModels.termnames(contrasts, c_levels, baseind) - mat = StatsModels.contrasts_matrix(contrasts, baseind, 0) - return StatsModels.ContrastsMatrix(mat, tnames, c_levels, contrasts) + return StatsModels.ContrastsMatrix(zeros(0, 0), levels, levels, contrasts) end From c3b715d47b6f77521c731047fb940bb7cc00f5bb Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 12 Oct 2022 16:29:12 -0500 Subject: [PATCH 07/10] update news and version for more extensive change --- NEWS.md | 4 ++-- Project.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 206b1feca..2b7073d35 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ -MixedModels v4.7.4 Release Notes +MixedModels v4.8.0 Release Notes ============================== -* Allow predicting from a single observation [#653] +* Allow predicting from a single observation, as long as `Grouping()` is used for the grouping variables. The simplified implementation of `Grouping()` also removes several now unnecessary `StatsModels` methods that should not have been called directly by the user. 
[#653] MixedModels v4.7.3 Release Notes ============================== diff --git a/Project.toml b/Project.toml index abc2ac4b8..548f6f98c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "4.7.4" +version = "4.8.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" From 1c0c33df1eec4bfd4f098b3ba91cf76bc99696f7 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 12 Oct 2022 16:34:23 -0500 Subject: [PATCH 08/10] Update src/grouping.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/grouping.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index 09e3174cb..95885a41c 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -29,8 +29,9 @@ struct Grouping <: StatsModels.AbstractContrasts end Base.getproperty(g::Grouping, prop::Symbol) = prop == :levels ? nothing : getfield(g, prop) # special-case categorical terms with Grouping contrasts. 
-StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) = - error("can't create model columns directly from a Grouping term") +function StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) + return error("can't create model columns directly from a Grouping term") +end function StatsModels.ContrastsMatrix( contrasts::Grouping, levels::AbstractVector{T} From 9fbff2d1c28da1b4b1c817328f9c7f5b4aed2fd2 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 14 Oct 2022 03:32:18 +0000 Subject: [PATCH 09/10] Update src/grouping.jl Co-authored-by: Dave Kleinschmidt --- src/grouping.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index 95885a41c..eb0487c4c 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -34,7 +34,7 @@ function StatsModels.modelcols(::CategoricalTerm{Grouping}, d::NamedTuple) end function StatsModels.ContrastsMatrix( - contrasts::Grouping, levels::AbstractVector{T} -) where {T} + contrasts::Grouping, levels::AbstractVector +) return StatsModels.ContrastsMatrix(zeros(0, 0), levels, levels, contrasts) end From 92af160b43ff82872bec40356bc918b6333fece2 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Thu, 13 Oct 2022 23:26:07 -0500 Subject: [PATCH 10/10] remove more cruft --- src/grouping.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/grouping.jl b/src/grouping.jl index eb0487c4c..2f0f49a45 100644 --- a/src/grouping.jl +++ b/src/grouping.jl @@ -21,10 +21,6 @@ julia> schema((; grp = string.(1:100_000)), Dict(:grp => Grouping())) """ struct Grouping <: StatsModels.AbstractContrasts end -# return an empty matrix -# StatsModels.contrasts_matrix(::Grouping, baseind, n) = error("Grouping terms don't have associated contrasts") -# StatsModels.termnames(::Grouping, levels::AbstractVector, baseind::Integer) = levels - # this is needed until StatsModels stops assuming all contrasts have a .levels field Base.getproperty(g::Grouping, prop::Symbol) = prop == :levels ? 
nothing : getfield(g, prop)