diff --git a/NEWS.md b/NEWS.md index 0e867cfa2..8620e8ed8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +MixedModels v3.1 Release Notes +======================== + +* `simulate!` and thus `parametricbootstrap` methods for `GeneralizedLinearMixedModel` [#418]. +* Documented inconsistent behavior in `sdest` and `varest` `GeneralizedLinearMixedModel` [#418]. + MixedModels v3.0.2 Release Notes ======================== @@ -91,3 +97,4 @@ Package dependencies [#384]: https://github.com/JuliaStats/MixedModels.jl/issues/384 [#390]: https://github.com/JuliaStats/MixedModels.jl/issues/390 [#395]: https://github.com/JuliaStats/MixedModels.jl/issues/395 +[#418]: https://github.com/JuliaStats/MixedModels.jl/issues/418 diff --git a/Project.toml b/Project.toml index a03ad7dc5..d28d6c404 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "3.0.2" +version = "3.1.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/docs/src/bootstrap.md b/docs/src/bootstrap.md index ab84ecb0f..65427de2a 100644 --- a/docs/src/bootstrap.md +++ b/docs/src/bootstrap.md @@ -1,7 +1,7 @@ -# Parametric bootstrap for linear mixed-effects models +# Parametric bootstrap for mixed-effects models Julia is well-suited to implementing bootstrapping and other simulation-based methods for statistical models. -The `parametricbootstrap` function in the [MixedModels package](https://github.com/JuliaStats/MixedModels.jl) provides an efficient parametric bootstrap for linear mixed-effects models. +The `parametricbootstrap` function in the [MixedModels package](https://github.com/JuliaStats/MixedModels.jl) provides an efficient parametric bootstrap for mixed-effects models. ```@docs parametricbootstrap @@ -98,7 +98,7 @@ For example, if we bootstrap a model fit to the `sleepstudy` data sleepstudy = MixedModels.dataset(:sleepstudy) m2 = fit( MixedModel, - @formula(reaction ~ 1+days+(1+days|subj)), + @formula(reaction ~ 1+days+(1+days|subj)), sleepstudy, ) ``` @@ -131,7 +131,7 @@ sum(ρs .≈ -1) Furthermore there are even a few cases where the estimate of the standard deviation of the random effect for the intercept is zero. ```@example Main -σs = @where(df2, :type .== "σ", :group .== "subj", :names .== "(Intercept)").value +σs = @where(df2, :type .== "σ", :group .== "subj", :names .== "(Intercept)").value sum(σs .≈ 0) ``` @@ -139,7 +139,7 @@ There is a general condition to check for singularity of an estimated covariance The parameter optimized in the estimation is `θ`, the relative covariance parameter. Some of the elements of this parameter vector must be non-negative and, when one of these components is approximately zero, one of the covariance matrices will be singular. -The `issingular` method for a `LinearMixedModel` object that tests if a parameter vector `θ` corresponds to a boundary or singular fit. +The `issingular` method for a `MixedModel` object that tests if a parameter vector `θ` corresponds to a boundary or singular fit. This operation is encapsulated in a method for the `issingular` function. ```@example Main diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 9ba16171f..6af8c6fa1 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -32,9 +32,9 @@ struct MixedModelBootstrap{T<:AbstractFloat} end """ - parametricbootstrap(rng::AbstractRNG, nsamp::Integer, m::LinearMixedModel; + parametricbootstrap(rng::AbstractRNG, nsamp::Integer, m::MixedModel; β = coef(m), σ = m.σ, θ = m.θ, use_threads=false) - parametricbootstrap(nsamp::Integer, m::LinearMixedModel; + parametricbootstrap(nsamp::Integer, m::MixedModel; β = coef(m), σ = m.σ, θ = m.θ, use_threads=false) Perform `nsamp` parametric bootstrap replication fits of `m`, returning a `MixedModelBootstrap`. @@ -44,24 +44,29 @@ The default random number generator is `Random.GLOBAL_RNG`. # Named Arguments `β`, `σ`, and `θ` are the values of `m`'s parameters for simulating the responses. +`σ` is only valid for `LinearMixedModel` and `GeneralizedLinearMixedModel` for +families with a dispersion parameter. `use_threads` determines whether or not to use thread-based parallelism. -Note that `use_threads=true` may not offer a performance boost and may even +Note that `use_threads=true` may not offer a performance boost and may even decrease peformance if multithreaded linear algebra (BLAS) routines are available. -In this case, threads at the level of the linear algebra may already occupy all +In this case, threads at the level of the linear algebra may already occupy all processors/processor cores. There are plans to provide better support in coordinating -Julia- and BLAS-level threads in the future. +Julia- and BLAS-level threads in the future. """ function parametricbootstrap( rng::AbstractRNG, n::Integer, - morig::LinearMixedModel{T}; + morig::MixedModel{T}; β::AbstractVector=coef(morig), σ=morig.σ, θ::AbstractVector=morig.θ, use_threads::Bool=false, ) where {T} - β, σ, θ = convert(Vector{T}, β), T(σ), convert(Vector{T}, θ) + if σ !== missing + σ = T(σ) + end + β, θ = convert(Vector{T}, β), convert(Vector{T}, θ) βsc, θsc, p, k, m = similar(β), similar(θ), length(β), length(θ), deepcopy(morig) β_names = (Symbol.(fixefnames(morig))..., ) @@ -108,7 +113,7 @@ end function parametricbootstrap( nsamp::Integer, - m::LinearMixedModel; + m::MixedModel; β = m.β, σ = m.σ, θ = m.θ, @@ -129,14 +134,14 @@ function allpars(bsamp::MixedModelBootstrap{T}) where {T} nresrow = length(bstr) * npars cols = ( sizehint!(Int[], nresrow), - sizehint!(String[], nresrow), + sizehint!(String[], nresrow), sizehint!(Union{Missing,String}[], nresrow), sizehint!(Union{Missing,String}[], nresrow), sizehint!(T[], nresrow), ) nrmdr = Vector{T}[] # normalized rows of λ for (i, r) in enumerate(bstr) - σ = r.σ + σ = coalesce(r.σ, one(T)) for (nm, v) in pairs(r.β) push!.(cols, (i, "β", missing, String(nm), v)) end @@ -159,7 +164,7 @@ function allpars(bsamp::MixedModelBootstrap{T}) where {T} end end end - push!.(cols, (i, "σ", "residual", missing, σ)) + r.σ === missing || push!.(cols, (i, "σ", "residual", missing, r.σ)) end ( iter=cols[1], @@ -186,7 +191,16 @@ function Base.getproperty(bsamp::MixedModelBootstrap, s::Symbol) end end -issingular(bsamp::MixedModelBootstrap) = map(θ -> any(θ .≈ bsamp.lowerbd), bsamp.θ) +""" + issingular(bsamp::MixedModelBootstrap) + +Test each bootstrap sample for singularity of the corresponding fit. + +Equality comparisons are used b/c small non-negative θ values are replaced by 0 in `fit!`. + +See also [`issingular(::MixedModel)`](@ref). +""" +issingular(bsamp::MixedModelBootstrap) = map(θ -> any(θ .== bsamp.lowerbd), bsamp.θ) function Base.propertynames(bsamp::MixedModelBootstrap) [:allpars, :objective, :σ, :β, :se, :coefpvalues, :θ, :σs, :λ, :inds, :lowerbd, :bstr, :fcnames] @@ -290,7 +304,7 @@ function tidyσs(bsamp::MixedModelBootstrap{T}) where {T} ) for (iter, r) in enumerate(bstr) setθ!(bsamp, iter) # install r.θ in λ - σ = r.σ + σ = coalesce(r.σ, one(T)) for (grp, ll) in zip(keys(fcnames), λ) for (cn, col) in zip(getproperty(fcnames, grp), eachrow(ll)) push!(result, NamedTuple{colnms}((iter, grp, Symbol(cn), σ * norm(col)))) diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index aed4a1f72..635a22b11 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -113,6 +113,10 @@ StatsBase.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ) fixef(m::GeneralizedLinearMixedModel) = m.β +function fixef!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where T + copyto!(fill!(v, -zero(T)), m.β) +end + objective(m::GeneralizedLinearMixedModel) = deviance(m) """ @@ -406,6 +410,8 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol) coef(m) elseif s == :beta m.β + elseif s == :objective + objective(m) elseif s ∈ (:σ, :sigma) sdest(m) elseif s == :σs @@ -423,6 +429,12 @@ function Base.getproperty(m::GeneralizedLinearMixedModel, s::Symbol) end end +# this copy behavior matches the implicit copy behavior +# for LinearMixedModel. So this is then different than m.θ, +# which returns a reference to the same array +getθ(m::GeneralizedLinearMixedModel) = copy(m.θ) +getθ!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where {T} = copyto!(v, m.θ) + function StatsBase.loglikelihood(m::GeneralizedLinearMixedModel{T}) where {T} r = m.resp D = Distribution(m.resp) @@ -455,6 +467,7 @@ Base.propertynames(m::GeneralizedLinearMixedModel, private::Bool = false) = ( :X, :y, :lowerbd, + :objective, :σρs, :σs, :corr, @@ -612,7 +625,18 @@ function Base.setproperty!(m::GeneralizedLinearMixedModel, s::Symbol, y) end end -sdest(m::GeneralizedLinearMixedModel{T}) where {T} = convert(T, NaN) +""" + sdest(m::GeneralizedLinearMixedModel) + +Return the estimate of the dispersion, i.e. the standard deviation of the per-observation noise. + +For models with a dispersion parameter ϕ, this is simply ϕ. For models without a +dispersion parameter, this value is `missing`. This differs from `disperion`, +which returns `1` for models without a dispersion parameter. + +For Gaussian models, this parameter is often called σ. +""" +sdest(m::GeneralizedLinearMixedModel{T}) where {T} = dispersion_parameter(m) ? dispersion(m, true) : missing function Base.show(io::IO, m::GeneralizedLinearMixedModel) if m.optsum.feval < 0 @@ -644,6 +668,23 @@ function Base.show(io::IO, m::GeneralizedLinearMixedModel) show(io, coeftable(m)) end +function stderror!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where {T} + # initialize to appropriate NaN for rank-deficient case + fill!(v, zero(T) / zero(T)) + + # the inverse permutation is done here. + # if this is changed to access the permuted + # model matrix directly, then don't forget to add + # in the inverse permutation + vcovmat = vcov(m) + + for idx in 1:size(vcovmat,1) + v[idx] = sqrt(vcovmat[idx,idx]) + end + + v +end + """ updateη!(m::GeneralizedLinearMixedModel) @@ -662,9 +703,20 @@ function updateη!(m::GeneralizedLinearMixedModel) m end -varest(m::GeneralizedLinearMixedModel{T}) where {T} = one(T) +""" + varest(m::GeneralizedLinearMixedModel) + +Returns the estimate of ϕ², the variance of the conditional distribution of Y given B. + +For models with a dispersion parameter ϕ, this is simply ϕ². For models without a +dispersion parameter, this value is `missing`. This differs from `disperion`, +which returns `1` for models without a dispersion parameter. + +For Gaussian models, this parameter is often called σ². +""" +varest(m::GeneralizedLinearMixedModel{T}) where {T} = dispersion_parameter(m) ? dispersion(m, true) : missing - # delegate GLMM method to LMM field +# delegate GLMM method to LMM field for f in ( :feL, :fetrm, diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 5722f8b32..3e52b9531 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -377,7 +377,7 @@ end StatsBase.fitted(m::LinearMixedModel{T}) where {T} = fitted!(Vector{T}(undef, nobs(m)), m) """ - fixef!(v::Vector{T}, m::LinearMixedModel{T}) + fixef!(v::Vector{T}, m::MixedModel{T}) Overwrite `v` with the pivoted fixed-effects coefficients of model `m` @@ -495,15 +495,6 @@ function Base.getproperty(m::LinearMixedModel{T}, s::Symbol) where {T} end end -""" - issingular(m::LinearMixedModel, θ=m.θ) - -Test whether the model `m` is singular if the parameter vector is `θ`. - -Equality comparisons are used b/c small non-negative θ values are replaced by 0 in `fit!`. -""" -issingular(m::LinearMixedModel, θ=m.θ) = any(lowerbd(m) .== θ) - function StatsBase.leverage(m::LinearMixedModel{T}) where {T} # This can be done more efficiently but reusing existing tools is easier. # The i'th leverage value is obtained by replacing the response with the i'th diff --git a/src/mixedmodel.jl b/src/mixedmodel.jl index ffaf2fb15..2c2f1d0b2 100644 --- a/src/mixedmodel.jl +++ b/src/mixedmodel.jl @@ -6,6 +6,15 @@ Return a vector of condition numbers of the λ matrices for the random-effects t """ LinearAlgebra.cond(m::MixedModel) = cond.(m.λ) +""" + issingular(m::MixedModel, θ=m.θ) + +Test whether the model `m` is singular if the parameter vector is `θ`. + +Equality comparisons are used b/c small non-negative θ values are replaced by 0 in `fit!`. +""" +issingular(m::MixedModel, θ=m.θ) = any(lowerbd(m) .== θ) + function retbl(mat, trm) merge( NamedTuple{(fname(trm),)}((trm.levels,)), diff --git a/src/simulate.jl b/src/simulate.jl index e085c1150..33bedfd9a 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -1,6 +1,6 @@ """ - simulate!(rng::AbstractRNG, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=T[]) - simulate!(m::LinearMixedModel; β=m.β, σ=m.σ, θ=m.θ) + simulate!(rng::AbstractRNG, m::MixedModel{T}; β=m.β, σ=m.σ, θ=T[]) + simulate!(m::MixedModel; β=m.β, σ=m.σ, θ=m.θ) Overwrite the response (i.e. `m.trms[end]`) with a simulated response vector from model `m`. """ @@ -33,11 +33,111 @@ function simulate!( unscaledre!(rng, y, trm) end # scale by σ and add fixed-effects contribution - BLAS.gemv!('N', one(T), m.X, β, σ, y) + mul!(y, m.X, β, one(T), σ) + m end -function simulate!(m::LinearMixedModel{T}; β = coef(m), σ = m.σ, θ = T[]) where {T} +function simulate!( + rng::AbstractRNG, + m::GeneralizedLinearMixedModel{T}; + β = coef(m), + σ = m.σ, + θ = T[], +) where {T} + length(β) == length(fixef(m)) || + length(β) == length(coef(m)) || + throw(ArgumentError("You must specify all (non-singular) βs")) + + dispersion_parameter(m) || ismissing(σ) || + throw(ArgumentError("You must not specify a dispersion parameter for model families without a dispersion parameter")) + + β = convert(Vector{T},β) + if σ !== missing + σ = T(σ) + end + θ = convert(Vector{T},θ) + + d = m.resp.d + + if length(β) ≠ length(coef(m)) + padding = length(coef(m)) - length(β) + for ii in 1:padding + push!(β, -0.0) + end + end + + fast = (length(m.θ) == length(m.optsum.final)) + setpar! = fast ? setθ! : setβθ! + params = fast ? θ : vcat(β, θ) + setpar!(m, params) + + lm = m.LMM + + # note that these m.resp.y and m.LMM.y will later be sychronized in (re)fit!() + # but for now we use them as distinct scratch buffers to avoid allocations + + # the noise term is actually in the GLM and not the LMM part so no noise + # at the LMM level + η = fill!(m.LMM.y, zero(T)) + y = m.resp.y + + # assemble the linear predictor + + # add the unscaled random effects + # note that unit scaling may not be correct for + # families with a dispersion parameter + @inbounds for trm in m.reterms + unscaledre!(rng, η, trm) + end + + # add fixed-effects contribution + # note that unit scaling may not be correct for + # families with a dispersion parameter + mul!(η, lm.X, β, one(T), one(T)) + + # from η to μ + GLM.updateμ!(m.resp, η) + + # convert to the distribution / add in noise + @inbounds for (idx, val) in enumerate(m.resp.mu) + n = isempty(m.wt) ? 1 : m.wt[idx] + y[idx] = _rand(rng, d, val, σ, n) + end + + m +end + +""" + _rand(rng::AbstractRNG, d::Distribution, location, scale=missing, n=1) + +A convenience function taking a draw from a distrbution. + +Note that `d` is specified as an existing distribution, such as +from the `GlmResp.d` field. This isn't vectorized nicely because +for distributions where the scale/dispersion is dependent on the +location (e.g. Bernoulli, Binomial, Poisson), it's not really +possible to avoid creating multiple `Distribution` objects. + +Note that `n` is the `n` parameter for the Binomial distribution, +*not* the number of draws from the RNG. It is then used to change the +random draw (an integer in [0, n]) into a probability (a float in [0,1]). +""" +function _rand(rng::AbstractRNG, d::Distribution, location, scale=NaN, n=1) + if !ismissing(scale) + throw(ArgumentError("Families with a dispersion parameter not yet supported")) + end + + if d isa Binomial + dist = Binomial(Int(n), location) + else + dist = typeof(d)(location) + end + + rand(rng, dist) / n +end + +function simulate!(m::MixedModel{T}; β = coef(m), σ = m.σ, θ = T[]) where {T} simulate!(Random.GLOBAL_RNG, m, β = β, σ = σ, θ = θ) end diff --git a/test/bootstrap.jl b/test/bootstrap.jl index c1013cfad..7d08423ff 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -1,6 +1,8 @@ using DataFrames +using LinearAlgebra using MixedModels using Random +using StableRNGs using Tables using Test @@ -9,20 +11,51 @@ using MixedModels: dataset include("modelcache.jl") @testset "simulate!" begin - ds = dataset(:dyestuff) - fm = only(models(:dyestuff)) - resp₀ = copy(response(fm)) - # type conversion of ints to floats - simulate!(Random.MersenneTwister(1234321), fm, β=[1], σ=1) - refit!(fm,resp₀) - refit!(simulate!(Random.MersenneTwister(1234321), fm)) - @test deviance(fm) ≈ 339.0218639362958 atol=0.001 - refit!(fm, float(ds.yield)) - Random.seed!(1234321) - refit!(simulate!(fm)) - @test deviance(fm) ≈ 339.0218639362958 atol=0.001 - simulate!(fm, θ = fm.θ) - @test_throws DimensionMismatch refit!(fm, zeros(29)) + @testset "LMM" begin + ds = dataset(:dyestuff) + fm = only(models(:dyestuff)) + # # just in case the fit was modified in a previous test + # refit!(fm, vec(float.(ds.yield))) + resp₀ = copy(response(fm)) + # type conversion of ints to floats + simulate!(StableRNG(1234321), fm, β=[1], σ=1) + refit!(fm,resp₀) + refit!(simulate!(StableRNG(1234321), fm)) + @test deviance(fm) ≈ 322.6582 atol=0.001 + refit!(fm, float(ds.yield)) + # Global/implicit RNG method + Random.seed!(1234321) + refit!(simulate!(fm)) + # just make sure this worked, don't check fit + # (because the RNG can change between Julia versions) + @test response(fm) ≠ resp₀ + simulate!(fm, θ = fm.θ) + @test_throws DimensionMismatch refit!(fm, zeros(29)) + # restore the original state + refit!(fm, vec(float.(ds.yield))) + end + + @testset "Poisson" begin + center(v::AbstractVector) = v .- (sum(v) / length(v)) + grouseticks = DataFrame(dataset(:grouseticks)) + grouseticks.ch = center(grouseticks.height) + gm4 = fit(MixedModel, only(gfms[:grouseticks]), grouseticks, Poisson(), fast=true) # fails in pirls! with fast=false + gm4sim = refit!(simulate!(StableRNG(42), deepcopy(gm4))) + @test isapprox(gm4.β, gm4sim.β; atol=norm(stderror(gm4))) + end + + @testset "Binomial" begin + cbpp = dataset(:cbpp) + gm2 = fit(MixedModel, first(gfms[:cbpp]), cbpp, Binomial(), wts=float(cbpp.hsz)) + gm2sim = refit!(simulate!(StableRNG(42), deepcopy(gm2)), fast=true) + @test isapprox(gm2.β, gm2sim.β; atol=norm(stderror(gm2))) + end + + @testset "_rand with dispersion" begin + @test_throws ArgumentError MixedModels._rand(StableRNG(42), Normal(), 1, 1, 1) + @test_throws ArgumentError MixedModels._rand(StableRNG(42), Gamma(), 1, 1, 1) + @test_throws ArgumentError MixedModels._rand(StableRNG(42), InverseGaussian(), 1, 1, 1) + end end @testset "bootstrap" begin @@ -65,4 +98,34 @@ end @test sort(columntable(bsamp_threaded.β).β) == sort(columntable(bsamp.β).β) @test sum(issingular(bsamp)) == sum(issingular(bsamp_threaded)) end + + + @testset "Bernoulli simulate! and GLMM boostrap" begin + contra = dataset(:contra) + gm0 = fit(MixedModel, only(gfms[:contra]), contra, Bernoulli(), fast=true) + bs = parametricbootstrap(StableRNG(42), 100, gm0) + bsci = combine(groupby(DataFrame(bs.β), :coefname), + :β => shortestcovint => :ci) + bsci.lower = first.(bsci.ci) + bsci.upper = last.(bsci.ci) + select!(bsci, Not(:ci)) + ciwidth = 2 .* stderror(gm0) + waldci = DataFrame(coef=fixefnames(gm0), + lower=fixef(gm0) .- ciwidth, + upper=fixef(gm0) .+ ciwidth) + + # coarse tolerances because we're not doing many bootstrap samples + @test all(isapprox.(bsci.lower, waldci.lower; atol=0.5)) + @test all(isapprox.(bsci.upper, waldci.upper; atol=0.5)) + + σbar = mean(MixedModels.tidyσs(bs)) do x; x.σ end + @test σbar ≈ 0.56 atol=0.1 + apar = filter!(row -> row.type == "σ", DataFrame(MixedModels.allpars(bs))) + @test !("Residual" in apar.names) + @test mean(apar.value) ≈ σbar + + # can't specify dispersion for families without that parameter + @test_throws ArgumentError parametricbootstrap(StableRNG(42), 100, gm0; σ=2) + @test sum(issingular(bs)) == 0 + end end diff --git a/test/modelcache.jl b/test/modelcache.jl index 13386b68e..862cae244 100644 --- a/test/modelcache.jl +++ b/test/modelcache.jl @@ -1,6 +1,12 @@ using MixedModels using MixedModels: dataset +@isdefined(gfms) || const global gfms = Dict( + :cbpp => [@formula((incid/hsz) ~ 1 + period + (1|herd))], + :contra => [@formula(use ~ 1+age+abs2(age)+urban+livch+(1|urban&dist))], + :grouseticks => [@formula(ticks ~ 1+year+ch+ (1|index) + (1|brood) + (1|location))], + :verbagg => [@formula(r2 ~ 1+anger+gender+btype+situ+(1|subj)+(1|item))], +) @isdefined(fms) || const global fms = Dict( :dyestuff => [@formula(yield ~ 1 + (1|batch))], diff --git a/test/pirls.jl b/test/pirls.jl index bc6f99788..e650aaad9 100644 --- a/test/pirls.jl +++ b/test/pirls.jl @@ -1,22 +1,20 @@ using DataFrames using MixedModels +using StableRNGs using Tables using Test using MixedModels: dataset -const gfms = Dict( - :cbpp => [@formula((incid/hsz) ~ 1 + period + (1|herd))], - :contra => [@formula(use ~ 1+age+abs2(age)+urban+livch+(1|urban&dist))], - :grouseticks => [@formula(ticks ~ 1+year+ch+ (1|index) + (1|brood) + (1|location))], - :verbagg => [@formula(r2 ~ 1+anger+gender+btype+situ+(1|subj)+(1|item))], -) +include("modelcache.jl") @testset "contra" begin contra = dataset(:contra) - gm0 = fit(MixedModel, only(gfms[:contra]), contra, Bernoulli(), fast=true); + gm0 = fit(MixedModel, only(gfms[:contra]), contra, Bernoulli(), fast=true) @test gm0.lowerbd == zeros(1) @test isapprox(gm0.θ, [0.5720734451352923], atol=0.001) + @test !issingular(gm0) + @test issingular(gm0, [0]) @test isapprox(deviance(gm0), 2361.657188518064, atol=0.001) # the first 9 BLUPs -- I don't think there's much point in testing all 102 blups = [-0.5853637711570235, -0.9546542393824562, -0.034754249031292345, # values are the same but in different order @@ -27,6 +25,12 @@ const gfms = Dict( @test isone(length(retbl)) @test isa(retbl, NamedTuple) @test Tables.istable(only(retbl)) + @test !dispersion_parameter(gm0) + @test dispersion(gm0, false) == 1 + @test dispersion(gm0, true) == 1 + @test sdest(gm0) === missing + @test varest(gm0) === missing + @test gm0.σ === missing gm1 = fit(MixedModel, only(gfms[:contra]), contra, Bernoulli()); @test isapprox(gm1.θ, [0.573054], atol=0.005) @@ -42,7 +46,6 @@ const gfms = Dict( @test isapprox(deviance(gm1), 2360.8760, atol=0.001) @test gm1.β == gm1.beta @test gm1.θ == gm1.theta - @test isnan(gm1.σ) @test length(gm1.y) == size(gm1.X, 1) @test :θ in propertynames(gm0) @@ -57,7 +60,7 @@ const gfms = Dict( #@test isapprox(sum(gm1.resp.devresid), 2237.349, atol=0.1) show(IOBuffer(), gm1) show(IOBuffer(), BlockDescription(gm0)) - + end @testset "cbpp" begin @@ -68,19 +71,23 @@ end @test logdet(gm2) ≈ 16.90105378801136 atol=0.0001 @test isapprox(sum(gm2.resp.devresid), 73.47174762237978, atol=0.001) @test isapprox(loglikelihood(gm2), -92.02628186840045, atol=0.001) - @test isnan(sdest(gm2)) - @test varest(gm2) == 1 - + @test !dispersion_parameter(gm2) + @test dispersion(gm2, false) == 1 + @test dispersion(gm2, true) == 1 + @test sdest(gm2) === missing + @test varest(gm2) === missing + @test gm2.σ === missing + @testset "GLMM refit" begin gm2r = deepcopy(gm2) @test_throws ArgumentError fit!(gm2r) refit!(gm2r, 1 .- gm2.y; fast=true) @test gm2r.β ≈ -gm2.β atol=1e-3 @test gm2r.θ ≈ gm2.θ atol=1e-3 - + refit!(gm2r, 1 .- gm2.y; fast=false) @test gm2r.β ≈ -gm2.β atol=1e-3 - @test gm2r.θ ≈ gm2.θ atol=1e-3 + @test gm2r.θ ≈ gm2.θ atol=1e-3 end end @@ -103,6 +110,12 @@ end # these two values are not well defined at the optimum #@test isapprox(sum(x -> sum(abs2, x), gm4.u), 196.8695297987013, atol=0.1) #@test isapprox(sum(gm4.resp.devresid), 220.92685781326136, atol=0.1) + @test !dispersion_parameter(gm4) + @test dispersion(gm4, false) == 1 + @test dispersion(gm4, true) == 1 + @test sdest(gm4) === missing + @test varest(gm4) === missing + @test gm4.σ === missing end @testset "goldstein" begin # from a 2020-04-22 msg by Ben Goldstein to R-SIG-Mixed-Models @@ -145,4 +158,11 @@ end @test_logs (:warn, r"dispersion parameter") GeneralizedLinearMixedModel(form, dat, InverseGaussian()) @test_logs (:warn, r"dispersion parameter") GeneralizedLinearMixedModel(form, dat, Normal(), SqrtLink()) + # notes for future tests when GLMM with dispersion works + # @test dispersion_parameter(gm) + # @test dispersion(gm, false) == val + # @test dispersion(gm, true) == val + # @test sdest(gm) == dispersion(gm, false) == gm.σ + # @test varest(gm) == dispersion(gm, true) + end diff --git a/test/utilities.jl b/test/utilities.jl index dea1b6b55..c2dd7fb5c 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -1,6 +1,6 @@ using LinearAlgebra using MixedModels -using Random +using StableRNGs using SparseArrays using Test @@ -15,7 +15,7 @@ end @testset "densify" begin @test densify(sparse(1:5, 1:5, ones(5))) == Diagonal(ones(5)) - rsparsev = SparseVector(float.(rand(MersenneTwister(123454321), Bool, 20))) + rsparsev = SparseVector(float.(rand(StableRNG(123454321), Bool, 20))) @test densify(rsparsev) == Vector(rsparsev) @test densify(Diagonal(rsparsev)) == Diagonal(Vector(rsparsev)) end @@ -36,14 +36,14 @@ end end @testset "threaded_replicate" begin - rng = MersenneTwister(42); - single_thread = replicate(10,use_threads=false) do; randn(rng, 1)[1] ; end - rng = MersenneTwister(42); - multi_thread = replicate(10,use_threads=true) do + rng = StableRNG(42); + single_thread = replicate(10;use_threads=false) do; only(randn(rng, 1)) ; end + rng = StableRNG(42); + multi_thread = replicate(10;use_threads=true) do if Threads.threadid() % 2 == 0 sleep(0.001) end - r = randn(rng, 1)[1]; + r = only(randn(rng, 1)); end @test all(sort!(single_thread) .== sort!(multi_thread)) @@ -61,11 +61,11 @@ end @testset "PCA" begin pca = models(:kb07)[3].PCA.item - + show(io, pca, covcor=true, loadings=false) str = String(take!(io)) @test !isempty(findall("load: yes", str)) - + show(io, pca, covcor=false, loadings=true) str = String(take!(io)) @test !isempty(findall("PC1", str))