From c1d37b8d5bf8a9b5dfb00f1782670b5d8c0b1335 Mon Sep 17 00:00:00 2001 From: Cecile Ane Date: Tue, 4 Jun 2024 08:12:05 -0500 Subject: [PATCH] r2 corrected after GLM.ftest, in PhyloNetworks.ftest --- docs/src/man/trait_tree.md | 13 +++++++------ src/traits.jl | 9 +++++++-- test/test_lm.jl | 6 +----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/src/man/trait_tree.md b/docs/src/man/trait_tree.md index 053f1cfd..ac1c73ca 100644 --- a/docs/src/man/trait_tree.md +++ b/docs/src/man/trait_tree.md @@ -512,18 +512,19 @@ ftest(fit_null, fit_sh) # nested models ``` Here, this test is equivalent to the Fisher F test, and gives the same p-value. -!!! warning "Warnings and incorrect R² columns" +!!! note "Warnings from GLM" A warning may appear, saying "Starting from GLM.jl 1.8, null model is defined as having no predictor at all when a model without an intercept is passed." - Why? `ftest` is inherited from the GLM package, which does not know that the intercept term is not a column of ones after transformation to remove the phylogenetic correlation. This is why `ftest` sends a warning for each model, when multiple models are compared. - - So what? - * no need to worry: the F values and p-values are correct - * but: R² values are incorrect and should be ignored. - They are calculated assuming *no* intercept and are based on the - transformed de-correlated data anyway: use the `r2` function instead. + - These specific warnings can be ignored: + * F values and p-values are correct + * R² values are also correct: they are obtained with the + `r2` function for phylogenetic linear models. + A future version of the package will attempt to remove these warnings + specifically. Note that models need to be ordered by complexity, when given to `ftest`: either from most complex to most simple, or from most simple to most complex. diff --git a/src/traits.jl b/src/traits.jl index 930ad86f..af183d04 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -3025,9 +3025,12 @@ isnested(::PagelLambda,::ScalingHybrid) = false This is because after transforming the data to de-correlate the residuals, the transformed intercept vector is not proportional to the constant vector 1. The warning is from: ftest → r2(phylomodel.lm) → nulldeviance(phylomodel.lm) → warning. + R² by GLM is wrong: assume *no* intercept, and are based on the transformed data. + R² corrected them below: r2(phylomodel) reimplemented here. But nulldeviance(phylomodel) does *not* call nulldeviance(phylomodel.lm), instead re-implemented here to use the intercept properly. - Keep these warnings: because the R² values in the ftest output table are incorrect. + Keep the warnings: unless they can be suppressed with specificity + Ideally: modify `ftest` here or in GLM. =# function GLM.ftest(objs::PhyloNetworkLinearModel...) if !all( isa(o.evomodel,BM) && isnothing(o.model_within) for o in objs) @@ -3035,7 +3038,9 @@ function GLM.ftest(objs::PhyloNetworkLinearModel...) Use a likelihood ratio test instead with function `lrtest`.""")) end objslm = [obj.lm for obj in objs] - return ftest(objslm...) + resGLM = ftest(objslm...) + resGLM.r2 = r2.(objs) + return resGLM end ## ANOVA: old version - kept for tests purposes - do not export """ diff --git a/test/test_lm.jl b/test/test_lm.jl index 4675c6fa..b3d1c56b 100644 --- a/test/test_lm.jl +++ b/test/test_lm.jl @@ -224,11 +224,7 @@ table2 = PhyloNetworks.anova(modnull, modhom, modhet) @test table1.pval[2] ≈ table2[2,Symbol("Pr(>F)")] @test table1.pval[3] ≈ table2[1,Symbol("Pr(>F)")] @test hasintercept(modnull) && hasintercept(modhom) && hasintercept(modhet) -# ## Replace next 4 lines with previous ones when GLM.ftest available -# @test table1[:F][2] ≈ table2[:F][2] -# @test table1[:F][1] ≈ table2[:F][1] -# @test table1[Symbol("Pr(>F)")][1] ≈ table2[Symbol("Pr(>F)")][1] -# @test table1[Symbol("Pr(>F)")][2] ≈ table2[Symbol("Pr(>F)")][2] +@test all(isapprox.(table1.r2, (0.8398130376214782, 0.006032952123011026, 0), atol=1e-15)) # Check that it is the same as doing shift_8 + shift_17 modhetbis = phylolm(@formula(trait ~ shift_8 + shift_17), dfr, net)