From c1d37b8d5bf8a9b5dfb00f1782670b5d8c0b1335 Mon Sep 17 00:00:00 2001
From: Cecile Ane <cecileane@users.noreply.github.com>
Date: Tue, 4 Jun 2024 08:12:05 -0500
Subject: [PATCH] r2 corrected after GLM.ftest, in PhyloNetworks.ftest

---
 docs/src/man/trait_tree.md | 13 +++++++------
 src/traits.jl              |  9 +++++++--
 test/test_lm.jl            |  6 +-----
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/docs/src/man/trait_tree.md b/docs/src/man/trait_tree.md
index 053f1cfd..ac1c73ca 100644
--- a/docs/src/man/trait_tree.md
+++ b/docs/src/man/trait_tree.md
@@ -512,18 +512,19 @@ ftest(fit_null, fit_sh)  # nested models
 ```
 Here, this test is equivalent to the Fisher F test, and gives the same p-value.
 
-!!! warning "Warnings and incorrect R² columns"
+!!! note "Warnings from GLM"
     A warning may appear, saying
     "Starting from GLM.jl 1.8, null model is defined as having no predictor at all when a model without an intercept is passed."
     - Why? `ftest` is inherited from the GLM package, which does not know that
       the intercept term is not a column of ones after transformation to remove
       the phylogenetic correlation. This is why `ftest` sends a warning for
       each model, when multiple models are compared.
-    - So what?
-      * no need to worry: the F values and p-values are correct
-      * but: R² values are incorrect and should be ignored.
-        They are calculated assuming *no* intercept and are based on the
-        transformed de-correlated data anyway: use the `r2` function instead.
+    - These specific warnings can be ignored:
+      * F values and p-values are correct
+      * R² values are also correct: they are obtained with the
+        `r2` function for phylogenetic linear models.
+    A future version of the package will attempt to remove these warnings
+    specifically.
 
 Note that models need to be ordered by complexity, when given to `ftest`:
 either from most complex to most simple, or from most simple to most complex.
diff --git a/src/traits.jl b/src/traits.jl
index 930ad86f..af183d04 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -3025,9 +3025,12 @@ isnested(::PagelLambda,::ScalingHybrid) = false
  This is because after transforming the data to de-correlate the residuals,
  the transformed intercept vector is not proportional to the constant vector 1.
  The warning is from: ftest → r2(phylomodel.lm) → nulldeviance(phylomodel.lm) → warning.
+ R² by GLM is wrong: assume *no* intercept, and are based on the transformed data.
+ R² corrected them below: r2(phylomodel) reimplemented here.
  But nulldeviance(phylomodel) does *not* call nulldeviance(phylomodel.lm),
  instead re-implemented here to use the intercept properly.
- Keep these warnings: because the R² values in the ftest output table are incorrect.
+ Keep the warnings: unless they can be suppressed with specificity
+ Ideally: modify `ftest` here or in GLM.
 =#
 function GLM.ftest(objs::PhyloNetworkLinearModel...)
     if !all( isa(o.evomodel,BM) && isnothing(o.model_within) for o in objs)
@@ -3035,7 +3038,9 @@ function GLM.ftest(objs::PhyloNetworkLinearModel...)
         Use a likelihood ratio test instead with function `lrtest`."""))
     end
     objslm = [obj.lm for obj in objs]
-    return ftest(objslm...)
+    resGLM = ftest(objslm...)
+    resGLM.r2 = r2.(objs)
+    return resGLM
 end
 ## ANOVA: old version - kept for tests purposes - do not export
 """
diff --git a/test/test_lm.jl b/test/test_lm.jl
index 4675c6fa..b3d1c56b 100644
--- a/test/test_lm.jl
+++ b/test/test_lm.jl
@@ -224,11 +224,7 @@ table2 = PhyloNetworks.anova(modnull, modhom, modhet)
 @test table1.pval[2] ≈ table2[2,Symbol("Pr(>F)")]
 @test table1.pval[3] ≈ table2[1,Symbol("Pr(>F)")]
 @test hasintercept(modnull) && hasintercept(modhom) && hasintercept(modhet)
-# ## Replace next 4 lines with previous ones when GLM.ftest available
-# @test table1[:F][2] ≈ table2[:F][2]
-# @test table1[:F][1] ≈ table2[:F][1]
-# @test table1[Symbol("Pr(>F)")][1] ≈ table2[Symbol("Pr(>F)")][1]
-# @test table1[Symbol("Pr(>F)")][2] ≈ table2[Symbol("Pr(>F)")][2]
+@test all(isapprox.(table1.r2, (0.8398130376214782, 0.006032952123011026, 0), atol=1e-15))
 
 # Check that it is the same as doing shift_8 + shift_17
 modhetbis = phylolm(@formula(trait ~ shift_8 + shift_17), dfr, net)