Skip to content

Commit

Permalink
default formula argument to NULL in geom_smooth() (tidyverse#3307)
Browse files Browse the repository at this point in the history
  • Loading branch information
bfgray3 authored and thomasp85 committed Sep 30, 2019
1 parent fa000f7 commit 0ee259c
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 24 deletions.
4 changes: 2 additions & 2 deletions R/geom-smooth.r
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@
geom_smooth <- function(mapping = NULL, data = NULL,
stat = "smooth", position = "identity",
...,
method = "auto",
formula = y ~ x,
method = NULL,
formula = NULL,
se = TRUE,
na.rm = FALSE,
show.legend = NA,
Expand Down
45 changes: 31 additions & 14 deletions R/stat-smooth.r
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
#' @param method Smoothing method (function) to use, accepts either a character vector,
#' e.g. `"auto"`, `"lm"`, `"glm"`, `"gam"`, `"loess"` or a function, e.g.
#' `MASS::rlm` or `mgcv::gam`, `stats::lm`, or `stats::loess`.
#' @param method Smoothing method (function) to use. Accepts `NULL`, a
#' character vector, e.g. `"lm"`, `"glm"`, `"gam"`, `"loess"`, or a function,
#' e.g. `MASS::rlm`, `mgcv::gam`, `stats::lm`, or `stats::loess`.
#' `"auto"` is also accepted for backwards compatibility. It is equivalent to
#' `NULL`.
#'
#' For `method = "auto"` the smoothing method is chosen based on the
#' For `method = NULL` the smoothing method is chosen based on the
#' size of the largest group (across all panels). [stats::loess()] is
#' used for fewer than 1,000 observations; otherwise [mgcv::gam()] is
#' used with `formula = y ~ s(x, bs = "cs")` and `method = "REML"`. Somewhat anecdotally,
#' `loess` gives a better appearance, but is \eqn{O(N^{2})}{O(N^2)} in memory,
#' so does not work for larger datasets.
#'
#' If you have fewer than 1,000 observations but want to use the same `gam()`
#' model that `method = "auto"` would use, then set
#' model that `method = NULL` would use, then set
#' `method = "gam", formula = y ~ s(x, bs = "cs")`.
#' @param formula Formula to use in smoothing function, e.g. `y ~ x`,
#' `y ~ poly(x, 2)`, `y ~ log(x)`
#' `y ~ poly(x, 2)`, `y ~ log(x)`. `NULL` by default, in which case
#' `method = NULL` implies `formula = y ~ x` when there are fewer than 1,000
#' observations and `formula = y ~ s(x, bs = "cs")` otherwise.
#' @param se Display confidence interval around smooth? (`TRUE` by default, see
#' `level` to control.)
#' @param fullrange Should the fit span the full range of the plot, or just
Expand All @@ -37,8 +41,8 @@
stat_smooth <- function(mapping = NULL, data = NULL,
geom = "smooth", position = "identity",
...,
method = "auto",
formula = y ~ x,
method = NULL,
formula = NULL,
se = TRUE,
n = 80,
span = 0.75,
Expand Down Expand Up @@ -77,7 +81,8 @@ stat_smooth <- function(mapping = NULL, data = NULL,
#' @export
StatSmooth <- ggproto("StatSmooth", Stat,
setup_params = function(data, params) {
if (identical(params$method, "auto")) {
msg <- character()
if (is.null(params$method) || identical(params$method, "auto")) {
# Use loess for small datasets, gam with a cubic regression basis for
# larger. Based on size of the _largest_ group to avoid bad memory
# behaviour of loess
Expand All @@ -87,18 +92,30 @@ StatSmooth <- ggproto("StatSmooth", Stat,
params$method <- "loess"
} else {
params$method <- "gam"
}
msg <- c(msg, paste0("method = '", params$method, "'"))
}

if (is.null(params$formula)) {
if (identical(params$method, "gam")) {
params$formula <- y ~ s(x, bs = "cs")
} else {
params$formula <- y ~ x
}
message(
"`geom_smooth()` using method = '", params$method,
"' and formula '", deparse(params$formula), "'"
)
msg <- c(msg, paste0("formula '", deparse(params$formula), "'"))
}
if (identical(params$method, "gam")) {
params$method <- mgcv::gam
}

if (length(msg) > 0) {
message("`geom_smooth()` using ", paste0(msg, collapse = " and "))
}

params
},

compute_group = function(data, scales, method = "auto", formula = y ~ x,
compute_group = function(data, scales, method = NULL, formula = NULL,
se = TRUE, n = 80, span = 0.75, fullrange = FALSE,
xseq = NULL, level = 0.95, method.args = list(),
na.rm = FALSE) {
Expand Down
20 changes: 12 additions & 8 deletions man/geom_smooth.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test-geom-smooth.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ test_that("default smoothing methods for small and large data sets work", {
"method = 'gam' and formula 'y ~ s\\(x, bs = \"cs\"\\)"
)
expect_equal(plot_data$y, as.numeric(out))

# backwards compatibility of method = "auto"
p <- ggplot(df, aes(x, y)) + geom_smooth(method = "auto")

expect_message(
plot_data <- layer_data(p),
"method = 'gam' and formula 'y ~ s\\(x, bs = \"cs\"\\)"
)
expect_equal(plot_data$y, as.numeric(out))
})


Expand Down

0 comments on commit 0ee259c

Please sign in to comment.