diff --git a/R/vfold.R b/R/vfold.R index 66509ee3..6b3f7a0f 100644 --- a/R/vfold.R +++ b/R/vfold.R @@ -151,13 +151,14 @@ vfold_splits <- function(data, v = 10, strata = NULL, breaks = 4, pool = 0.1) { #' on some grouping variable (which may have more than a single row #' associated with it). The function can create as many splits as #' there are unique values of the grouping variable or it can -#' create a smaller set of splits where more than one value is left +#' create a smaller set of splits where more than one group is left #' out at a time. A common use of this kind of resampling is when you have #' repeated measures of the same subject. #' #' @inheritParams vfold_cv -#' @param v The number of partitions of the data set. If left as `NULL`, `v` -#' will be set to the number of unique values in the group. +#' @param v The number of partitions of the data set. If left as `NULL` (the +#' default), `v` will be set to the number of unique values in the grouping +#' variable, creating "leave-one-group-out" splits. #' @param balance If `v` is less than the number of unique groups, how should #' groups be combined into folds? Should be one of #' `"groups"` or `"observations"`. @@ -181,6 +182,9 @@ vfold_splits <- function(data, v = 10, strata = NULL, breaks = 4, pool = 0.1) { #' ) #' group_vfold_cv(ames, group = Neighborhood, v = 5, repeats = 2) #' +#' # Leave-one-group-out CV +#' group_vfold_cv(ames, group = Neighborhood) +#' #' @export group_vfold_cv <- function(data, group = NULL, v = NULL, repeats = 1, balance = c("groups", "observations"), ...) { diff --git a/man/group_vfold_cv.Rd b/man/group_vfold_cv.Rd index e9fecd31..a873b847 100644 --- a/man/group_vfold_cv.Rd +++ b/man/group_vfold_cv.Rd @@ -20,8 +20,9 @@ group_vfold_cv( grouping observations with the same value to either the analysis or assessment set within a fold.} -\item{v}{The number of partitions of the data set. If left as \code{NULL}, \code{v} -will be set to the number of unique values in the group.} +\item{v}{The number of partitions of the data set. If left as \code{NULL} (the +default), \code{v} will be set to the number of unique values in the grouping +variable, creating "leave-one-group-out" splits.} \item{repeats}{The number of times to repeat the V-fold partitioning.} @@ -42,7 +43,7 @@ Group V-fold cross-validation creates splits of the data based on some grouping variable (which may have more than a single row associated with it). The function can create as many splits as there are unique values of the grouping variable or it can -create a smaller set of splits where more than one value is left +create a smaller set of splits where more than one group is left out at a time. A common use of this kind of resampling is when you have repeated measures of the same subject. } @@ -59,5 +60,8 @@ group_vfold_cv( balance = "observations" ) group_vfold_cv(ames, group = Neighborhood, v = 5, repeats = 2) + +# Leave-one-group-out CV +group_vfold_cv(ames, group = Neighborhood) \dontshow{\}) # examplesIf} }