Merge pull request #1 from hendersontrent/trent-dev

Initial pkg build
hendersontrent · Dec 16, 2022 · 24bcc75 · 24bcc75
2 parents 3b088c6 + 7e2b667
commit 24bcc75
Show file tree

Hide file tree

Showing 21 changed files with 586 additions and 35 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,11 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^README.Rmd
+^README_files
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
+^\.github$
+^doc$
+^Meta$
+^LICENSE\.md$
diff --git a/.gitignore b/.gitignore
@@ -1,39 +1,12 @@
-# History files
+.Rproj.user
 .Rhistory
-.Rapp.history
-
-# Session Data files
 .RData
-
-# User-specific files
 .Ruserdata
 
-# Example code in package build process
-*-Ex.R
-
-# Output files from R CMD build
-/*.tar.gz
-
-# Output files from R CMD check
-/*.Rcheck/
-
-# RStudio files
-.Rproj.user/
-
-# produced vignettes
-vignettes/*.html
-vignettes/*.pdf
-
-# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
-.httr-oauth
-
-# knitr and R markdown default cache directories
-*_cache/
-/cache/
-
-# Temporary files created by R markdown
-*.utf8.md
-*.knit.md
+# Mac OS
 
-# R Environment Variables
-.Renviron
+.DS_Store
+doc
+Meta
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,30 @@
+Package: correctR
+Type: Package
+Title: Corrections For Correlated Test Statistics
+Version: 0.1.0
+Date: 2022-12-16
+Authors@R: c(
+  person("Trent", "Henderson", email = "[email protected]", role = c("cre", "aut"))
+  )
+Maintainer: Trent Henderson <[email protected]>
+Description: Calculate a set of corrected test statistics for cases when samples
+    are not independent, such as when classification accuracy values are obtained
+    over resamples or through k-fold cross-validation, as proposed by Nadeau and Bengio (2003) <doi:10.1023/A:1024068626366> 
+    and presented in Bouckaert and Frank (2004) <doi:10.1007/978-3-540-24775-3_3>.
+BugReports: https://github.com/hendersontrent/correctR/issues
+License: MIT + file LICENSE
+Encoding: UTF-8
+LazyData: true
+Depends: 
+    R (>= 3.5.0)
+Imports: 
+    stats
+Suggests: 
+    knitr,
+    markdown,
+    rmarkdown,
+    pkgdown,
+    testthat (>= 3.0.0)
+RoxygenNote: 7.2.2
+VignetteBuilder: knitr
+Config/testthat/edition: 3
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2022
+COPYRIGHT HOLDER: Trent Henderson
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2022 Trent Henderson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,7 @@
+# Generated by roxygen2: do not edit by hand
+
+export(kfold_ttest)
+export(repkfold_ttest)
+export(resampled_ttest)
+importFrom(stats,pt)
+importFrom(stats,var)
diff --git a/R/correctR.R b/R/correctR.R
@@ -0,0 +1,9 @@
+#'
+#' @docType package
+#' @name correctR
+#' @title Corrections For Correlated Test Statistics
+#'
+#' @description Corrected t-statistics and p-values for comparing two models
+#' whose performance values are not independent, such as values obtained over
+#' resamples, k-fold cross-validation, or repeated k-fold cross-validation. See
+#' \code{\link{resampled_ttest}}, \code{\link{kfold_ttest}}, and
+#' \code{\link{repkfold_ttest}}.
+#'
+#' @importFrom stats var pt
+NULL
diff --git a/R/kfold_ttest.R b/R/kfold_ttest.R
@@ -0,0 +1,47 @@
+#' Compute correlated t-statistic and p-value for k-fold cross-validated results
+#'
+#' The variance of the paired differences \code{x - y} is multiplied by the
+#' correction term \code{1/n + (1/k) / (1 - 1/k)} before the t-statistic is
+#' formed. The p-value is read from a t-distribution with \code{n - 1} degrees
+#' of freedom.
+#'
+#' @importFrom stats var pt
+#' @param x \code{numeric} vector of values for model A
+#' @param y \code{numeric} vector of values for model B
+#' @param n \code{integer} denoting total sample size. Must be at least 2
+#' @param k \code{integer} denoting number of folds used in k-fold. Must be at least 2
+#' @return object of class \code{data.frame} with one row and two columns: \code{statistic} (the corrected t-statistic) and \code{p.value} (one-tailed p-value taken in the direction of the sign of the statistic)
+#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
+#' @references Corani, G., Benavoli, A., Demsar, J., Mangili, F., and Zaffalon, M. Statistical comparison of classifiers through Bayesian hierarchical modelling. Machine Learning, 106, (2017).
+#' @author Trent Henderson
+#' @examples
+#' x <- c(0.90, 0.80, 0.85, 0.95, 0.90)
+#' y <- c(0.80, 0.75, 0.80, 0.90, 0.85)
+#' kfold_ttest(x = x, y = y, n = 100, k = 5)
+#' @export
+#'
+
+kfold_ttest <- function(x, y, n, k){
+
+  # Arg checks
+
+  if(length(x) != length(y)){
+    stop("x and y are not the same length.")
+  }
+
+  if(!is.numeric(x) || !is.numeric(y)){
+    stop("x and y should be numeric vectors of the same length.")
+  }
+
+  if(!is.numeric(n) || !is.numeric(k) || length(n) != 1 || length(k) != 1){
+    stop("n and k should be integer scalars.")
+  }
+
+  # k = 1 makes (1 - 1/k) zero and n = 1 leaves zero degrees of freedom
+
+  if(is.na(n) || is.na(k) || n < 2 || k < 2){
+    stop("n and k should both be at least 2.")
+  }
+
+  # Calculations
+
+  d <- x - y # Calculate differences
+  rho <- 1 / k # Proportion of the data held out in each fold
+
+  # Only the rho term is divided by (1 - rho); 1/n is added separately
+
+  correction <- (1 / n) + (rho / (1 - rho))
+  statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * correction) # Calculate t-statistic
+
+  if(statistic < 0){
+    p.value <- stats::pt(statistic, n - 1) # p-value for left tail
+  } else{
+    p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE) # p-value for right tail
+  }
+
+  data.frame(statistic = statistic, p.value = p.value)
+}
diff --git a/R/repkfold_ttest.R b/R/repkfold_ttest.R
@@ -0,0 +1,72 @@
+#' Compute correlated t-statistic and p-value for repeated k-fold cross-validated results
+#'
+#' For every fold and repeat, the value of the second model label found in
+#' \code{data$model} is subtracted from the value of the first. The variance of
+#' these differences is multiplied by \code{1/(k * r) + n2/n1} before the
+#' t-statistic is formed, and the p-value is read from a t-distribution with
+#' \code{(k * r) - 1} degrees of freedom.
+#'
+#' @importFrom stats var pt
+#' @param data \code{data.frame} of values for model A and model B over repeated k-fold cross-validation. Four named columns are expected: \code{model} (label with exactly two unique values, one per model), \code{values} (\code{numeric} performance values), \code{k} (\code{numeric} fold index running from 1 to \code{k}), and \code{r} (\code{numeric} repeat index running from 1 to \code{r}). Each combination of fold and repeat should hold exactly one value per model
+#' @param n1 \code{integer} denoting train set size
+#' @param n2 \code{integer} denoting test set size
+#' @param k \code{integer} denoting number of folds used in k-fold
+#' @param r \code{integer} denoting number of repeats per fold
+#' @return object of class \code{data.frame} with one row and two columns: \code{statistic} (the corrected t-statistic) and \code{p.value} (one-tailed p-value taken in the direction of the sign of the statistic)
+#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
+#' @references Bouckaert, R. R., and Frank, E. Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms. Advances in Knowledge Discovery and Data Mining. PAKDD 2004. Lecture Notes in Computer Science, 3056, (2004).
+#' @author Trent Henderson
+#' @examples
+#' dat <- data.frame(
+#'   model = rep(c("A", "B"), each = 4),
+#'   k = rep(c(1, 1, 2, 2), times = 2),
+#'   r = rep(c(1, 2, 1, 2), times = 2),
+#'   values = c(0.90, 0.80, 0.85, 0.95, 0.80, 0.75, 0.80, 0.85)
+#' )
+#' repkfold_ttest(data = dat, n1 = 80, n2 = 20, k = 2, r = 2)
+#' @export
+#'
+
+repkfold_ttest <- function(data, n1, n2, k, r){
+
+  # Arg checks
+
+  required_cols <- c("model", "values", "k", "r")
+
+  if(!all(required_cols %in% colnames(data))){
+    stop("data should contain at least four columns called 'model', 'values', 'k', and 'r'.")
+  }
+
+  if(!is.numeric(data$values) || !is.numeric(data$k) || !is.numeric(data$r)){
+    stop("data should be a data.frame with only numerical values in columns 'values', 'k', and 'r'.")
+  }
+
+  if(!is.numeric(n1) || !is.numeric(n2) || !is.numeric(k) || !is.numeric(r) ||
+     length(n1) != 1 || length(n2) != 1 || length(k) != 1 || length(r) != 1){
+    stop("n1, n2, k, and r should all be integer scalars.")
+  }
+
+  if(anyNA(c(n1, n2, k, r)) || n1 <= 0 || n2 <= 0 || k < 1 || r < 1){
+    stop("n1, n2, k, and r should all be positive.")
+  }
+
+  # Fix the model order once for the whole data set so every difference has
+  # the same sign convention regardless of row order within a fold/repeat
+
+  models <- unique(data$model)
+
+  if(length(models) != 2){
+    stop("Column 'model' in data should only have two unique labels (one for each model to compare).")
+  }
+
+  # Calculations
+
+  d <- numeric(k * r)
+  idx <- 0
+
+  for(i in seq_len(k)){
+    for(j in seq_len(r)){
+      cell <- data[which(data$k == i & data$r == j), ]
+      a <- cell$values[which(cell$model == models[1])]
+      b <- cell$values[which(cell$model == models[2])]
+
+      # The degrees of freedom below assume exactly k * r paired differences
+
+      if(length(a) != 1 || length(b) != 1){
+        stop("data should contain exactly one value per model for each combination of 'k' and 'r'.")
+      }
+
+      idx <- idx + 1
+      d[idx] <- a - b # Differences
+    }
+  }
+
+  statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * ((1/(k * r)) + (n2/n1))) # Calculate t-statistic
+
+  if(statistic < 0){
+    p.value <- stats::pt(statistic, (k * r) - 1) # p-value for left tail
+  } else{
+    p.value <- stats::pt(statistic, (k * r) - 1, lower.tail = FALSE) # p-value for right tail
+  }
+
+  data.frame(statistic = statistic, p.value = p.value)
+}
diff --git a/R/resampled_ttest.R b/R/resampled_ttest.R
@@ -0,0 +1,50 @@
+#' Compute correlated t-statistic and p-value for resampled data
+#'
+#' The variance of the paired differences \code{x - y} is multiplied by the
+#' correction term \code{1/n + n2/n1} before the t-statistic is formed. The
+#' p-value is read from a t-distribution with \code{n - 1} degrees of freedom.
+#'
+#' @importFrom stats var pt
+#' @param x \code{numeric} vector of values for model A
+#' @param y \code{numeric} vector of values for model B
+#' @param n \code{integer} denoting number of repeat samples. Defaults to \code{length(x)} when missing or \code{NULL}
+#' @param n1 \code{integer} denoting train set size
+#' @param n2 \code{integer} denoting test set size
+#' @return object of class \code{data.frame} with one row and two columns: \code{statistic} (the corrected t-statistic) and \code{p.value} (one-tailed p-value taken in the direction of the sign of the statistic)
+#' @references Nadeau, C., and Bengio, Y. Inference for the Generalization Error. Machine Learning 52, (2003).
+#' @references Bouckaert, R. R., and Frank, E. Evaluating the Replicability of Significance Tests for Comparing Learning Algorithms. Advances in Knowledge Discovery and Data Mining. PAKDD 2004. Lecture Notes in Computer Science, 3056, (2004).
+#' @author Trent Henderson
+#' @examples
+#' x <- c(0.90, 0.80, 0.85, 0.95, 0.90)
+#' y <- c(0.80, 0.75, 0.80, 0.90, 0.85)
+#' resampled_ttest(x = x, y = y, n = 5, n1 = 80, n2 = 20)
+#' @export
+#'
+
+resampled_ttest <- function(x, y, n, n1, n2){
+
+  # Arg checks
+
+  if(length(x) != length(y)){
+    stop("x and y are not the same length.")
+  }
+
+  if(!is.numeric(x) || !is.numeric(y)){
+    stop("x and y should be numeric vectors of the same length.")
+  }
+
+  # Resolve the default before validating n: touching a missing n in
+  # is.numeric() would raise an error and make the default unreachable
+
+  if(missing(n) || is.null(n)){
+    n <- length(x)
+    message("n argument missing. Using length(x) as default.")
+  }
+
+  if(!is.numeric(n) || !is.numeric(n1) || !is.numeric(n2) ||
+     length(n) != 1 || length(n1) != 1 || length(n2) != 1){
+    stop("n, n1, and n2 should all be integer scalars.")
+  }
+
+  # n = 1 leaves zero degrees of freedom and n1 = 0 divides by zero
+
+  if(anyNA(c(n, n1, n2)) || n < 2 || n1 <= 0 || n2 <= 0){
+    stop("n should be at least 2 and n1 and n2 should be positive.")
+  }
+
+  # Calculations
+
+  d <- x - y # Calculate differences
+  statistic <- mean(d, na.rm = TRUE) / sqrt(stats::var(d, na.rm = TRUE) * (1/n + n2/n1)) # Calculate t-statistic
+
+  if(statistic < 0){
+    p.value <- stats::pt(statistic, n - 1) # p-value for left tail
+  } else{
+    p.value <- stats::pt(statistic, n - 1, lower.tail = FALSE) # p-value for right tail
+  }
+
+  data.frame(statistic = statistic, p.value = p.value)
+}
diff --git a/README.Rmd b/README.Rmd
@@ -0,0 +1,24 @@
+---
+output: rmarkdown::github_document
+---
+
+# correctR
+
+Corrections for correlated test statistics
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  comment = NA, fig.width = 8, fig.height = 8, cache = FALSE)
+```
+
+## Installation
+
+You can install `correctR` from GitHub:
+
+```{r eval = FALSE}
+devtools::install_github("hendersontrent/correctR")
+```
+
+## General purpose
+
+Often in machine learning, we want to compare the performance of different models. However, the methods used to obtain these performance metrics (e.g., classification accuracy) violate the assumptions of traditional statistical tests such as a $t$-test. Examples of these methods include data resampling and $k$-fold cross-validation. The purpose of these methods is to either aid generalisability of findings (i.e., through quantification of error as they produce multiple values for each model instead of just one) or to optimise model hyperparameters. This makes them invaluable, but unusable with comparative approaches such as a $t$-test, as [Dietterich (1998)](https://pubmed.ncbi.nlm.nih.gov/9744903/) found that the standard $t$-test underestimates the variance, therefore driving a high Type I error. `correctR` is a lightweight package that implements a small number of corrected test statistics for cases when samples are not independent (and therefore are correlated), such as in the case of resampling and $k$-fold cross-validation. These corrections were all originally proposed by [Nadeau and Bengio (2003)](https://link.springer.com/article/10.1023/A:1024068626366). Currently, only cases where two models are to be compared are supported.
diff --git a/README.md b/README.md
@@ -1,2 +1,34 @@
+
 # correctR
-R package for computing corrected test statistics for correlated samples.
+
+Corrections for correlated test statistics
+
+## Installation
+
+You can install `correctR` from GitHub:
+
+``` r
+devtools::install_github("hendersontrent/correctR")
+```
+
+## General purpose
+
+Often in machine learning, we want to compare the performance of
+different models. However, the methods used to obtain these performance
+metrics (e.g., classification accuracy) violate the assumptions of
+traditional statistical tests such as a $t$-test. Examples of these
+methods include data resampling and $k$-fold cross-validation. The
+purpose of these methods is to either aid generalisability of findings
+(i.e., through quantification of error as they produce multiple values
+for each model instead of just one) or to optimise model
+hyperparameters. This makes them invaluable, but unusable with
+comparative approaches such as a $t$-test, as [Dietterich
+(1998)](https://pubmed.ncbi.nlm.nih.gov/9744903/) found that the
+standard $t$-test underestimates the variance, therefore driving a high
+Type I error. `correctR` is a lightweight package that implements a
+small number of corrected test statistics for cases when samples are not
+independent (and therefore are correlated), such as in the case of
+resampling and $k$-fold cross-validation. These corrections were all
+originally proposed by [Nadeau and Bengio
+(2003)](https://link.springer.com/article/10.1023/A:1024068626366).
+Currently, only cases where two models are to be compared are supported.
diff --git a/correctR.Rproj b/correctR.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/man/correctR.Rd b/man/correctR.Rd