Add tests for pseudo-bulk; Add conditions for availability of RcppPlanc

mvfki · Feb 28, 2024 · 792a262 · 792a262
1 parent acb9e25
commit 792a262
Show file tree

Hide file tree

Showing 22 changed files with 261 additions and 286 deletions.
diff --git a/R/ATAC.R b/R/ATAC.R
@@ -30,10 +30,12 @@
 #' bmmc <- normalize(bmmc)
 #' bmmc <- selectGenes(bmmc, datasets.use = "rna")
 #' bmmc <- scaleNotCenter(bmmc)
-#' bmmc <- runINMF(bmmc, k = 20)
-#' bmmc <- quantileNorm(bmmc)
-#' bmmc <- normalizePeak(bmmc)
-#' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     bmmc <- runINMF(bmmc, k = 20)
+#'     bmmc <- quantileNorm(bmmc)
+#'     bmmc <- normalizePeak(bmmc)
+#'     bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
+#' }
 imputeKNN <- function(
         object,
         reference,
@@ -167,14 +169,16 @@ imputeKNN <- function(
 #' bmmc <- normalize(bmmc)
 #' bmmc <- selectGenes(bmmc)
 #' bmmc <- scaleNotCenter(bmmc)
-#' bmmc <- runINMF(bmmc, miniBatchSize = 100)
-#' bmmc <- quantileNorm(bmmc)
-#' bmmc <- normalizePeak(bmmc)
-#' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
-#' corr <- linkGenesAndPeaks(
-#'     bmmc, useDataset = "rna",
-#'     pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2")
-#' )
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     bmmc <- runINMF(bmmc, miniBatchSize = 100)
+#'     bmmc <- quantileNorm(bmmc)
+#'     bmmc <- normalizePeak(bmmc)
+#'     bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
+#'     corr <- linkGenesAndPeaks(
+#'         bmmc, useDataset = "rna",
+#'         pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2")
+#'     )
+#' }
 linkGenesAndPeaks <- function(
         object,
         useDataset,
@@ -348,21 +352,23 @@ linkGenesAndPeaks <- function(
 #' bmmc <- normalize(bmmc)
 #' bmmc <- selectGenes(bmmc)
 #' bmmc <- scaleNotCenter(bmmc)
-#' bmmc <- runINMF(bmmc)
-#' bmmc <- quantileNorm(bmmc)
-#' bmmc <- normalizePeak(bmmc)
-#' bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
-#' corr <- linkGenesAndPeaks(
-#'     bmmc, useDataset = "rna",
-#'     pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2")
-#' )
-#' resultPath <- tempfile()
-#' exportInteractTrack(
-#'     corrMat = corr,
-#'     pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2"),
-#'     outputPath = resultPath
-#' )
-#' head(read.table(resultPath, skip = 1))
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     bmmc <- runINMF(bmmc)
+#'     bmmc <- quantileNorm(bmmc)
+#'     bmmc <- normalizePeak(bmmc)
+#'     bmmc <- imputeKNN(bmmc, reference = "atac", queries = "rna")
+#'     corr <- linkGenesAndPeaks(
+#'         bmmc, useDataset = "rna",
+#'         pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2")
+#'     )
+#'     resultPath <- tempfile()
+#'     exportInteractTrack(
+#'         corrMat = corr,
+#'         pathToCoords = system.file("extdata/hg19_genes.bed", package = "rliger2"),
+#'         outputPath = resultPath
+#'     )
+#'     head(read.table(resultPath, skip = 1))
+#' }
 exportInteractTrack <- function(
         corrMat,
         pathToCoords,

diff --git a/R/DEG_marker.R b/R/DEG_marker.R
@@ -137,7 +137,8 @@ runPairwiseDEG <- function(
 #' detection will be performed by comparing "celltype_i" cells from "gender_j"
 #' against other cells from "gender_j", and etc.
 #' @examples
-#' # Identify markers for each cluster. Equivalent to old version `runWilcoxon(method = "cluster")`
+#' # Identify markers for each cluster. Equivalent to old version
+#' # `runWilcoxon(method = "cluster")`
 #' markerStats <- runMarkerDEG(pbmcPlot, conditionBy = "leiden_cluster")
 #' # Identify dataset markers within each cluster. Equivalent to old version
 #' # `runWilcoxon(method = "dataset")`.
@@ -261,19 +262,19 @@ runMarkerDEG <- function(
 }
 
 .DE.checkDataAvail <- function(object, useDatasets, method, usePeak) {
-    if (isH5Liger(object, useDatasets)) {
+    if (isH5Liger(object, useDatasets)) { # nocov start
         stop("HDF5 based datasets detected but is not supported. \n",
              "Try `object.sub <- downsample(object, useSlot = ",
              "'normData')` to create ANOTHER object with in memory data.")
-    }
+    } # nocov end
     if (method == "wilcoxon") {
         slot <- ifelse(usePeak, "normPeak", "normData")
     } else if (method == "pseudoBulk") {
-        if (!requireNamespace("DESeq2", quietly = TRUE))
+        if (!requireNamespace("DESeq2", quietly = TRUE)) # nocov start
             stop("Package \"DESeq2\" needed for this function to work. ",
                  "Please install it by command:\n",
                  "BiocManager::install('DESeq2')",
-                 call. = FALSE)
+                 call. = FALSE) # nocov end
         slot <- ifelse(usePeak, "rawPeak", "rawData")
     }
     allAvail <- all(sapply(useDatasets, function(d) {
@@ -377,11 +378,11 @@ makePseudoBulk <- function(mat, replicateAnn, minCellPerRep, verbose = TRUE) {
 # y: grouping label of columns of X
 # Rcpp source code located in src/wilcoxon.cpp
 wilcoxauc <- function(x, clusterVar) {
-    if (methods::is(x, 'dgTMatrix')) x <- methods::as(x, 'CsparseMatrix')
+    if (methods::is(x, 'dgTMatrix')) x <- methods::as(x, 'CsparseMatrix') # nocov start
     if (methods::is(x, 'TsparseMatrix')) x <- methods::as(x, 'CsparseMatrix')
     if (is.null(row.names(x))) {
         rownames(x) <- paste0('Feature', seq(nrow(x)))
-    }
+    } # nocov end
     if (!is.factor(clusterVar)) clusterVar <- factor(clusterVar)
     clusterVar <- droplevels(clusterVar)
     groupSize <- as.numeric(table(clusterVar))

diff --git a/R/integration.R b/R/integration.R
@@ -38,7 +38,9 @@
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' pbmc <- runIntegration(pbmc)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     pbmc <- runIntegration(pbmc)
+#' }
 runIntegration <- function(
         object,
         k = 20,
@@ -208,7 +210,9 @@ runIntegration.Seurat <- function(
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' pbmc <- runINMF(pbmc)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     pbmc <- runINMF(pbmc)
+#' }
 runINMF <- function(
         object,
         k = 20,
@@ -626,18 +630,20 @@ optimizeALS <- function( # nocov start
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' # Scenario 1
-#' pbmc <- runOnlineINMF(pbmc, minibatchSize = 200)
-#' # Scenario 2
-#' # Fake new dataset by increasing all non-zero value in "ctrl" by 1
-#' ctrl2 <- rawData(dataset(pbmc, "ctrl"))
-#' ctrl2@x <- ctrl2@x + 1
-#' colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
-#' pbmc2 <- runOnlineINMF(pbmc, k = 20, newDatasets = list(ctrl2 = ctrl2),
-#'                        minibatchSize = 100)
-#' # Scenario 3
-#' pbmc3 <- runOnlineINMF(pbmc, k = 20, newDatasets = list(ctrl2 = ctrl2),
-#'                        projection = TRUE)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     # Scenario 1
+#'     pbmc <- runOnlineINMF(pbmc, minibatchSize = 200)
+#'     # Scenario 2
+#'     # Fake a new dataset by increasing all non-zero values in "ctrl" by 1
+#'     ctrl2 <- rawData(dataset(pbmc, "ctrl"))
+#'     ctrl2@x <- ctrl2@x + 1
+#'     colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
+#'     pbmc2 <- runOnlineINMF(pbmc, k = 20, newDatasets = list(ctrl2 = ctrl2),
+#'                            minibatchSize = 100)
+#'     # Scenario 3
+#'     pbmc3 <- runOnlineINMF(pbmc, k = 20, newDatasets = list(ctrl2 = ctrl2),
+#'                            projection = TRUE)
+#' }
 runOnlineINMF <- function(
         object,
         k = 20,
@@ -1307,18 +1313,10 @@ quantileNorm.liger <- function(
 ) {
     .checkObjVersion(object)
     .checkValidFactorResult(object, checkV = FALSE)
-    if (is.null(reference)) {
-        # If ref_dataset not given, set the one with the largest number of
-        # cells as reference.
-        # Should not produce intermediate variable here because it'll be
-        # recorded as a environment parameter in object@commands
-        reference <- names(which.max(sapply(datasets(object), ncol)))
-    } else {
-        reference <- .checkUseDatasets(object, useDatasets = reference)
-        if (length(reference) != 1)
-            stop("Should specify only one reference dataset.")
-    }
-    print(reference)
+    reference <- reference %||% names(which.max(sapply(datasets(object), ncol)))
+    reference <- .checkUseDatasets(object, useDatasets = reference)
+    if (length(reference) != 1)
+        stop("Should specify only one reference dataset.")
     object <- recordCommand(object, ..., dependencies = "RANN")
     out <- .quantileNorm.HList(
         object = getMatrix(object, "H"),

diff --git a/R/ligerDataset-class.R b/R/ligerDataset-class.R
@@ -247,14 +247,16 @@ setValidity("ligerDataset", .valid.ligerDataset)
 #' n <- scaleData(pbmc, "ctrl")
 #' identical(m, n)
 #' ## Any other matrices
-#' pbmc <- online_iNMF(pbmc, k = 20, miniBatch_size = 100)
-#' ctrl <- dataset(pbmc, "ctrl")
-#' V <- getMatrix(ctrl, "V")
-#' V[1:5, 1:5]
-#' Vs <- getMatrix(pbmc, "V")
-#' length(Vs)
-#' names(Vs)
-#' identical(Vs$ctrl, V)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     pbmc <- runOnlineINMF(pbmc, k = 20, minibatchSize = 100)
+#'     ctrl <- dataset(pbmc, "ctrl")
+#'     V <- getMatrix(ctrl, "V")
+#'     V[1:5, 1:5]
+#'     Vs <- getMatrix(pbmc, "V")
+#'     length(Vs)
+#'     names(Vs)
+#'     identical(Vs$ctrl, V)
+#' }
 setMethod(
     f = "show",
     signature(object = "ligerDataset"),

diff --git a/R/optimizeNewParam.R b/R/optimizeNewParam.R
@@ -33,8 +33,10 @@
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
 #' # Only running a few iterations for fast examples
-#' pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
-#' pbmc <- optimizeNewK(pbmc, kNew = 25, nIteration = 2)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
+#'     pbmc <- optimizeNewK(pbmc, kNew = 25, nIteration = 2)
+#' }
 optimizeNewK <- function(
         object,
         kNew,
@@ -213,14 +215,16 @@ optimizeNewK <- function(
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
 #' # Only running a few iterations for fast examples
-#' pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
-#' # Create fake new data by increasing all non-zero count in "ctrl" by 1,
-#' # and make unique cell identifiers
-#' ctrl2 <- rawData(dataset(pbmc, "ctrl"))
-#' ctrl2@x <- ctrl2@x + 1
-#' colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
-#' pbmcNew <- optimizeNewData(pbmc, dataNew = list(ctrl2 = ctrl2),
-#'                            useDatasets = "ctrl", nIteration = 2)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
+#'     # Create fake new data by increasing all non-zero counts in "ctrl" by 1,
+#'     # and make unique cell identifiers
+#'     ctrl2 <- rawData(dataset(pbmc, "ctrl"))
+#'     ctrl2@x <- ctrl2@x + 1
+#'     colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
+#'     pbmcNew <- optimizeNewData(pbmc, dataNew = list(ctrl2 = ctrl2),
+#'                                useDatasets = "ctrl", nIteration = 2)
+#' }
 optimizeNewData <- function(
         object,
         dataNew,
@@ -377,9 +381,11 @@ optimizeNewData <- function(
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' # Only running a few iterations for fast examples
-#' pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
-#' pbmc <- optimizeNewLambda(pbmc, lambdaNew = 5.5, nIteration = 2)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     # Only running a few iterations for fast examples
+#'     pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
+#'     pbmc <- optimizeNewLambda(pbmc, lambdaNew = 5.5, nIteration = 2)
+#' }
 optimizeNewLambda <- function(
         object,
         lambdaNew,
@@ -455,10 +461,12 @@ optimizeNewLambda <- function(
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' # Only running a few iterations for fast examples
-#' pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
-#' pbmc <- optimizeSubset(pbmc, cellIdx = sort(sample(ncol(pbmc), 200)),
-#'                        nIteration = 2)
+#' if (requireNamespace("RcppPlanc", quietly = TRUE)) {
+#'     # Only running a few iterations for fast examples
+#'     pbmc <- runINMF(pbmc, k = 20, nIteration = 2)
+#'     pbmc <- optimizeSubset(pbmc, cellIdx = sort(sample(ncol(pbmc), 200)),
+#'                            nIteration = 2)
+#' }
 optimizeSubset <- function(
         object,
         clusterVar = NULL,

diff --git a/man/exportInteractTrack.Rd b/man/exportInteractTrack.Rd
diff --git a/man/imputeKNN.Rd b/man/imputeKNN.Rd
diff --git a/man/liger-DEG.Rd b/man/liger-DEG.Rd
diff --git a/man/ligerDataset-class.Rd b/man/ligerDataset-class.Rd