Start to work on c-iNMF, works for getting some result but not yet st…

…able
mvfki · Mar 8, 2024 · 9c7607d · 9c7607d
1 parent 2cd5c05
commit 9c7607d
Show file tree

Hide file tree

Showing 16 changed files with 765 additions and 173 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -23,6 +23,8 @@ S3method(plotSpatial2D,liger)
 S3method(plotSpatial2D,ligerSpatialDataset)
 S3method(quantileNorm,Seurat)
 S3method(quantileNorm,liger)
+S3method(runCINMF,Seurat)
+S3method(runCINMF,liger)
 S3method(runINMF,Seurat)
 S3method(runINMF,liger)
 S3method(runIntegration,Seurat)
@@ -156,6 +158,7 @@ export(restoreH5Liger)
 export(restoreOnlineLiger)
 export(retrieveCellFeature)
 export(reverseMethData)
+export(runCINMF)
 export(runCluster)
 export(runDoubletFinder)
 export(runGOEnrich)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -5,6 +5,18 @@ RunModularityClusteringCpp <- function(SNN, modularityFunction, resolution, algo
     .Call(`_rliger2_RunModularityClusteringCpp`, SNN, modularityFunction, resolution, algorithm, nRandomStarts, nIterations, randomSeed, printOutput, edgefilename)
 }
 
+colNormalize_dense_cpp <- function(x, L) {
+    .Call(`_rliger2_colNormalize_dense_cpp`, x, L)
+}
+
+select_factor_cpp <- function(all_data, knn, threshold) {
+    .Call(`_rliger2_select_factor_cpp`, all_data, knn, threshold)
+}
+
+colAggregateMedian_dense_cpp <- function(x, group, n) {
+    .Call(`_rliger2_colAggregateMedian_dense_cpp`, x, group, n)
+}
+
 scaleNotCenter_byRow_rcpp <- function(x) {
     .Call(`_rliger2_scaleNotCenter_byRow_rcpp`, x)
 }

diff --git a/R/archive.R b/R/archive.R
@@ -107,126 +107,3 @@
 #         featureIdx
 #     return(ld)
 # }
-
-#' #' Perform Wilcoxon rank-sum test
-#' #' @description Perform Wilcoxon rank-sum tests on specified dataset using
-#' #' given method.
-#' #' @param object A \linkS4class{liger} object with cluster assignment available.
-#' #' @param useDatasets A character vector of the names, a numeric or logical
-#' #' vector of the index of the datasets to be normalized. Default
-#' #' \code{NULL} uses all datasets.
-#' #' @param method Choose from \code{"clusters"} or \code{"datasets"}. Default
-#' #' \code{"clusters"} compares between clusters across all datasets, while
-#' #' \code{"datasets"} compares between datasets within each cluster.
-#' #' @param useCluster The name of the column in \code{cellMeta} slot storing the
-#' #' cluster assignment variable. Default \code{"leiden_cluster"}
-#' #' @param usePeak Logical, whether to test peak counts instead of gene
-#' #' expression. Requires presence of ATAC modility datasets. Default
-#' #' \code{FALSE}.
-#' #' @param verbose Logical. Whether to show information of the progress. Default
-#' #' \code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.
-#' #' @param data.use,compare.method \bold{Deprecated}. See Usage section for
-#' #' replacement.
-#' #' @return A 10-columns data.frame with test results.
-#' #' @export
-#' #' @examples
-#' #' library(dplyr)
-#' #' result <- runWilcoxon(pbmcPlot)
-#' #' result %>% group_by(group) %>% top_n(2, logFC)
-#' runWilcoxon <- function(
-#'         object,
-#'         useDatasets = NULL,
-#'         method = c("clusters", "datasets"),
-#'         useCluster = "leiden_cluster",
-#'         usePeak = FALSE,
-#'         verbose = getOption("ligerVerbose"),
-#'         # Deprecated coding style,
-#'         data.use = useDatasets,
-#'         compare.method = method
-#' ) {
-#'     .deprecateArgs(list(data.use = "useDatasets", compare.method = "method"))
-#'     method <- match.arg(method)
-#'     # Input checks ####
-#'     useDatasets <- .checkUseDatasets(object, useDatasets,
-#'                                      modal = ifelse(usePeak, "atac", "default"))
-#'     if (!isTRUE(usePeak)) {
-#'         if (method == "datasets" & length(useDatasets) < 2)
-#'             stop("Should have at least 2 datasets as input ",
-#'                  "when compare between datasets")
-#'         if (isH5Liger(object, useDatasets)) {
-#'             stop("HDF5 based datasets detected but is not supported. \n",
-#'                  "Try `object.sub <- downsample(object, useSlot = ",
-#'                  "'normData')` to create ANOTHER object with in memory data.")
-#'         }
-#'         allNormed <- all(sapply(datasets(object),
-#'                                 function(ld) !is.null(normData(ld))))
-#'         if (!allNormed)
-#'             stop("All datasets being involved has to be normalized")
-#'
-#'         ## get all shared genes of datasets involved
-#'         normDataList <- getMatrix(object, "normData", dataset = useDatasets,
-#'                                   returnList = TRUE)
-#'         features <- Reduce(intersect, lapply(normDataList, rownames))
-#'         normDataList <- lapply(normDataList, function(x) x[features,])
-#'         featureMatrix <- Reduce(cbind, normDataList)
-#'     } else {
-#'         if (method == "datasets" || length(useDatasets) != 1)
-#'             stop("For wilcoxon test on peak counts, can only use ",
-#'                  "\"cluster\" method on one dataset.")
-#'         normPeakList <- lapply(useDatasets, function(d) normPeak(object, d))
-#'         features <- Reduce(intersect, lapply(normPeakList, rownames))
-#'         featureMatrix <- Reduce(cbind, normPeakList)
-#'         #featureMatrix <- normPeak(object, useDatasets)
-#'         if (is.null(featureMatrix))
-#'             stop("Peak counts of specified dataset has to be normalized. ",
-#'                  "Please try `normalizePeak(object, useDatasets = '",
-#'                  useDatasets, "')`.")
-#'         #features <- rownames(featureMatrix)
-#'     }
-#'
-#'     ## Subset metadata involved
-#'     cellIdx <- object$dataset %in% useDatasets
-#'     cellSource <- object$dataset[cellIdx]
-#'     clusters <- .fetchCellMetaVar(object, useCluster, cellIdx = cellIdx,
-#'                                   checkCategorical = TRUE)
-#'
-#'     if (isTRUE(verbose))
-#'         .log("Performing Wilcoxon test on ", length(useDatasets), " datasets: ",
-#'              paste(useDatasets, collapse = ", "))
-#'     # perform wilcoxon test ####
-#'     if (method == "clusters") {
-#'         # compare between clusters across datasets
-#'         nfeatures <- length(features)
-#'         if (nfeatures > 100000) {
-#'             if (isTRUE(verbose)) .log("Calculating Large-scale Input...")
-#'             results <- Reduce(rbind, lapply(
-#'                 suppressWarnings(split(seq(nfeatures),
-#'                                        seq(nfeatures / 100000))),
-#'                 function(index) {
-#'                     fm <- log1p(1e10*featureMatrix[index, ])
-#'                     wilcoxauc(fm, clusters)
-#'                 }))
-#'         } else {
-#'             # TODO: If we add log-transformation to normalization method in the
-#'             # future, remember to have conditions here.
-#'             fm <- log1p(1e10*featureMatrix)
-#'             results <- wilcoxauc(fm, clusters)
-#'         }
-#'     } else {
-#'         # compare between datasets within each cluster
-#'         results <- Reduce(rbind, lapply(levels(clusters), function(cluster) {
-#'             clusterIdx <- clusters == cluster
-#'             subLabel <- paste0(cluster, "-", cellSource[clusterIdx])
-#'             if (length(unique(subLabel)) == 1) {
-#'                 # if cluster has only 1 data source
-#'                 warning("Skipped Cluster ", cluster,
-#'                         " since it has only one dataset source.")
-#'                 return()
-#'             } else {
-#'                 subMatrix <- log1p(1e10*featureMatrix[, clusterIdx])
-#'                 return(wilcoxauc(subMatrix, subLabel))
-#'             }
-#'         }))
-#'     }
-#'     return(results)
-#' }