Merge pull request #227 from stemangiola/dev

Dev
stemangiola · Nov 8, 2021 · cb98ea9 · cb98ea9
2 parents 46d4856 + b2a74f4
commit cb98ea9
Show file tree

Hide file tree

Showing 11 changed files with 446 additions and 291 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 1.6.1
+Version: 1.6.2
 Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "[email protected]",
@@ -71,7 +71,9 @@ Suggests:
     survminer,
     tidySummarizedExperiment,
     markdown,
-    uwot
+    uwot,
+    matrixStats,
+    igraph
 VignetteBuilder: 
     knitr
 RdMacros:
@@ -80,7 +82,7 @@ Biarch: true
 biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.1
+RoxygenNote: 7.1.2
 LazyDataCompression: xz
 URL: https://github.com/stemangiola/tidybulk
 BugReports: https://github.com/stemangiola/tidybulk/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -147,6 +147,7 @@ importFrom(stats,kmeans)
 importFrom(stats,lsfit)
 importFrom(stats,median)
 importFrom(stats,model.matrix)
+importFrom(stats,na.omit)
 importFrom(stats,p.adjust)
 importFrom(stats,plogis)
 importFrom(stats,prcomp)

diff --git a/R/cibersort.R b/R/cibersort.R
@@ -30,9 +30,9 @@
 
 
 # Core algorithm of Cibersort
-#' 
+#'
 #' @keywords internal
-#' 
+#'
 #' @importFrom parallel mclapply
 #' @importFrom stats cor
 #'
@@ -65,8 +65,8 @@ CoreAlg <- function(X, y, cores = 3){
 
   #Execute In a parallel way the SVM
   if(cores>1){
-    if(Sys.info()['sysname'] == 'Windows') out <- parallel::mclapply(1:svn_itor, res, mc.cores=1) 
-    else out <-  parallel::mclapply(1:svn_itor, res, mc.cores=cores) 
+    if(Sys.info()['sysname'] == 'Windows') out <- parallel::mclapply(1:svn_itor, res, mc.cores=1)
+    else out <-  parallel::mclapply(1:svn_itor, res, mc.cores=cores)
   }
   else out <-  lapply(1:svn_itor, res)
 
@@ -139,7 +139,7 @@ CoreAlg <- function(X, y, cores = 3){
 }
 
 #' @importFrom stats sd
-#' 
+#'
 #' @keywords internal
 #'
 doPerm <- function(perm, X, Y, cores = 3){
@@ -173,10 +173,11 @@ doPerm <- function(perm, X, Y, cores = 3){
 }
 
 #' @importFrom stats sd
-#' 
+#' @importFrom utils install.packages
+#'
 #' @keywords internal
-#' 
-my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3){
+#'
+my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3, exp_transform = FALSE){
 
 
   #read in data
@@ -195,7 +196,8 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3){
   P <- perm #number of permutations
 
   #anti-log if max < 50 in mixture file
-  if(max(Y) < 50) {Y <- 2^Y}
+  if(is.null(exp_transform)) exp_transform = max(Y) < 50
+  if(exp_transform) {Y <- 2^Y}
 
   #quantile normalization of mixture file
 
@@ -215,6 +217,28 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3){
   XintY <- Xgns %in% row.names(Y)
   X <- X[XintY,,drop=FALSE]
 
+  # Eliminate empty samples
+  if(length(which(colSums(Y)==0))>0)
+    warning(sprintf(
+      "tidybulk says: the samples %s were ignored for decovolution as they have 0 counts for the deconvolution signature genes",
+      colnames(Y)[colSums(Y)==0] %>% paste(collapse = ", ")
+    ))
+  Y=Y[,colSums(Y)>0, drop=FALSE]
+
+  # Check if package is installed, otherwise install
+  if (find.package("matrixStats", quiet = TRUE) %>% length %>% equals(0)) {
+    message("tidybulk says: Installing matrixStats needed for cibersort")
+    install.packages("matrixStats", repos = "https://cloud.r-project.org")
+  }
+
+  # Eliminate sd == 0
+  if(length(which(matrixStats::colSds(Y)==0))>0)
+    warning(sprintf(
+      "tidybulk says: the samples %s were ignored for decovolution as they have standard deviation of 0 for the deconvolution signature genes",
+      colnames(Y)[matrixStats::colSds(Y)==0] %>% paste(collapse = ", ")
+    ))
+  Y = Y[,matrixStats::colSds(Y)>0,drop=FALSE]
+
   #standardize sig matrix
   X <- (X - mean(X)) / sd(as.vector(X))
 

diff --git a/R/dictionary.R b/R/dictionary.R
@@ -2,3 +2,5 @@
 
 scaled_string = "_scaled"
 adjusted_string = "_adjusted"
+
+warning_for_scaling_with_few_genes = "tidybulk says: There are < 100 features/genes that are present in all your samples. Because edgeR::calcNormFactors does not allow NAs, the scaling is performed on that limited set of features/genes. The scaling may not be accurate; it is advisable to perform impute_missing_abundance() before scaling. It is possible to filter the imputed counts after scaling."