stemangiola · stemangiola · Jun 26, 2022 · Jun 24, 2022 · Jun 24, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -82,7 +82,7 @@ Biarch: true
 biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 LazyDataCompression: xz
 URL: https://github.com/stemangiola/tidybulk
 BugReports: https://github.com/stemangiola/tidybulk/issues
diff --git a/R/cibersort.R b/R/cibersort.R
@@ -189,9 +189,9 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3, exp_transform = FALSE
   ###################################
   ## This is needed to make the two tables consistent in gene
   ###################################
-
-  X <- X[order(rownames(X)),,drop=FALSE]
-  Y <- Y[order(rownames(Y)),,drop=FALSE]
+  common_genes = intersect(rownames(X), rownames(Y))
+  X <- X[common_genes,,drop=FALSE]
+  Y <- Y[common_genes,,drop=FALSE]
 
   P <- perm #number of permutations
 

diff --git a/R/methods_SE.R b/R/methods_SE.R
@@ -901,19 +901,30 @@ setMethod("aggregate_duplicates",
 
 
 
-
+#' @importFrom rlang quo_is_symbol
 .deconvolve_cellularity_se = function(.data,
 																			reference = X_cibersort,
 																			method = "cibersort",
 																			prefix = "",
 																			...) {
 
+  .transcript = enquo(.transcript)
+
 	my_assay =
 		.data %>%
 
 		assays() %>%
 		as.list() %>%
-		.[[get_assay_scaled_if_exists_SE(.data)]]
+		.[[get_assay_scaled_if_exists_SE(.data)]] %>%
+
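+	  # If .transcript was given as a column symbol, use that column's values as the assay row names; otherwise keep the assay unchanged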
+	  when(quo_is_symbol(.transcript) ~ {
+  	    .x = (.)
+  	    rownames(.x) = .data %>% pivot_transcript() %>% pull(!!.transcript)
+  	    .x
+  	  },
+  	  ~ (.)
+	  )
 
 	# Get the dots arguments
 	dots_args = rlang::dots_list(...)
@@ -952,7 +963,7 @@ setMethod("aggregate_duplicates",
 				reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .)
 
 				# Validate reference
-				validate_signature_SE(.data, reference, !!.transcript)
+				validate_signature_SE(., reference)
 
 				do.call(my_CIBERSORT, list(Y = ., X = reference, QN=FALSE) %>% c(dots_args)) %$%
 					proportions %>%
@@ -967,7 +978,7 @@ setMethod("aggregate_duplicates",
 				reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .)
 
 				# Validate reference
-				validate_signature_SE(.data, reference, !!.transcript)
+				validate_signature_SE(., reference)
 
 				(.) %>%
 					run_llsr(reference, ...) %>%

diff --git a/R/validation.R b/R/validation.R
@@ -307,33 +307,38 @@ validate_signature = function(.data, reference, .transcript){
 
 	.transcript = enquo(.transcript)
 
-	if ((.data %>%
-			 pull(!!.transcript) %in% (reference %>% rownames)) %>%
-			which %>%
-			length %>%
-			st(50))
-		warning(
-			"tidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes"
-		)
+	overlapping_genes = .data %>%     pull(!!.transcript) %in% rownames(reference) %>%  which
+
+	if(length(overlapping_genes) == 0  )
+	  stop(sprintf(
+	    "\ntidybulk says: You have NO genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ")
+	  ))
+
+	if ( length(overlapping_genes) %>%	st(50) )
+	  warning(sprintf(
+	    "\ntidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ")
+	  ))
 
 	# Check if rownames exist
 	if (reference %>% sapply(class) %in% c("numeric", "double", "integer") %>% not() %>% any)
-		stop("tidybulk says: your reference has non-numeric/integer columns.")
+	  stop("tidybulk says: your reference has non-numeric/integer columns.")
+
 
 }
 
-validate_signature_SE = function(.data, reference, .transcript){
+validate_signature_SE = function(assay, reference){
 
-	.transcript = enquo(.transcript)
+  overlapping_genes = (rownames(assay)  %in% rownames(reference)) %>%  which
 
-	if ((.data %>%
-			 rownames %in% (reference %>% rownames)) %>%
-			which %>%
-			length %>%
-			st(50))
-	  warning(
-			"tidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes"
-		)
+  if(length(overlapping_genes) == 0  )
+    stop(sprintf(
+      "\ntidybulk says: You have NO genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ")
+    ))
+
+	if ( length(overlapping_genes) %>%	st(50) )
+	  warning(sprintf(
+			"\ntidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ")
+		))
 
 	# Check if rownames exist
 	if (reference %>% sapply(class) %in% c("numeric", "double", "integer") %>% not() %>% any)