From 27beb3c9e343ee0f3c18d84b1be96fe0ef6c281a Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 5 Jul 2022 10:52:29 +1000
Subject: [PATCH 1/7] error if there are NAs in the .transcript column

---
 R/methods_SE.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/R/methods_SE.R b/R/methods_SE.R
index 747507db..7346a163 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -825,6 +825,9 @@ setMethod("adjust_abundance",
         rownames(.x) = rowData(.data)[,quo_name(.transcript)]
 
         # Combine
+        if(rownames(.x) |> is.na() |> which() |> length() |> gt(0))
+          stop(sprintf("tidybulk says: you have some %s that are NAs", quo_name(.transcript)))
+
         .x =  combineByRow(.x, aggregation_function)
         .x = .x[match(new_row_data[,quo_name(.transcript)], rownames(.x)),,drop=FALSE]
         rownames(.x) = rownames(new_row_data)

From 8cdf39d9c22e9c0c904268f88a383cbb068e3728 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 5 Jul 2022 13:25:24 +1000
Subject: [PATCH 2/7] convert DelayedMatrix and fixed sample column name

---
 R/functions.R  | 3 +++
 R/methods_SE.R | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/functions.R b/R/functions.R
index e9460eca..9e770207 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -2917,6 +2917,9 @@ run_epic = function(mix, reference = NULL) {
 			)
 	} else { Y <- mix }
 
+  # Check if it is not matrix or data.frame, for example DelayedMatrix
+  if(!is(Y, "matrix") & !is(Y, "data.frame"))
+    Y = as.matrix(Y)
 
 	results <- EPIC(Y, reference = reference)$cellFractions %>% data.frame()
 	#results[results < 0] <- 0
diff --git a/R/methods_SE.R b/R/methods_SE.R
index 747507db..1154eb53 100755
--- a/R/methods_SE.R
+++ b/R/methods_SE.R
@@ -931,6 +931,7 @@ setMethod("aggregate_duplicates",
 																			...) {
 
   .transcript = enquo(.transcript)
+  .sample = s_(.data)$symbol
 
 	my_assay =
 		.data %>%
@@ -1043,7 +1044,7 @@ setMethod("aggregate_duplicates",
 
 		# Parse results and return
 		setNames(c(
-			"sample",
+			quo_name(.sample),
 			(.) %>% select(-1) %>% colnames() %>% sprintf("%s%s", prefix, .)
 
 		))
@@ -1051,7 +1052,7 @@ setMethod("aggregate_duplicates",
 	# Att proportions
 	colData(.data) = colData(.data) %>% cbind(
 		my_proportions %>%
-			as_matrix(rownames = "sample") %>%
+			as_matrix(rownames = .sample) %>%
 		  .[match(rownames(colData(.data)), rownames(.)),]
 		)
 

From 021b01cdf36eb19c0e53abf1c2d34d31c63cdce8 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Tue, 5 Jul 2022 17:31:01 +1000
Subject: [PATCH 3/7] improve docs for deconvolution

---
 R/methods.R                           | 2 +-
 man/deconvolve_cellularity-methods.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/methods.R b/R/methods.R
index cec35919..c44cbfa8 100755
--- a/R/methods.R
+++ b/R/methods.R
@@ -1737,7 +1737,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
 #' @param .sample The name of the sample column
 #' @param .transcript The name of the transcript/gene column
 #' @param .abundance The name of the transcript/gene abundance column
-#' @param reference A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.
+#' @param reference A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. For exampler tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.
 #' @param method A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available.
 #' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to reshape data.
 #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).
diff --git a/man/deconvolve_cellularity-methods.Rd b/man/deconvolve_cellularity-methods.Rd
index a393e5ac..2d3acce8 100644
--- a/man/deconvolve_cellularity-methods.Rd
+++ b/man/deconvolve_cellularity-methods.Rd
@@ -91,7 +91,7 @@ deconvolve_cellularity(
 
 \item{.abundance}{The name of the transcript/gene abundance column}
 
-\item{reference}{A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.}
+\item{reference}{A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. For exampler tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.}
 
 \item{method}{A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available.}
 

From 4db16f1bb0ff1e90ac9a529afd9875d92bb45cbf Mon Sep 17 00:00:00 2001
From: Stefano Mangiola <mangiolastefano@gmail.com>
Date: Tue, 5 Jul 2022 17:33:37 +1000
Subject: [PATCH 4/7] Update DESCRIPTION

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9d4bae37..3aa6462c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidybulk
 Title: Brings transcriptomics to the tidyverse 
-Version: 1.9.1
+Version: 1.9.2
 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
                   role = c("aut", "cre")),
             person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",

From 8baff7cf23371c2bca4a6b4ab824110fa027f881 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Wed, 20 Jul 2022 11:09:13 +1000
Subject: [PATCH 5/7] as_SE with multiple columns as input

---
 R/methods.R                        | 56 ++++++++++++++++++++++--------
 R/utilities.R                      | 25 +++++++------
 tests/testthat/test-bulk_methods.R | 21 ++++++-----
 3 files changed, 68 insertions(+), 34 deletions(-)

diff --git a/R/methods.R b/R/methods.R
index c44cbfa8..1e012dfb 100755
--- a/R/methods.R
+++ b/R/methods.R
@@ -175,28 +175,56 @@ setGeneric("as_SummarizedExperiment", function(.data,
 	feature_cols = col_direction$vertical_cols
 	counts_cols = col_direction$counts_cols
 
-	colData = .data %>% select(!!.sample, sample_cols) %>% distinct %>% arrange(!!.sample) %>% {
-		S4Vectors::DataFrame((.) %>% select(-!!.sample),
-												 row.names = (.) %>% pull(!!.sample))
+	colData =
+	  .data %>%
+	  select(!!.sample, sample_cols) %>%
+	  distinct() %>%
+
+	  # Unite if multiple sample columns
+	  tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>
+
+	  arrange(!!sample__$symbol) %>% {
+		S4Vectors::DataFrame(
+		  (.) %>% select(-!!sample__$symbol),
+		  row.names = (.) %>% pull(!!sample__$symbol)
+		)
 	}
 
-	rowData = .data %>% select(!!.transcript, feature_cols) %>% distinct %>% arrange(!!.transcript) %>% {
-		S4Vectors::DataFrame((.) %>% select(-!!.transcript),
-												 row.names = (.) %>% pull(!!.transcript))
+	rowData =
+	  .data %>%
+	  select(!!.transcript, feature_cols) %>%
+	  distinct() %>%
+
+	  # Unite if multiple sample columns
+	  tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>
+
+	  arrange(!!feature__$symbol) %>% {
+		S4Vectors::DataFrame(
+		  (.) %>% select(-!!feature__$symbol),
+		  row.names = (.) %>% pull(!!feature__$symbol)
+		)
 	}
 
 	my_assays =
 		.data %>%
-		select(!!.sample,
-					 !!.transcript,
-					 !!.abundance,
-					 !!.abundance_scaled,
-					 counts_cols) %>%
-		distinct() %>%
-		pivot_longer( cols=-c(!!.transcript,!!.sample), names_to="assay", values_to= ".a") %>%
+
+	  # Unite if multiple sample columns
+	  tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>
+
+	  # Unite if multiple sample columns
+	  tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>
+
+	  select(!!sample__$symbol,
+	         !!feature__$symbol,
+	         !!.abundance,
+	         !!.abundance_scaled,
+	         counts_cols) %>%
+	  distinct() %>%
+
+		pivot_longer( cols=-c(!!feature__$symbol,!!sample__$symbol), names_to="assay", values_to= ".a") %>%
 		nest(`data` = -`assay`) %>%
 		mutate(`data` = `data` %>%  map(
-			~ .x %>% spread(!!.sample, .a) %>% as_matrix(rownames = quo_name(.transcript))
+			~ .x %>% spread(!!sample__$symbol, .a) %>% as_matrix(rownames = feature__$name)
 		))
 
 	# Build the object
diff --git a/R/utilities.R b/R/utilities.R
index ecc98fc8..4e35138b 100755
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -463,17 +463,17 @@ add_class = function(var, name) {
 #' @return A list of column enquo or error
 get_sample_transcript_counts = function(.data, .sample, .transcript, .abundance){
 
-    if( .sample %>% quo_is_symbol() ) .sample = .sample
+    if( quo_is_symbolic(.sample) ) .sample = .sample
     else if(".sample" %in% (.data %>% get_tt_columns() %>% names))
       .sample =  get_tt_columns(.data)$.sample
     else my_stop()
 
-    if( .transcript %>% quo_is_symbol() ) .transcript = .transcript
+    if( quo_is_symbolic(.transcript) ) .transcript = .transcript
     else if(".transcript" %in% (.data %>% get_tt_columns() %>% names))
       .transcript =  get_tt_columns(.data)$.transcript
     else my_stop()
 
-    if( .abundance %>% quo_is_symbolic() ) .abundance = .abundance
+    if(  quo_is_symbolic(.abundance) ) .abundance = .abundance
     else if(".abundance" %in% (.data %>% get_tt_columns() %>% names))
       .abundance = get_tt_columns(.data)$.abundance
     else my_stop()
@@ -894,8 +894,8 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
   .abundance_scaled = enquo(.abundance_scaled)
 
   # x-annotation df
-  n_x = .data %>% distinct(!!.horizontal) %>% nrow
-  n_y = .data %>% distinct(!!.vertical) %>% nrow
+  n_x = .data %>% select(!!.horizontal) |> distinct() |> nrow()
+  n_y = .data %>% select(!!.vertical) |> distinct() |> nrow()
 
   # Sample wise columns
   horizontal_cols=
@@ -907,8 +907,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
         .x %>%
         when(
           .data %>%
-            distinct(!!.horizontal, !!as.symbol(.x)) %>%
-            nrow %>%
+            select(!!.horizontal, !!as.symbol(.x)) %>%
+            distinct() |>
+            nrow() %>%
             equals(n_x) ~ .x,
           ~ NULL
         )
@@ -928,8 +929,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
         .x %>%
         ifelse_pipe(
           .data %>%
-            distinct(!!.vertical, !!as.symbol(.x)) %>%
-            nrow %>%
+            select(!!.vertical, !!as.symbol(.x)) |>
+            distinct() |>
+            nrow() %>%
             equals(n_y),
           ~ .x,
           ~ NULL
@@ -963,8 +965,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
         .x %>%
         ifelse_pipe(
           .data %>%
-            distinct(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
-            nrow %>%
+            select(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
+            distinct() |>
+            nrow() %>%
             equals(n_x * n_y),
           ~ .x,
           ~ NULL
diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R
index f6d962c6..ef28ef0d 100755
--- a/tests/testthat/test-bulk_methods.R
+++ b/tests/testthat/test-bulk_methods.R
@@ -1940,6 +1940,17 @@ test_that("gene over representation",{
 
 })
 
+test_that("as_SummarizedExperiment",{
+  input_df |>
+    as_SummarizedExperiment(
+      .sample = c(a, condition),
+      .transcript = c(b, entrez),
+      .abundance = c
+    ) |>
+    nrow() |>
+    expect_equal(527)
+
+})
 
 # test_that("bibliography",{
 #
@@ -1954,12 +1965,4 @@ test_that("gene over representation",{
 #
 # })
 #
-# test_that("as_SummarizedExperiment",{
-# 	input_df |>
-# 	as_SummarizedExperiment(
-# 		.sample = a,
-# 		.transcript = b,
-# 		.abundance = c
-# 	)
-#
-# })
+

From d5de0caea039c777d5f09d117dc95eff011f4d66 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Wed, 20 Jul 2022 15:19:40 +1000
Subject: [PATCH 6/7] add function

---
 R/utilities.R | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/R/utilities.R b/R/utilities.R
index ecc98fc8..bcdb4131 100755
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1577,3 +1577,16 @@ get_special_column_name_symbol = function(name){
 feature__ =  get_special_column_name_symbol(".feature")
 sample__ = get_special_column_name_symbol(".sample")
 
+
+is_filer_columns_in_column_selection = function(.data, ...){
+
+  # columns = enquos(columns)
+  tryCatch({
+    .data |>
+      slice(0) |>
+      dplyr::filter(..., .preserve=.preserve)
+
+    TRUE
+  },
+  error = function(e) FALSE)
+}

From 7fd7953f1555171b72a581f56fb671f9be8708b6 Mon Sep 17 00:00:00 2001
From: stemangiola <mangiolastefano@gmail.com>
Date: Wed, 20 Jul 2022 15:26:31 +1000
Subject: [PATCH 7/7] drop function added by mistake while working on tidySE

---
 R/utilities.R | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/R/utilities.R b/R/utilities.R
index e4c4dee8..4e35138b 100755
--- a/R/utilities.R
+++ b/R/utilities.R
@@ -1580,16 +1580,3 @@ get_special_column_name_symbol = function(name){
 feature__ =  get_special_column_name_symbol(".feature")
 sample__ = get_special_column_name_symbol(".sample")
 
-
-is_filer_columns_in_column_selection = function(.data, ...){
-
-  # columns = enquos(columns)
-  tryCatch({
-    .data |>
-      slice(0) |>
-      dplyr::filter(..., .preserve=.preserve)
-
-    TRUE
-  },
-  error = function(e) FALSE)
-}