From 27beb3c9e343ee0f3c18d84b1be96fe0ef6c281a Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 5 Jul 2022 10:52:29 +1000 Subject: [PATCH 1/7] error if there are NAs in the .transcript column --- R/methods_SE.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/methods_SE.R b/R/methods_SE.R index 747507db..7346a163 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -825,6 +825,9 @@ setMethod("adjust_abundance", rownames(.x) = rowData(.data)[,quo_name(.transcript)] # Combine + if(rownames(.x) |> is.na() |> which() |> length() |> gt(0)) + stop(sprintf("tidybulk says: you have some %s that are NAs", quo_name(.transcript))) + .x = combineByRow(.x, aggregation_function) .x = .x[match(new_row_data[,quo_name(.transcript)], rownames(.x)),,drop=FALSE] rownames(.x) = rownames(new_row_data) From 8cdf39d9c22e9c0c904268f88a383cbb068e3728 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 5 Jul 2022 13:25:24 +1000 Subject: [PATCH 2/7] convert DelayedMatrix and fixed sample column name --- R/functions.R | 3 +++ R/methods_SE.R | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/functions.R b/R/functions.R index e9460eca..9e770207 100755 --- a/R/functions.R +++ b/R/functions.R @@ -2917,6 +2917,9 @@ run_epic = function(mix, reference = NULL) { ) } else { Y <- mix } + # Check if it is not matrix or data.frame, for example DelayedMatrix + if(!is(Y, "matrix") & !is(Y, "data.frame")) + Y = as.matrix(Y) results <- EPIC(Y, reference = reference)$cellFractions %>% data.frame() #results[results < 0] <- 0 diff --git a/R/methods_SE.R b/R/methods_SE.R index 747507db..1154eb53 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -931,6 +931,7 @@ setMethod("aggregate_duplicates", ...) { .transcript = enquo(.transcript) + .sample = s_(.data)$symbol my_assay = .data %>% @@ -1043,7 +1044,7 @@ setMethod("aggregate_duplicates", # Parse results and return setNames(c( - "sample", + quo_name(.sample), (.) %>% select(-1) %>% colnames() %>% sprintf("%s%s", prefix, .) )) @@ -1051,7 +1052,7 @@ setMethod("aggregate_duplicates", # Att proportions colData(.data) = colData(.data) %>% cbind( my_proportions %>% - as_matrix(rownames = "sample") %>% + as_matrix(rownames = .sample) %>% .[match(rownames(colData(.data)), rownames(.)),] ) From 021b01cdf36eb19c0e53abf1c2d34d31c63cdce8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 5 Jul 2022 17:31:01 +1000 Subject: [PATCH 3/7] improve docs for deconvolution --- R/methods.R | 2 +- man/deconvolve_cellularity-methods.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/methods.R b/R/methods.R index cec35919..c44cbfa8 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1737,7 +1737,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates) #' @param .sample The name of the sample column #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column -#' @param reference A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22. +#' @param reference A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. For exampler tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22. #' @param method A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available. #' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to reshape data. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). diff --git a/man/deconvolve_cellularity-methods.Rd b/man/deconvolve_cellularity-methods.Rd index a393e5ac..2d3acce8 100644 --- a/man/deconvolve_cellularity-methods.Rd +++ b/man/deconvolve_cellularity-methods.Rd @@ -91,7 +91,7 @@ deconvolve_cellularity( \item{.abundance}{The name of the transcript/gene abundance column} -\item{reference}{A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.} +\item{reference}{A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. For exampler tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.} \item{method}{A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available.} From 4db16f1bb0ff1e90ac9a529afd9875d92bb45cbf Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Tue, 5 Jul 2022 17:33:37 +1000 Subject: [PATCH 4/7] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9d4bae37..3aa6462c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.9.1 +Version: 1.9.2 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", From 8baff7cf23371c2bca4a6b4ab824110fa027f881 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Wed, 20 Jul 2022 11:09:13 +1000 Subject: [PATCH 5/7] as_SE with multiple columns as input --- R/methods.R | 56 ++++++++++++++++++++++-------- R/utilities.R | 25 +++++++------ tests/testthat/test-bulk_methods.R | 21 ++++++----- 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/R/methods.R b/R/methods.R index c44cbfa8..1e012dfb 100755 --- a/R/methods.R +++ b/R/methods.R @@ -175,28 +175,56 @@ setGeneric("as_SummarizedExperiment", function(.data, feature_cols = col_direction$vertical_cols counts_cols = col_direction$counts_cols - colData = .data %>% select(!!.sample, sample_cols) %>% distinct %>% arrange(!!.sample) %>% { - S4Vectors::DataFrame((.) %>% select(-!!.sample), - row.names = (.) %>% pull(!!.sample)) + colData = + .data %>% + select(!!.sample, sample_cols) %>% + distinct() %>% + + # Unite if multiple sample columns + tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |> + + arrange(!!sample__$symbol) %>% { + S4Vectors::DataFrame( + (.) %>% select(-!!sample__$symbol), + row.names = (.) %>% pull(!!sample__$symbol) + ) } - rowData = .data %>% select(!!.transcript, feature_cols) %>% distinct %>% arrange(!!.transcript) %>% { - S4Vectors::DataFrame((.) %>% select(-!!.transcript), - row.names = (.) %>% pull(!!.transcript)) + rowData = + .data %>% + select(!!.transcript, feature_cols) %>% + distinct() %>% + + # Unite if multiple sample columns + tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |> + + arrange(!!feature__$symbol) %>% { + S4Vectors::DataFrame( + (.) %>% select(-!!feature__$symbol), + row.names = (.) %>% pull(!!feature__$symbol) + ) } my_assays = .data %>% - select(!!.sample, - !!.transcript, - !!.abundance, - !!.abundance_scaled, - counts_cols) %>% - distinct() %>% - pivot_longer( cols=-c(!!.transcript,!!.sample), names_to="assay", values_to= ".a") %>% + + # Unite if multiple sample columns + tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |> + + # Unite if multiple sample columns + tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |> + + select(!!sample__$symbol, + !!feature__$symbol, + !!.abundance, + !!.abundance_scaled, + counts_cols) %>% + distinct() %>% + + pivot_longer( cols=-c(!!feature__$symbol,!!sample__$symbol), names_to="assay", values_to= ".a") %>% nest(`data` = -`assay`) %>% mutate(`data` = `data` %>% map( - ~ .x %>% spread(!!.sample, .a) %>% as_matrix(rownames = quo_name(.transcript)) + ~ .x %>% spread(!!sample__$symbol, .a) %>% as_matrix(rownames = feature__$name) )) # Build the object diff --git a/R/utilities.R b/R/utilities.R index ecc98fc8..4e35138b 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -463,17 +463,17 @@ add_class = function(var, name) { #' @return A list of column enquo or error get_sample_transcript_counts = function(.data, .sample, .transcript, .abundance){ - if( .sample %>% quo_is_symbol() ) .sample = .sample + if( quo_is_symbolic(.sample) ) .sample = .sample else if(".sample" %in% (.data %>% get_tt_columns() %>% names)) .sample = get_tt_columns(.data)$.sample else my_stop() - if( .transcript %>% quo_is_symbol() ) .transcript = .transcript + if( quo_is_symbolic(.transcript) ) .transcript = .transcript else if(".transcript" %in% (.data %>% get_tt_columns() %>% names)) .transcript = get_tt_columns(.data)$.transcript else my_stop() - if( .abundance %>% quo_is_symbolic() ) .abundance = .abundance + if( quo_is_symbolic(.abundance) ) .abundance = .abundance else if(".abundance" %in% (.data %>% get_tt_columns() %>% names)) .abundance = get_tt_columns(.data)$.abundance else my_stop() @@ -894,8 +894,8 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, .abundance_scaled = enquo(.abundance_scaled) # x-annotation df - n_x = .data %>% distinct(!!.horizontal) %>% nrow - n_y = .data %>% distinct(!!.vertical) %>% nrow + n_x = .data %>% select(!!.horizontal) |> distinct() |> nrow() + n_y = .data %>% select(!!.vertical) |> distinct() |> nrow() # Sample wise columns horizontal_cols= @@ -907,8 +907,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, .x %>% when( .data %>% - distinct(!!.horizontal, !!as.symbol(.x)) %>% - nrow %>% + select(!!.horizontal, !!as.symbol(.x)) %>% + distinct() |> + nrow() %>% equals(n_x) ~ .x, ~ NULL ) @@ -928,8 +929,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, .x %>% ifelse_pipe( .data %>% - distinct(!!.vertical, !!as.symbol(.x)) %>% - nrow %>% + select(!!.vertical, !!as.symbol(.x)) |> + distinct() |> + nrow() %>% equals(n_y), ~ .x, ~ NULL @@ -963,8 +965,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, .x %>% ifelse_pipe( .data %>% - distinct(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>% - nrow %>% + select(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>% + distinct() |> + nrow() %>% equals(n_x * n_y), ~ .x, ~ NULL diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index f6d962c6..ef28ef0d 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1940,6 +1940,17 @@ test_that("gene over representation",{ }) +test_that("as_SummarizedExperiment",{ + input_df |> + as_SummarizedExperiment( + .sample = c(a, condition), + .transcript = c(b, entrez), + .abundance = c + ) |> + nrow() |> + expect_equal(527) + +}) # test_that("bibliography",{ # @@ -1954,12 +1965,4 @@ test_that("gene over representation",{ # # }) # -# test_that("as_SummarizedExperiment",{ -# input_df |> -# as_SummarizedExperiment( -# .sample = a, -# .transcript = b, -# .abundance = c -# ) -# -# }) + From d5de0caea039c777d5f09d117dc95eff011f4d66 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Wed, 20 Jul 2022 15:19:40 +1000 Subject: [PATCH 6/7] add function --- R/utilities.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/R/utilities.R b/R/utilities.R index ecc98fc8..bcdb4131 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1577,3 +1577,16 @@ get_special_column_name_symbol = function(name){ feature__ = get_special_column_name_symbol(".feature") sample__ = get_special_column_name_symbol(".sample") + +is_filer_columns_in_column_selection = function(.data, ...){ + + # columns = enquos(columns) + tryCatch({ + .data |> + slice(0) |> + dplyr::filter(..., .preserve=.preserve) + + TRUE + }, + error = function(e) FALSE) +} From 7fd7953f1555171b72a581f56fb671f9be8708b6 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Wed, 20 Jul 2022 15:26:31 +1000 Subject: [PATCH 7/7] drop function added by mistake while working on tidySE --- R/utilities.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index e4c4dee8..4e35138b 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1580,16 +1580,3 @@ get_special_column_name_symbol = function(name){ feature__ = get_special_column_name_symbol(".feature") sample__ = get_special_column_name_symbol(".sample") - -is_filer_columns_in_column_selection = function(.data, ...){ - - # columns = enquos(columns) - tryCatch({ - .data |> - slice(0) |> - dplyr::filter(..., .preserve=.preserve) - - TRUE - }, - error = function(e) FALSE) -}