Skip to content

Commit

Permalink
Merge pull request #246 from stemangiola/dev
Browse files Browse the repository at this point in the history
improve docs for deconvolution
  • Loading branch information
stemangiola authored Jul 20, 2022
2 parents 9d162fe + 7fd7953 commit 08bd673
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 39 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: tidybulk
Title: Brings transcriptomics to the tidyverse
Version: 1.9.1
Version: 1.9.2
Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
role = c("aut", "cre")),
person("Maria", "Doyle", email = "[email protected]",
Expand Down
3 changes: 3 additions & 0 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2917,6 +2917,9 @@ run_epic = function(mix, reference = NULL) {
)
} else { Y <- mix }

# Check if it is not matrix or data.frame, for example DelayedMatrix
if(!is(Y, "matrix") & !is(Y, "data.frame"))
Y = as.matrix(Y)

results <- EPIC(Y, reference = reference)$cellFractions %>% data.frame()
#results[results < 0] <- 0
Expand Down
58 changes: 43 additions & 15 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,28 +175,56 @@ setGeneric("as_SummarizedExperiment", function(.data,
feature_cols = col_direction$vertical_cols
counts_cols = col_direction$counts_cols

colData = .data %>% select(!!.sample, sample_cols) %>% distinct %>% arrange(!!.sample) %>% {
S4Vectors::DataFrame((.) %>% select(-!!.sample),
row.names = (.) %>% pull(!!.sample))
colData =
.data %>%
select(!!.sample, sample_cols) %>%
distinct() %>%

# Unite if multiple sample columns
tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>

arrange(!!sample__$symbol) %>% {
S4Vectors::DataFrame(
(.) %>% select(-!!sample__$symbol),
row.names = (.) %>% pull(!!sample__$symbol)
)
}

rowData = .data %>% select(!!.transcript, feature_cols) %>% distinct %>% arrange(!!.transcript) %>% {
S4Vectors::DataFrame((.) %>% select(-!!.transcript),
row.names = (.) %>% pull(!!.transcript))
rowData =
.data %>%
select(!!.transcript, feature_cols) %>%
distinct() %>%

# Unite if multiple sample columns
tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>

arrange(!!feature__$symbol) %>% {
S4Vectors::DataFrame(
(.) %>% select(-!!feature__$symbol),
row.names = (.) %>% pull(!!feature__$symbol)
)
}

my_assays =
.data %>%
select(!!.sample,
!!.transcript,
!!.abundance,
!!.abundance_scaled,
counts_cols) %>%
distinct() %>%
pivot_longer( cols=-c(!!.transcript,!!.sample), names_to="assay", values_to= ".a") %>%

# Unite if multiple sample columns
tidyr::unite(!!sample__$name, !!.sample, remove = FALSE, sep = "___") |>

# Unite if multiple sample columns
tidyr::unite(!!feature__$name, !!.transcript, remove = FALSE, sep = "___") |>

select(!!sample__$symbol,
!!feature__$symbol,
!!.abundance,
!!.abundance_scaled,
counts_cols) %>%
distinct() %>%

pivot_longer( cols=-c(!!feature__$symbol,!!sample__$symbol), names_to="assay", values_to= ".a") %>%
nest(`data` = -`assay`) %>%
mutate(`data` = `data` %>% map(
~ .x %>% spread(!!.sample, .a) %>% as_matrix(rownames = quo_name(.transcript))
~ .x %>% spread(!!sample__$symbol, .a) %>% as_matrix(rownames = feature__$name)
))

# Build the object
Expand Down Expand Up @@ -1737,7 +1765,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
#' @param .sample The name of the sample column
#' @param .transcript The name of the transcript/gene column
#' @param .abundance The name of the transcript/gene abundance column
#' @param reference A data frame. A rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.
#' @param reference A data frame. The methods cibersort and llsr can accept a custom rectangular dataframe with genes as rows names, cell types as column names and gene-transcript abundance as values. For exampler tidybulk::X_cibersort. The transcript/cell_type data frame of integer transcript abundance. If NULL, the default reference for each algorithm will be used. For llsr will be LM22.
#' @param method A character string. The method to be used. At the moment Cibersort (default), epic and llsr (linear least squares regression) are available.
#' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to reshape data.
#' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).
Expand Down
8 changes: 6 additions & 2 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,9 @@ setMethod("adjust_abundance",
rownames(.x) = rowData(.data)[,quo_name(.transcript)]

# Combine
if(rownames(.x) |> is.na() |> which() |> length() |> gt(0))
stop(sprintf("tidybulk says: you have some %s that are NAs", quo_name(.transcript)))

.x = combineByRow(.x, aggregation_function)
.x = .x[match(new_row_data[,quo_name(.transcript)], rownames(.x)),,drop=FALSE]
rownames(.x) = rownames(new_row_data)
Expand Down Expand Up @@ -931,6 +934,7 @@ setMethod("aggregate_duplicates",
...) {

.transcript = enquo(.transcript)
.sample = s_(.data)$symbol

my_assay =
.data %>%
Expand Down Expand Up @@ -1043,15 +1047,15 @@ setMethod("aggregate_duplicates",

# Parse results and return
setNames(c(
"sample",
quo_name(.sample),
(.) %>% select(-1) %>% colnames() %>% sprintf("%s%s", prefix, .)

))

# Att proportions
colData(.data) = colData(.data) %>% cbind(
my_proportions %>%
as_matrix(rownames = "sample") %>%
as_matrix(rownames = .sample) %>%
.[match(rownames(colData(.data)), rownames(.)),]
)

Expand Down
25 changes: 14 additions & 11 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -463,17 +463,17 @@ add_class = function(var, name) {
#' @return A list of column enquo or error
get_sample_transcript_counts = function(.data, .sample, .transcript, .abundance){

if( .sample %>% quo_is_symbol() ) .sample = .sample
if( quo_is_symbolic(.sample) ) .sample = .sample
else if(".sample" %in% (.data %>% get_tt_columns() %>% names))
.sample = get_tt_columns(.data)$.sample
else my_stop()

if( .transcript %>% quo_is_symbol() ) .transcript = .transcript
if( quo_is_symbolic(.transcript) ) .transcript = .transcript
else if(".transcript" %in% (.data %>% get_tt_columns() %>% names))
.transcript = get_tt_columns(.data)$.transcript
else my_stop()

if( .abundance %>% quo_is_symbolic() ) .abundance = .abundance
if( quo_is_symbolic(.abundance) ) .abundance = .abundance
else if(".abundance" %in% (.data %>% get_tt_columns() %>% names))
.abundance = get_tt_columns(.data)$.abundance
else my_stop()
Expand Down Expand Up @@ -894,8 +894,8 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
.abundance_scaled = enquo(.abundance_scaled)

# x-annotation df
n_x = .data %>% distinct(!!.horizontal) %>% nrow
n_y = .data %>% distinct(!!.vertical) %>% nrow
n_x = .data %>% select(!!.horizontal) |> distinct() |> nrow()
n_y = .data %>% select(!!.vertical) |> distinct() |> nrow()

# Sample wise columns
horizontal_cols=
Expand All @@ -907,8 +907,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
.x %>%
when(
.data %>%
distinct(!!.horizontal, !!as.symbol(.x)) %>%
nrow %>%
select(!!.horizontal, !!as.symbol(.x)) %>%
distinct() |>
nrow() %>%
equals(n_x) ~ .x,
~ NULL
)
Expand All @@ -928,8 +929,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
.x %>%
ifelse_pipe(
.data %>%
distinct(!!.vertical, !!as.symbol(.x)) %>%
nrow %>%
select(!!.vertical, !!as.symbol(.x)) |>
distinct() |>
nrow() %>%
equals(n_y),
~ .x,
~ NULL
Expand Down Expand Up @@ -963,8 +965,9 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance,
.x %>%
ifelse_pipe(
.data %>%
distinct(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
nrow %>%
select(!!.vertical, !!.horizontal, !!as.symbol(.x)) %>%
distinct() |>
nrow() %>%
equals(n_x * n_y),
~ .x,
~ NULL
Expand Down
2 changes: 1 addition & 1 deletion man/deconvolve_cellularity-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 12 additions & 9 deletions tests/testthat/test-bulk_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -1940,6 +1940,17 @@ test_that("gene over representation",{

})

test_that("as_SummarizedExperiment",{
input_df |>
as_SummarizedExperiment(
.sample = c(a, condition),
.transcript = c(b, entrez),
.abundance = c
) |>
nrow() |>
expect_equal(527)

})

# test_that("bibliography",{
#
Expand All @@ -1954,12 +1965,4 @@ test_that("gene over representation",{
#
# })
#
# test_that("as_SummarizedExperiment",{
# input_df |>
# as_SummarizedExperiment(
# .sample = a,
# .transcript = b,
# .abundance = c
# )
#
# })

0 comments on commit 08bd673

Please sign in to comment.