Skip to content

Commit

Permalink
Merge pull request #237 from stemangiola/SE_to_tidybulk_new_vocabulary
Browse files Browse the repository at this point in the history
implement new vocabulary
  • Loading branch information
stemangiola authored Jun 25, 2022
2 parents f8d38ba + 6def641 commit b31ffe0
Show file tree
Hide file tree
Showing 27 changed files with 708 additions and 635 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Imports:
scales,
SummarizedExperiment,
GenomicRanges,
methods
methods,
S4Vectors
Suggests:
BiocStyle,
testthat,
Expand All @@ -53,7 +54,6 @@ Suggests:
Seurat,
KernSmooth,
Rtsne,
S4Vectors,
ggplot2,
widyr,
clusterProfiler,
Expand Down Expand Up @@ -82,7 +82,7 @@ Biarch: true
biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.0
LazyDataCompression: xz
URL: https://github.com/stemangiola/tidybulk
BugReports: https://github.com/stemangiola/tidybulk/issues
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ import(readr)
import(tibble)
import(tidyr)
importFrom(GenomicRanges,makeGRangesListFromDataFrame)
importFrom(S4Vectors,metadata)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
Expand Down
8 changes: 4 additions & 4 deletions R/dplyr_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ rowwise.tidybulk <- function(data, ...)
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% left_join(annotation)
#'
#' @rdname dplyr-methods
Expand Down Expand Up @@ -763,7 +763,7 @@ left_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% inner_join(annotation)
#'
#' @rdname join-methods
Expand Down Expand Up @@ -802,7 +802,7 @@ inner_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% right_join(annotation)
#'
#' @rdname join-methods
Expand Down Expand Up @@ -843,7 +843,7 @@ right_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
#'
#' @examples
#'`%>%` = magrittr::`%>%`
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
#' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% full_join(annotation)
#'
#' @rdname join-methods
Expand Down
63 changes: 22 additions & 41 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ setOldClass("tidybulk")
#'
#' @examples
#'
#' my_tt = tidybulk(tidybulk::se_mini)
#' tidybulk(tidybulk::se_mini)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -1353,9 +1353,7 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy)
#' cm$batch = 0
#' cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1
#'
#' res =
#' cm %>%
#' tidybulk(sample, transcript, count) |>
#' identify_abundant() |>
#' adjust_abundance( ~ condition + batch )
#'
Expand Down Expand Up @@ -1675,7 +1673,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
#' library(dplyr)
#'
#' # Subsetting for time efficiency
#' tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1)
#' tidybulk::se_mini |> deconvolve_cellularity(cores = 1)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -1815,7 +1813,10 @@ setMethod("deconvolve_cellularity",
#'
#' @examples
#'
#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample)
#' # This function was designed for data.frame
#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
#'
#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample)
#'
#' @export
#'
Expand Down Expand Up @@ -2014,7 +2015,10 @@ setMethod("describe_transcript", "tidybulk", .describe_transcript)
#'
#' library(dplyr)
#'
#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature)
#' # This function was designed for data.frame
#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
#'
#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature)
#'
#'
#'
Expand Down Expand Up @@ -2882,8 +2886,10 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant)
#' @examples
#' \dontrun{
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' library(SummarizedExperiment)
#' se = tidybulk::se_mini
#' rowData( se)$entrez = rownames(se )
#' df_entrez = aggregate_duplicates(se,.transcript = entrez )
#'
#' library("EGSEA")
#'
Expand Down Expand Up @@ -3075,9 +3081,8 @@ setMethod("test_gene_enrichment",
#'
#' @examples
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' #se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez)
#' #df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#'
#' \dontrun{
#' test_gene_overrepresentation(
Expand Down Expand Up @@ -3245,15 +3250,14 @@ setMethod("test_gene_overrepresentation",
#'
#' \dontrun{
#'
#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' df_entrez = tidybulk::se_mini
#' df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
#' df_entrez = df_entrez %>% test_differential_abundance(~ condition)
#'
#'
#' test_gene_rank(
#' df_entrez,
#' .sample = sample,
#' .sample = .sample,
#' .entrez = entrez,
#' species="Homo sapiens",
#' gene_sets =c("C2"),
Expand Down Expand Up @@ -3591,7 +3595,7 @@ setMethod("pivot_transcript",
#'
#' @examples
#'
#' tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0)
#' # tidybulk::se_mini |> fill_missing_abundance( fill_with = 0)
#'
#'
#' @docType methods
Expand Down Expand Up @@ -3862,19 +3866,8 @@ setMethod("impute_missing_abundance", "tidybulk", .impute_missing_abundance)
#' )
#'
#' # Cox regression - multiple
#' library(dplyr)
#' library(tidyr)
#'
#' tidybulk::se_mini |>
#' tidybulk() |>
#'
#' # Add survival data
#' nest(data = -sample) |>
#' mutate(
#' days = c(1, 10, 500, 1000, 2000),
#' dead = c(1, 1, 1, 0, 1)
#' ) %>%
#' unnest(data) |>
#'
#' # Test
#' test_differential_cellularity(
Expand Down Expand Up @@ -4019,15 +4012,6 @@ setMethod("test_differential_cellularity",
#' library(tidyr)
#'
#' tidybulk::se_mini |>
#' tidybulk() |>
#'
#' # Add survival data
#' nest(data = -sample) |>
#' mutate(
#' days = c(1, 10, 500, 1000, 2000),
#' dead = c(1, 1, 1, 0, 1)
#' ) %>%
#' unnest(data) |>
#' test_stratification_cellularity(
#' survival::Surv(days, dead) ~ .,
#' cores = 1
Expand Down Expand Up @@ -4138,10 +4122,8 @@ setMethod("test_stratification_cellularity",
#'
#' @examples
#'
#' # Define tidybulk tibble
#' df = tidybulk(tidybulk::se_mini)
#'
#' get_bibliography(df)
#' get_bibliography(tidybulk::se_mini)
#'
#'
#'
Expand Down Expand Up @@ -4236,9 +4218,8 @@ setMethod("get_bibliography",
#'
#' @examples
#'
#' library(dplyr)
#'
#' tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature)
#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature)
#'
#' @export
as_matrix <- function(tbl,
Expand Down
68 changes: 19 additions & 49 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,41 +24,12 @@
~ as.symbol(.x),
~ NULL)

sample_info <-
colData(.data) %>%
.as_tibble_optimised(.data) %>%

# If reserved column names are present add .x
change_reserved_column_names() %>%

# Convert to tibble
tibble::as_tibble(rownames="sample")


range_info <-
get_special_datasets(.data) %>%
reduce(left_join, by="coordinate")

gene_info <-
rowData(.data) %>%

# If reserved column names are present add .x
change_reserved_column_names() %>%

# Convert to tibble
tibble::as_tibble(rownames="feature")

count_info <- get_count_datasets(.data)

# Return
count_info %>%
left_join(sample_info, by="sample") %>%
left_join(gene_info, by="feature") %>%
when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>%

mutate_if(is.character, as.factor) %>%
# mutate_if(is.character, as.factor) %>%
tidybulk(
sample,
feature,
!!as.symbol(sample__$name),
!!as.symbol(feature__$name),
!!as.symbol(SummarizedExperiment::assays(.data)[1] %>% names ),
!!norm_col # scaled counts if any
)
Expand Down Expand Up @@ -787,23 +758,22 @@ setMethod("adjust_abundance",

collapse_function = function(x){ x %>% unique() %>% paste(collapse = "___") }

feature_column_name = ".feature"

# Row data
new_row_data =
.data %>%
rowData() %>%
as_tibble(rownames = feature_column_name) %>%
as_tibble(rownames = feature__$name) %>%
group_by(!!as.symbol(quo_name(.transcript))) %>%
summarise(
across(everything(), ~ .x %>% collapse_function()),
merged.transcripts = n()
) %>%
arrange(!!as.symbol(feature_column_name)) %>%
arrange(!!as.symbol(feature__$name)) %>%
as.data.frame()

rownames(new_row_data) = new_row_data[,feature_column_name]
new_row_data = new_row_data %>% select(-feature_column_name)
rownames(new_row_data) = new_row_data[,feature__$name]
new_row_data = new_row_data %>% select(-feature__$name)

# Counts
new_count_data =
Expand All @@ -824,7 +794,7 @@ setMethod("adjust_abundance",
)

# GRanges
columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name)
columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature__$name)

rr = rowRanges(.data)

Expand All @@ -834,27 +804,27 @@ setMethod("adjust_abundance",
as_tibble() %>%
# Add names
when(
is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name),
~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME)
is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature__$name) := group_name),
~ mutate(., !!as.symbol(feature__$name) := rr@ranges@NAME)
) %>%
left_join(
rowData(.data) %>%
as.data.frame() %>%
select(!!as.symbol(quo_name(.transcript))) %>%
as_tibble(rownames =feature_column_name),
by = feature_column_name
as_tibble(rownames =feature__$name),
by = feature__$name
) %>%
group_by(!!as.symbol(quo_name(.transcript))) %>%
mutate(
across(columns_to_collapse, ~ .x %>% collapse_function()),
merged.transcripts = n()
) %>%
arrange(!!as.symbol(feature_column_name)) %>%
arrange(!!as.symbol(feature__$name)) %>%

select(-one_of("group_name", "group")) %>%
suppressWarnings() %>%

makeGRangesListFromDataFrame( split.field = feature_column_name,
makeGRangesListFromDataFrame( split.field = feature__$name,
keep.extra.columns = TRUE) %>%

.[match(rownames(new_count_data[[1]]), names(.))]
Expand Down Expand Up @@ -1894,7 +1864,7 @@ setMethod("test_gene_rank",
) %>%

# Convert to tibble
tibble::as_tibble(rownames="sample")
tibble::as_tibble(rownames=sample__$name)



Expand Down Expand Up @@ -1934,7 +1904,7 @@ setMethod("pivot_sample",

range_info <-
get_special_datasets(.data) %>%
reduce(left_join, by="feature")
reduce(left_join, by=feature__$name)

gene_info <-
rowData(.data) %>%
Expand All @@ -1946,11 +1916,11 @@ setMethod("pivot_sample",
) %>%

# Convert to tibble
tibble::as_tibble(rownames="feature")
tibble::as_tibble(rownames=feature__$name)

gene_info %>%
when(
nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by="feature"),
nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=feature__$name),
~ (.)
)
}
Expand Down
Loading

0 comments on commit b31ffe0

Please sign in to comment.