diff --git a/DESCRIPTION b/DESCRIPTION index c71d1b30..f1042cb8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: scales, SummarizedExperiment, GenomicRanges, - methods + methods, + S4Vectors Suggests: BiocStyle, testthat, @@ -53,7 +54,6 @@ Suggests: Seurat, KernSmooth, Rtsne, - S4Vectors, ggplot2, widyr, clusterProfiler, @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/NAMESPACE b/NAMESPACE index e28aaff1..e93b80ba 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ import(readr) import(tibble) import(tidyr) importFrom(GenomicRanges,makeGRangesListFromDataFrame) +importFrom(S4Vectors,metadata) importFrom(SummarizedExperiment,SummarizedExperiment) importFrom(SummarizedExperiment,assays) importFrom(SummarizedExperiment,colData) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 80f8a1ea..f03b8028 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -723,7 +723,7 @@ rowwise.tidybulk <- function(data, ...) #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% left_join(annotation) #' #' @rdname dplyr-methods @@ -763,7 +763,7 @@ left_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% inner_join(annotation) #' #' @rdname join-methods @@ -802,7 +802,7 @@ inner_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% right_join(annotation) #' #' @rdname join-methods @@ -843,7 +843,7 @@ right_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% full_join(annotation) #' #' @rdname join-methods diff --git a/R/methods.R b/R/methods.R index 52a4a2fb..94e5ccfc 100755 --- a/R/methods.R +++ b/R/methods.R @@ -31,7 +31,7 @@ setOldClass("tidybulk") #' #' @examples #' -#' my_tt = tidybulk(tidybulk::se_mini) +#' tidybulk(tidybulk::se_mini) #' #' #' @docType methods @@ -1353,9 +1353,7 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' cm$batch = 0 #' cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 #' -#' res = #' cm %>% -#' tidybulk(sample, transcript, count) |> #' identify_abundant() |> #' adjust_abundance( ~ condition + batch ) #' @@ -1675,7 +1673,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates) #' library(dplyr) #' #' # Subsetting for time efficiency -#' tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1) +#' tidybulk::se_mini |> deconvolve_cellularity(cores = 1) #' #' #' @docType methods @@ -1815,7 +1813,10 @@ setMethod("deconvolve_cellularity", #' #' @examples #' -#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample) +#' # This function was designed for data.frame +#' # Convert from SummarizedExperiment for this example. It is NOT reccomended. +#' +#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample) #' #' @export #' @@ -2014,7 +2015,10 @@ setMethod("describe_transcript", "tidybulk", .describe_transcript) #' #' library(dplyr) #' -#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature) +#' # This function was designed for data.frame +#' # Convert from SummarizedExperiment for this example. It is NOT reccomended. +#' +#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature) #' #' #' @@ -2882,8 +2886,10 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant) #' @examples #' \dontrun{ #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) +#' library(SummarizedExperiment) +#' se = tidybulk::se_mini +#' rowData( se)$entrez = rownames(se ) +#' df_entrez = aggregate_duplicates(se,.transcript = entrez ) #' #' library("EGSEA") #' @@ -3075,9 +3081,8 @@ setMethod("test_gene_enrichment", #' #' @examples #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#' #se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez) +#' #df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) #' #' \dontrun{ #' test_gene_overrepresentation( @@ -3245,15 +3250,14 @@ setMethod("test_gene_overrepresentation", #' #' \dontrun{ #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#' df_entrez = tidybulk::se_mini +#' df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) #' df_entrez = df_entrez %>% test_differential_abundance(~ condition) #' #' #' test_gene_rank( #' df_entrez, -#' .sample = sample, +#' .sample = .sample, #' .entrez = entrez, #' species="Homo sapiens", #' gene_sets =c("C2"), @@ -3591,7 +3595,7 @@ setMethod("pivot_transcript", #' #' @examples #' -#' tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0) +#' # tidybulk::se_mini |> fill_missing_abundance( fill_with = 0) #' #' #' @docType methods @@ -3862,19 +3866,8 @@ setMethod("impute_missing_abundance", "tidybulk", .impute_missing_abundance) #' ) #' #' # Cox regression - multiple -#' library(dplyr) -#' library(tidyr) #' #' tidybulk::se_mini |> -#' tidybulk() |> -#' -#' # Add survival data -#' nest(data = -sample) |> -#' mutate( -#' days = c(1, 10, 500, 1000, 2000), -#' dead = c(1, 1, 1, 0, 1) -#' ) %>% -#' unnest(data) |> #' #' # Test #' test_differential_cellularity( @@ -4019,15 +4012,6 @@ setMethod("test_differential_cellularity", #' library(tidyr) #' #' tidybulk::se_mini |> -#' tidybulk() |> -#' -#' # Add survival data -#' nest(data = -sample) |> -#' mutate( -#' days = c(1, 10, 500, 1000, 2000), -#' dead = c(1, 1, 1, 0, 1) -#' ) %>% -#' unnest(data) |> #' test_stratification_cellularity( #' survival::Surv(days, dead) ~ ., #' cores = 1 @@ -4138,10 +4122,8 @@ setMethod("test_stratification_cellularity", #' #' @examples #' -#' # Define tidybulk tibble -#' df = tidybulk(tidybulk::se_mini) #' -#' get_bibliography(df) +#' get_bibliography(tidybulk::se_mini) #' #' #' @@ -4236,9 +4218,8 @@ setMethod("get_bibliography", #' #' @examples #' -#' library(dplyr) #' -#' tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature) +#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature) #' #' @export as_matrix <- function(tbl, diff --git a/R/methods_SE.R b/R/methods_SE.R index dd65d816..a214d3e7 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -24,41 +24,12 @@ ~ as.symbol(.x), ~ NULL) - sample_info <- - colData(.data) %>% + .as_tibble_optimised(.data) %>% - # If reserved column names are present add .x - change_reserved_column_names() %>% - - # Convert to tibble - tibble::as_tibble(rownames="sample") - - - range_info <- - get_special_datasets(.data) %>% - reduce(left_join, by="coordinate") - - gene_info <- - rowData(.data) %>% - - # If reserved column names are present add .x - change_reserved_column_names() %>% - - # Convert to tibble - tibble::as_tibble(rownames="feature") - - count_info <- get_count_datasets(.data) - - # Return - count_info %>% - left_join(sample_info, by="sample") %>% - left_join(gene_info, by="feature") %>% - when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>% - - mutate_if(is.character, as.factor) %>% + # mutate_if(is.character, as.factor) %>% tidybulk( - sample, - feature, + !!as.symbol(sample__$name), + !!as.symbol(feature__$name), !!as.symbol(SummarizedExperiment::assays(.data)[1] %>% names ), !!norm_col # scaled counts if any ) @@ -787,23 +758,22 @@ setMethod("adjust_abundance", collapse_function = function(x){ x %>% unique() %>% paste(collapse = "___") } - feature_column_name = ".feature" # Row data new_row_data = .data %>% rowData() %>% - as_tibble(rownames = feature_column_name) %>% + as_tibble(rownames = feature__$name) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% summarise( across(everything(), ~ .x %>% collapse_function()), merged.transcripts = n() ) %>% - arrange(!!as.symbol(feature_column_name)) %>% + arrange(!!as.symbol(feature__$name)) %>% as.data.frame() - rownames(new_row_data) = new_row_data[,feature_column_name] - new_row_data = new_row_data %>% select(-feature_column_name) + rownames(new_row_data) = new_row_data[,feature__$name] + new_row_data = new_row_data %>% select(-feature__$name) # Counts new_count_data = @@ -824,7 +794,7 @@ setMethod("adjust_abundance", ) # GRanges - columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name) + columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature__$name) rr = rowRanges(.data) @@ -834,27 +804,27 @@ setMethod("adjust_abundance", as_tibble() %>% # Add names when( - is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), - ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME) + is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature__$name) := group_name), + ~ mutate(., !!as.symbol(feature__$name) := rr@ranges@NAME) ) %>% left_join( rowData(.data) %>% as.data.frame() %>% select(!!as.symbol(quo_name(.transcript))) %>% - as_tibble(rownames =feature_column_name), - by = feature_column_name + as_tibble(rownames =feature__$name), + by = feature__$name ) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% mutate( across(columns_to_collapse, ~ .x %>% collapse_function()), merged.transcripts = n() ) %>% - arrange(!!as.symbol(feature_column_name)) %>% + arrange(!!as.symbol(feature__$name)) %>% select(-one_of("group_name", "group")) %>% suppressWarnings() %>% - makeGRangesListFromDataFrame( split.field = feature_column_name, + makeGRangesListFromDataFrame( split.field = feature__$name, keep.extra.columns = TRUE) %>% .[match(rownames(new_count_data[[1]]), names(.))] @@ -1894,7 +1864,7 @@ setMethod("test_gene_rank", ) %>% # Convert to tibble - tibble::as_tibble(rownames="sample") + tibble::as_tibble(rownames=sample__$name) @@ -1934,7 +1904,7 @@ setMethod("pivot_sample", range_info <- get_special_datasets(.data) %>% - reduce(left_join, by="feature") + reduce(left_join, by=feature__$name) gene_info <- rowData(.data) %>% @@ -1946,11 +1916,11 @@ setMethod("pivot_sample", ) %>% # Convert to tibble - tibble::as_tibble(rownames="feature") + tibble::as_tibble(rownames=feature__$name) gene_info %>% when( - nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by="feature"), + nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=feature__$name), ~ (.) ) } diff --git a/R/tidySummarizedExperiment.R b/R/tidySummarizedExperiment.R index f7ab582e..443b394e 100644 --- a/R/tidySummarizedExperiment.R +++ b/R/tidySummarizedExperiment.R @@ -1,16 +1,12 @@ -change_reserved_column_names = function(.data){ - - .data %>% - - setNames( - colnames(.) %>% - str_replace("^feature$", "feature.x") %>% - str_replace("^sample$", "sample.x") %>% - str_replace("^coordinate$", "coordinate.x") - ) +eliminate_GRanges_metadata_columns_also_present_in_Rowdata = function(.my_data, se){ + .my_data %>% + select(-one_of(colnames(rowData(se)))) %>% + # In case there is not metadata column + suppressWarnings() } + #' @importFrom dplyr select #' @importFrom tidyselect one_of #' @importFrom tibble as_tibble @@ -18,71 +14,60 @@ change_reserved_column_names = function(.data){ #' @importFrom SummarizedExperiment rowRanges #' @importFrom tibble rowid_to_column #' -#' @keywords internal #' @noRd -get_special_datasets <- function(SummarizedExperiment_object) { - - SummarizedExperiment_object %>% - rowRanges() %>% - when( - # If no ranges - as.data.frame(.) %>% - nrow() %>% - equals(0) ~ tibble(), - - # If it is a range list (multiple rows per feature) - class(.) %>% equals("CompressedGRangesList") ~ - tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - nest(coordinate = -group_name) %>% - rename(feature = group_name), - - # If standard GRanges (one feature per line) - ~ { - transcript_column = - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - lapply(function(x) rownames(SummarizedExperiment_object)[1] %in% x) %>% - unlist() %>% - which() %>% - names() - - - # Just rename - (.) %>% - - # If transcript_column exists all good - when( - !is.null(transcript_column) ~ tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - rename(feature := !!transcript_column) , - - # If transcript_column is NULL add numeric column - ~ tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - rowid_to_column(var = "feature") %>% - mutate(feature = as.character(feature)) - ) %>% - - # Always nest - nest(coordinate = -feature) - - } - ) %>% - list() +get_special_datasets <- function(se) { + + rr = se %>% + rowRanges() + + rr %>% + when( + + # If no ranges + as.data.frame(.) %>% + nrow() %>% + equals(0) ~ tibble(), + + # If it is a range list (multiple rows per feature) + is(., "CompressedGRangesList") ~ { + + # If GRanges does not have row names + if(is.null(rr@partitioning@NAMES)) rr@partitioning@NAMES = as.character(1:nrow(se)) + + tibble::as_tibble(rr) %>% + eliminate_GRanges_metadata_columns_also_present_in_Rowdata(se) %>% + nest(GRangesList = -group_name) %>% + rename(!!f_(se)$symbol := group_name) + + }, + + # If standard GRanges (one feature per line) + ~ { + + # If GRanges does not have row names + if(is.null(rr@ranges@NAMES)) rr@ranges@NAMES = as.character(1:nrow(se)) + + tibble::as_tibble(rr) %>% + eliminate_GRanges_metadata_columns_also_present_in_Rowdata(se) %>% + mutate(!!f_(se)$symbol := rr@ranges@NAMES) + } + + ) %>% + list() } -change_reserved_column_names = function(.data){ +#' @importFrom stringr str_replace +change_reserved_column_names = function(col_data, .data ){ - .data %>% + col_data %>% - setNames( - colnames(.) %>% - str_replace("^feature$", "feature.x") %>% - str_replace("^sample$", "sample.x") %>% - str_replace("^coordinate$", "coordinate.x") - ) + setNames( + colnames(.) %>% + sapply(function(x) if(x==f_(.data)$name) sprintf("%s.x", f_(.data)$name) else x) %>% + sapply(function(x) if(x==s_(.data)$name) sprintf("%s.x", s_(.data)$name) else x) %>% + str_replace("^coordinate$", "coordinate.x") + ) } @@ -93,24 +78,201 @@ change_reserved_column_names = function(.data){ #' @importFrom purrr reduce #' @importFrom SummarizedExperiment assays #' -#' @keywords internal #' @noRd -get_count_datasets <- function(SummarizedExperiment_object) { - map2( - assays(SummarizedExperiment_object) %>% as.list(), - names(assays(SummarizedExperiment_object)), - ~ .x %>% - tibble::as_tibble(rownames = "feature", .name_repair = "minimal") %>% - - # If the matrix does not have sample names, fix column names - when(colnames(.x) %>% is.null() ~ setNames(., c( - "feature", seq_len(ncol(.x)) - )), - ~ (.) - ) %>% - - gather(sample, count,-feature) %>% - rename(!!.y := count) - ) %>% - reduce(left_join, by = c("feature", "sample")) +get_count_datasets <- function(se) { + map2( + assays(se) %>% as.list(), + names(assays(se)), + ~ { + + # If the counts are in a sparse matrix convert to a matrix + # This might happen because the user loaded tidySummarizedExperiment and is + # print a SingleCellExperiment + if(is(.x, "dgCMatrix")) { + .x = as.matrix(.x) + } + + .x %>% + # matrix() %>% + # as.data.frame() %>% + tibble::as_tibble(rownames = f_(se)$name, .name_repair = "minimal") %>% + + # If the matrix does not have sample names, fix column names + when(colnames(.x) %>% is.null() ~ setNames(., c( + f_(se)$name, seq_len(ncol(.x)) + )), + ~ (.) + ) %>% + + gather(!!s_(se)$symbol, !!.y,-!!f_(se)$symbol) + + #%>% + # rename(!!.y := count) + }) %>% + when( + length(.)>0 ~ bind_cols(., .name_repair = c("minimal")) %>% .[!duplicated(colnames(.))], # reduce(., left_join, by = c(f_(se)$name, s_(se)$name)), + ~ expand.grid( + rownames(se), colnames(se) + ) %>% + setNames(c(f_(se)$name, s_(se)$name)) %>% + tibble::as_tibble() + ) %>% + + # Add dummy sample or feature if we have empty assay. + # This is needed for a correct isualisation of the tibble form + when( + f_(se)$name %in% colnames(.) %>% not ~ mutate(., !!f_(se)$symbol := as.character(NA)), + s_(se)$name %in% colnames(.) %>% not ~ mutate(., !!s_(se)$symbol := as.character(NA)), + ~ (.) + ) +} + +subset_tibble_output = function(.data, count_info, sample_info, gene_info, range_info, .subset){ + # This function outputs a tibble after subsetting the columns + .subset = enquo(.subset) + + # Build template of the output + output_colnames = + slice(count_info, 0) %>% + left_join(slice(sample_info, 0), by=s_(.data)$name) %>% + left_join(slice(gene_info, 0), by = f_(.data)$name) %>% + when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) %>% + select(!!.subset) %>% + colnames() + + + # Sample table + sample_info = + sample_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(s_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + range_info = + range_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + gene_info = + gene_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + count_info = + count_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, s_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + + if( + !is.null(count_info) & + ( + !is.null(sample_info) & !is.null(gene_info) | + + # Make exception for weirs cases (e.g. c(sample, counts)) + (colnames(count_info) %>% outersect(c(f_(.data)$name, s_(.data)$name)) %>% length() %>% gt(0)) + ) + ) { + output_df = + count_info %>% + when(!is.null(sample_info) ~ (.) %>% left_join(sample_info, by=s_(.data)$name), ~ (.)) %>% + when(!is.null(gene_info) ~ (.) %>% left_join(gene_info, by=f_(.data)$name), ~ (.)) %>% + when(!is.null(range_info) ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) + } + else if(!is.null(sample_info) ){ + output_df = sample_info + } + else if(!is.null(gene_info)){ + output_df = gene_info %>% + + # If present join GRanges + when(!is.null(range_info) ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) + } + + output_df %>% + + # Cleanup + select(one_of(output_colnames)) %>% + suppressWarnings() + +} + + +.as_tibble_optimised = function(x, skip_GRanges = FALSE, .subset = NULL, + .name_repair=c("check_unique", "unique", "universal", "minimal"), + rownames=pkgconfig::get_config("tibble::rownames", NULL)){ + + .subset = enquo(.subset) + + sample_info <- + colData(x) %>% + + # If reserved column names are present add .x + change_reserved_column_names(x) %>% + + # Convert to tibble + tibble::as_tibble(rownames=s_(x)$name) %>% + setNames(c(s_(x)$name, colnames(colData(x)))) + + range_info <- + skip_GRanges %>% + when( + (.) ~ tibble() %>% list, + ~ get_special_datasets(x) + ) %>% + reduce(left_join, by="coordinate") + + gene_info <- + rowData(x) %>% + + # If reserved column names are present add .x + change_reserved_column_names(x)%>% + + # Convert to tibble + tibble::as_tibble(rownames=f_(x)$name) %>% + setNames(c(f_(x)$name, colnames(rowData(x)))) + + + count_info <- get_count_datasets(x) + + # Return + if(quo_is_null(.subset)) + + # If I want to return all columns + count_info %>% + inner_join(sample_info, by=s_(x)$name) %>% + inner_join(gene_info, by=f_(x)$name) %>% + when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) + + # This function outputs a tibble after subsetting the columns + else subset_tibble_output(x, count_info, sample_info, gene_info, range_info, !!.subset) + + +} + +#' @importFrom S4Vectors metadata +f_ = function(x){ + # Check if old deprecated columns are used + if("feature__" %in% names(metadata(x))) feature__ = metadata(x)$feature__ + return(feature__) +} + +#' @importFrom S4Vectors metadata +s_ = function(x){ + if("sample__" %in% names(metadata(x))) sample__ = metadata(x)$sample__ + return(sample__) } diff --git a/R/tidyr_methods.R b/R/tidyr_methods.R index c9693c4e..dc103381 100755 --- a/R/tidyr_methods.R +++ b/R/tidyr_methods.R @@ -42,8 +42,8 @@ #' @examples #' #' library(dplyr) -#' -#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -feature) %>% +#' +#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -.feature) %>% #' unnest(data) #' #' @rdname nest-methods @@ -57,16 +57,16 @@ unnest.nested_tidybulk <- function (data, cols, ..., keep_empty=FALSE, ptype=NUL { cols <- enquo(cols) - - + + data %>% drop_class(c("nested_tidybulk", "tt")) %>% - tidyr::unnest(!!cols, ..., keep_empty = keep_empty, ptype = ptype, + tidyr::unnest(!!cols, ..., keep_empty = keep_empty, ptype = ptype, names_sep = names_sep, names_repair = names_repair) %>% # Attach attributes reattach_internals(data) %>% - + # Add class add_class("tt") %>% add_class("tidybulk") @@ -84,7 +84,7 @@ unnest.nested_tidybulk <- function (data, cols, ..., keep_empty=FALSE, ptype=NUL #' #' @examples #' -#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -feature) +#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -.feature) #' #' @rdname nest-methods #' @name nest @@ -97,26 +97,26 @@ nest.tidybulk <- function (.data, ..., .names_sep = NULL) { cols <- enquos(...) col_name_data = names(cols) - + .data %>% - + # This is needed otherwise nest goes into loop and fails drop_class(c("tidybulk", "tt")) %>% tidyr::nest(...) %>% - + # Add classes afterwards mutate(!!as.symbol(col_name_data) := map( - !!as.symbol(col_name_data), - ~ .x %>% + !!as.symbol(col_name_data), + ~ .x %>% add_class("tt") %>% add_class("tidybulk") )) %>% - + # Attach attributes reattach_internals(.data) %>% - + # Add class add_class("tt") %>% add_class("nested_tidybulk") - -} \ No newline at end of file + +} diff --git a/R/utilities.R b/R/utilities.R index e6476b26..6a2bcab9 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1460,36 +1460,6 @@ rotation = function(m, d) { ) %>% as_matrix) %*% m) } -#' -#' @keywords internal -#' @noRd -#' -#' @importFrom dplyr select -#' @importFrom tibble as_tibble -#' @importFrom tibble tibble -get_special_datasets <- function(SummarizedExperiment_object) { - if ( - "RangedSummarizedExperiment" %in% .class2(SummarizedExperiment_object) & - - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - nrow() %>% - gt(0) - ) { - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - - # Take off rowData columns as there is a recursive anomaly within gene ranges - suppressWarnings( - select(-one_of(colnames(rowData(SummarizedExperiment_object)))) - ) %>% - tibble::as_tibble(rownames="feature") %>% - list() - } else { - tibble() %>% list() - } -} - combineByRow <- function(m, fun = NULL) { # Shown here #https://stackoverflow.com/questions/8139301/aggregate-rows-in-a-large-matrix-by-rowname @@ -1595,3 +1565,10 @@ fill_NA_matrix_with_factor_colwise = function(.data, factor){ .[rn, cn] } + +get_special_column_name_symbol = function(name){ + list(name = name, symbol = as.symbol(name)) +} + +feature__ = get_special_column_name_symbol(".feature") +sample__ = get_special_column_name_symbol(".sample") diff --git a/data/se_mini.rda b/data/se_mini.rda index 518d41dc..678bc2c5 100755 Binary files a/data/se_mini.rda and b/data/se_mini.rda differ diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 9c970b64..1310c087 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -126,9 +126,7 @@ cm = tidybulk::se_mini cm$batch = 0 cm$batch[colnames(cm) \%in\% c("SRR1740035", "SRR1740043")] = 1 -res = cm \%>\% - tidybulk(sample, transcript, count) |> identify_abundant() |> adjust_abundance( ~ condition + batch ) diff --git a/man/as_matrix.Rd b/man/as_matrix.Rd index 31c9b089..f03e4296 100644 --- a/man/as_matrix.Rd +++ b/man/as_matrix.Rd @@ -21,8 +21,7 @@ Get matrix from tibble } \examples{ -library(dplyr) -tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature) +tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature) } diff --git a/man/deconvolve_cellularity-methods.Rd b/man/deconvolve_cellularity-methods.Rd index 5c885ed1..a393e5ac 100644 --- a/man/deconvolve_cellularity-methods.Rd +++ b/man/deconvolve_cellularity-methods.Rd @@ -131,7 +131,7 @@ CIBERSORT(Y = data, X = reference, ...) library(dplyr) # Subsetting for time efficiency -tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1) +tidybulk::se_mini |> deconvolve_cellularity(cores = 1) } diff --git a/man/dplyr-methods.Rd b/man/dplyr-methods.Rd index 20999c40..d5e32a52 100644 --- a/man/dplyr-methods.Rd +++ b/man/dplyr-methods.Rd @@ -24,7 +24,7 @@ Left join datasets } \examples{ `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% left_join(annotation) } diff --git a/man/ensembl_to_symbol-methods.Rd b/man/ensembl_to_symbol-methods.Rd index 8cc48e50..73365d83 100644 --- a/man/ensembl_to_symbol-methods.Rd +++ b/man/ensembl_to_symbol-methods.Rd @@ -44,7 +44,10 @@ This is useful since different resources use ensembl IDs while others use gene s library(dplyr) -tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature) +# This function was designed for data.frame +# Convert from SummarizedExperiment for this example. It is NOT reccomended. + +tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature) diff --git a/man/fill_missing_abundance-methods.Rd b/man/fill_missing_abundance-methods.Rd index eff5fa8e..429c64da 100644 --- a/man/fill_missing_abundance-methods.Rd +++ b/man/fill_missing_abundance-methods.Rd @@ -70,7 +70,7 @@ This function fills the abundance of missing sample-transcript pair using the me } \examples{ -tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0) +# tidybulk::se_mini |> fill_missing_abundance( fill_with = 0) } diff --git a/man/get_bibliography-methods.Rd b/man/get_bibliography-methods.Rd index 6aed72a6..759e8a0e 100644 --- a/man/get_bibliography-methods.Rd +++ b/man/get_bibliography-methods.Rd @@ -45,10 +45,8 @@ This methods returns the bibliography list of your workflow from the internals o } \examples{ -# Define tidybulk tibble -df = tidybulk(tidybulk::se_mini) -get_bibliography(df) +get_bibliography(tidybulk::se_mini) diff --git a/man/join-methods.Rd b/man/join-methods.Rd index 6febe09c..0abea038 100644 --- a/man/join-methods.Rd +++ b/man/join-methods.Rd @@ -34,15 +34,15 @@ Full join datasets } \examples{ `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% inner_join(annotation) `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% right_join(annotation) `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% full_join(annotation) } diff --git a/man/nest-methods.Rd b/man/nest-methods.Rd index b81e5897..8e0e16cb 100644 --- a/man/nest-methods.Rd +++ b/man/nest-methods.Rd @@ -54,10 +54,10 @@ nest library(dplyr) -tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -feature) \%>\% +tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -.feature) \%>\% unnest(data) -tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -feature) +tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -.feature) } diff --git a/man/symbol_to_entrez.Rd b/man/symbol_to_entrez.Rd index 7a2304f7..f02ca98d 100644 --- a/man/symbol_to_entrez.Rd +++ b/man/symbol_to_entrez.Rd @@ -21,6 +21,9 @@ Get ENTREZ id from gene SYMBOL } \examples{ -tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample) +# This function was designed for data.frame +# Convert from SummarizedExperiment for this example. It is NOT reccomended. + +tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample) } diff --git a/man/test_differential_cellularity-methods.Rd b/man/test_differential_cellularity-methods.Rd index 9c55e048..8be30ec7 100644 --- a/man/test_differential_cellularity-methods.Rd +++ b/man/test_differential_cellularity-methods.Rd @@ -153,19 +153,8 @@ deconvolve_cellularity( ) # Cox regression - multiple - library(dplyr) - library(tidyr) tidybulk::se_mini |> - tidybulk() |> - - # Add survival data - nest(data = -sample) |> - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) \%>\% - unnest(data) |> # Test test_differential_cellularity( diff --git a/man/test_gene_enrichment-methods.Rd b/man/test_gene_enrichment-methods.Rd index b91308fa..d4126abd 100644 --- a/man/test_gene_enrichment-methods.Rd +++ b/man/test_gene_enrichment-methods.Rd @@ -178,8 +178,10 @@ dge %>% \examples{ \dontrun{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) +library(SummarizedExperiment) +se = tidybulk::se_mini +rowData( se)$entrez = rownames(se ) +df_entrez = aggregate_duplicates(se,.transcript = entrez ) library("EGSEA") diff --git a/man/test_gene_overrepresentation-methods.Rd b/man/test_gene_overrepresentation-methods.Rd index a946dc46..1886d484 100644 --- a/man/test_gene_overrepresentation-methods.Rd +++ b/man/test_gene_overrepresentation-methods.Rd @@ -122,9 +122,8 @@ Undelying method: } \examples{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez) +#df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) \dontrun{ test_gene_overrepresentation( diff --git a/man/test_gene_rank-methods.Rd b/man/test_gene_rank-methods.Rd index 2603d497..bd76b5ce 100644 --- a/man/test_gene_rank-methods.Rd +++ b/man/test_gene_rank-methods.Rd @@ -134,15 +134,14 @@ mutate(fit = \dontrun{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +df_entrez = tidybulk::se_mini +df_entrez = mutate(df_entrez, do_test = .feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) df_entrez = df_entrez \%>\% test_differential_abundance(~ condition) test_gene_rank( df_entrez, - .sample = sample, + .sample = .sample, .entrez = entrez, species="Homo sapiens", gene_sets =c("C2"), diff --git a/man/test_stratification_cellularity-methods.Rd b/man/test_stratification_cellularity-methods.Rd index 629b8d6e..bfad1fd1 100644 --- a/man/test_stratification_cellularity-methods.Rd +++ b/man/test_stratification_cellularity-methods.Rd @@ -130,15 +130,6 @@ library(dplyr) library(tidyr) tidybulk::se_mini |> - tidybulk() |> - -# Add survival data -nest(data = -sample) |> -mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) -) \%>\% -unnest(data) |> test_stratification_cellularity( survival::Surv(days, dead) ~ ., cores = 1 diff --git a/man/tidybulk-methods.Rd b/man/tidybulk-methods.Rd index 1ef8cbce..0ef4e838 100644 --- a/man/tidybulk-methods.Rd +++ b/man/tidybulk-methods.Rd @@ -54,7 +54,7 @@ arguments are stored as metadata. They can be extracted as attr(, "inter } \examples{ -my_tt = tidybulk(tidybulk::se_mini) +tidybulk(tidybulk::se_mini) } diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index ed44cd17..b0fdd2b2 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -3,7 +3,7 @@ context('Bulk methods') data("se_mini") data("breast_tcga_mini_SE") -input_df = se_mini %>% tidybulk() %>% as_tibble() %>% setNames(c("b","a", "c", "Cell type", "time" , "condition")) +input_df = se_mini %>% tidybulk() %>% as_tibble() %>% setNames(c("b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) input_df_breast = breast_tcga_mini_SE %>% tidybulk() %>% as_tibble() %>% setNames(c( "b","a", "c", "c norm", "call")) @@ -128,7 +128,7 @@ test_that("Adding scaled counts - no object",{ expect_equal( ncol(res), - 10 + 13 ) }) @@ -379,7 +379,7 @@ test_that("Add differential trancript abundance - no object",{ expect_equal( ncol(res), - 12 + 15 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -824,7 +824,9 @@ test_that("test prefix",{ test_that("Get entrez from symbol - no object",{ res = - symbol_to_entrez(input_df, .transcript = b, .sample = a) + input_df %>% + select(-entrez) %>% + symbol_to_entrez(.transcript = b, .sample = a) expect_equal( res$entrez[1:4], @@ -928,7 +930,7 @@ test_that("Get adjusted counts - no object",{ expect_equal( ncol(res), - 7 + 9 ) }) @@ -957,7 +959,7 @@ test_that("Add adjusted counts - no object",{ expect_equal( ncol(res), - 9 + 12 ) }) @@ -1008,7 +1010,7 @@ test_that("Get cluster lables based on Kmeans - no object",{ expect_equal( ncol(res), - 5 + 7 ) expect_equal( nrow(res), @@ -1037,7 +1039,7 @@ test_that("Add cluster lables based on Kmeans - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1179,7 +1181,7 @@ test_that("Get reduced dimensions MDS - no object",{ expect_equal( ncol(res), - 6 + 8 ) expect_equal( nrow(res), @@ -1208,7 +1210,7 @@ test_that("Add reduced dimensions MDS - no object",{ expect_equal( ncol(res), - 9 + 12 ) expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) @@ -1271,7 +1273,7 @@ test_that("Get reduced dimensions PCA - no object",{ expect_equal( ncol(res), - 6 + 8 ) expect_equal( class(attr(res, "internals")$PCA), "prcomp" ) @@ -1298,7 +1300,7 @@ test_that("Add reduced dimensions PCA - no object",{ expect_equal( ncol(res), - 9 + 12 ) expect_equal( class(attr(res, "internals")$PCA), "prcomp" ) @@ -1447,7 +1449,7 @@ test_that("Get rotated dimensions - no object",{ expect_equal( ncol(res), - 8 + 10 ) expect_equal( nrow(res), @@ -1486,7 +1488,7 @@ test_that("Add rotated dimensions - no object",{ expect_equal( ncol(res), - 11 + 14 ) }) @@ -1508,7 +1510,7 @@ test_that("Aggregate duplicated transcript - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1531,7 +1533,7 @@ test_that("Drop redundant correlated - no object",{ expect_equal( ncol(res), - 6 + 9 ) }) @@ -1595,7 +1597,7 @@ test_that("Add description to symbol",{ expect_equal( ncol(res), - 7 + 10 ) res = @@ -1606,7 +1608,7 @@ test_that("Add description to symbol",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1816,7 +1818,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res1), - 7 + 10 ) res2 = @@ -1831,7 +1833,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res2), - 7 + 10 ) expect_gt( @@ -1849,7 +1851,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1867,7 +1869,7 @@ test_that("filter abundant with design - no object",{ expect_equal( ncol(res), - 7 + 10 ) @@ -1882,13 +1884,13 @@ test_that("nest - no object",{ test_that("pivot",{ - expect_equal( ncol(pivot_sample(tidybulk(input_df, a, b, c))), 4 ) + expect_equal( ncol(pivot_sample(tidybulk(input_df, a, b, c))), 6 ) - expect_equal( ncol(pivot_sample(input_df, a)), 4 ) + expect_equal( ncol(pivot_sample(input_df, a)), 6 ) - expect_equal( ncol(pivot_transcript(tidybulk(input_df, a, b, c))), 1 ) + expect_equal( ncol(pivot_transcript(tidybulk(input_df, a, b, c))), 2 ) - expect_equal( ncol(pivot_transcript(input_df, b)), 1 ) + expect_equal( ncol(pivot_transcript(input_df, b)), 2 ) }) @@ -1913,14 +1915,14 @@ test_that("pivot",{ test_that("gene over representation",{ - df_entrez = se_mini %>% tidybulk() %>% as_tibble() %>% symbol_to_entrez(.transcript = feature, .sample = sample) - df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) - df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) + df_entrez = se_mini %>% tidybulk() %>% as_tibble() + df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = .sample, .transcript = entrez, .abundance = count) + df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) res = test_gene_overrepresentation( df_entrez, - .sample = sample, + .sample = .sample, .entrez = entrez, .do_test = do_test, species="Homo sapiens" diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index d10faafc..4b4c212c 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -3,65 +3,65 @@ context('Bulk methods SummarizedExperiment') data("se_mini") data("breast_tcga_mini_SE") -input_df = setNames(se_mini %>% tidybulk() %>% as_tibble(), c( "b","a", "c", "Cell type", "time" , "condition")) +input_df = setNames(se_mini %>% tidybulk() %>% as_tibble(), c( "b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) input_df_breast = setNames( breast_tcga_mini_SE %>% tidybulk() %>% as_tibble(), c( "b", "a","c", "c norm", "call" )) test_that("tidybulk SummarizedExperiment conversion",{ - res = tidybulk(tidybulk::se) + res = tidybulk(tidybulk::se) - expect_equal( class(res)[1], "tidybulk" ) + expect_equal( class(res)[1], "tidybulk" ) - expect_equal( nrow(res), 800 ) + expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 21 ) + expect_equal( ncol(res), 13 ) - res = res %>% tidybulk:::tidybulk_to_SummarizedExperiment() + res = res %>% tidybulk:::tidybulk_to_SummarizedExperiment() - expect_equal( class(res)[1], "SummarizedExperiment" ) + expect_equal( class(res)[1], "SummarizedExperiment" ) - expect_equal( nrow(res), 100 ) + expect_equal( nrow(res), 100 ) - expect_equal( ncol(res), 8 ) + expect_equal( ncol(res), 8 ) }) test_that("tidybulk SummarizedExperiment normalisation manual",{ - res = tidybulk(tidybulk:::tidybulk_to_SummarizedExperiment(scale_abundance(tidybulk(se) %>% identify_abundant()))) + res = tidybulk(tidybulk:::tidybulk_to_SummarizedExperiment(scale_abundance(tidybulk(se) %>% identify_abundant()))) - res2 = tidybulk(se) %>% identify_abundant() %>% scale_abundance() + res2 = tidybulk(se) %>% identify_abundant() %>% scale_abundance() - res %>% distinct(sample, multiplier) %>% pull(multiplier) - res2 %>% distinct(sample, multiplier) %>% pull(multiplier) + res %>% distinct(.sample, multiplier) %>% pull(multiplier) + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) - expect_equal( - res %>% distinct(sample, multiplier) %>% pull(multiplier), - res2 %>% distinct(sample, multiplier) %>% pull(multiplier) %>% as.numeric(), - tolerance=1e-3 - ) + expect_equal( + res %>% distinct(.sample, multiplier) %>% pull(multiplier), + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) %>% as.numeric(), + tolerance=1e-3 + ) - expect_equal( nrow(res), 800 ) + expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 25 ) + expect_equal( ncol(res), 17 ) - res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) + res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) - expect_equal( res, "counts_scaled" ) + expect_equal( res, "counts_scaled" ) }) test_that("tidybulk SummarizedExperiment normalisation",{ - res = se %>% identify_abundant() %>% scale_abundance() + res = se %>% identify_abundant() %>% scale_abundance() - expect_equal( - names(SummarizedExperiment::assays(res)), - c("counts" ,"counts_scaled") - ) + expect_equal( + names(SummarizedExperiment::assays(res)), + c("counts" ,"counts_scaled") + ) }) @@ -85,80 +85,80 @@ test_that("tidybulk SummarizedExperiment normalisation subset",{ test_that("tidybulk SummarizedExperiment clustering",{ - res = cluster_elements(se, method="kmeans", centers = 2) + res = cluster_elements(se, method="kmeans", centers = 2) - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "cluster_kmeans" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "cluster_kmeans" + ) - expect_equal( - levels(SummarizedExperiment::colData(res)$cluster_kmeans), - c("1", "2") - ) + expect_equal( + levels(SummarizedExperiment::colData(res)$cluster_kmeans), + c("1", "2") + ) }) test_that("tidybulk SummarizedExperiment clustering",{ - res = se %>% identify_abundant() %>% reduce_dimensions(method="PCA") + res = se %>% identify_abundant() %>% reduce_dimensions(method="PCA") - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "PC2" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "PC2" + ) }) test_that("Get rotated dimensions - SummarizedExperiment",{ - res.pca = - reduce_dimensions(se %>% identify_abundant(), method="PCA" ) + res.pca = + reduce_dimensions(se %>% identify_abundant(), method="PCA" ) - res = - rotate_dimensions( - res.pca, - dimension_1_column = PC1, - dimension_2_column = PC2, - rotation_degrees = 45 - ) + res = + rotate_dimensions( + res.pca, + dimension_1_column = PC1, + dimension_2_column = PC2, + rotation_degrees = 45 + ) - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "PC2_rotated_45" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "PC2_rotated_45" + ) }) test_that("Drop redundant correlated - SummarizedExperiment",{ - res = - remove_redundancy( - se, - method = "correlation", correlation_threshold = 0.99 ) + res = + remove_redundancy( + se, + method = "correlation", correlation_threshold = 0.99 ) - expect_equal( - nrow(res), - 100 - ) + expect_equal( + nrow(res), + 100 + ) }) test_that("Get adjusted counts - SummarizedExperiment",{ - cm = se_mini - cm$batch = 0 - cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 + cm = se_mini + cm$batch = 0 + cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 - res = - adjust_abundance( - cm %>% identify_abundant(), - ~ condition + batch - ) + res = + adjust_abundance( + cm %>% identify_abundant(), + ~ condition + batch + ) - expect_equal(nrow(res), 527 ) + expect_equal(nrow(res), 527 ) - expect_equal( names(SummarizedExperiment::assays(res)), c("count" ,"count_adjusted") ) + expect_equal( names(SummarizedExperiment::assays(res)), c("count" ,"count_adjusted") ) }) @@ -172,287 +172,287 @@ test_that("Aggregate duplicated transcript - SummarizedExperiment",{ res = se %>% - aggregate_duplicates( .transcript = bla ) + aggregate_duplicates( .transcript = bla ) - expect_equal( dim(res), c( 99, 8 ) ) + expect_equal( dim(res), c( 99, 8 ) ) }) test_that("Add cell type proportions - SummarizedExperiment",{ - res = deconvolve_cellularity(se_mini, cores=1 ) + res = deconvolve_cellularity(se_mini, cores=1 ) - expect_equal( - as.numeric(as.data.frame(res@colData[1, 4:7])), - c( 0.6196622 ,0.2525598, 0.0000000, 0.0000000), - tolerance=1e-3 - ) + expect_equal( + as.numeric(as.data.frame(res@colData[1, 6:9])), + c( 0.619662 , 0.25256 , 0 , 0), + tolerance=1e-3 + ) }) test_that("differential trancript abundance - SummarizedExperiment",{ - res = test_differential_abundance( - se_mini %>% - identify_abundant(factor_of_interest = condition), - ~ condition - ) - - w = match( c("CLEC7A" , "FAM198B", "FCN1" , "HK3" ), rownames(res) ) - - # Quasi likelihood - res_tibble = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), - ~ condition , - a, b, c - ) - - expect_equal( - res@elementMetadata[w,]$logFC, - c(-11.58385, -13.53406, -12.58204, -12.19271), - tolerance=1e-4 - ) - - expect_equal( - res@elementMetadata[w,]$logFC, - res_tibble %>% - pivot_transcript(b) %>% - filter(b %in% rownames(res)[w]) %>% - dplyr::arrange(b) %>% - dplyr::pull(logFC), - tolerance=1e-4 - ) - - # Likelihood ratio - res2 = test_differential_abundance( - se_mini %>% - identify_abundant(factor_of_interest = condition), - ~ condition, method = "edgeR_likelihood_ratio" ) - - res2_tibble = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), - ~ condition , - a, b, c, method = "edgeR_likelihood_ratio" - ) - - expect_equal( - res2@elementMetadata[w,]$logFC, - c(-11.57989, -13.53476, -12.57969, -12.19303), - tolerance=1e-4 - ) - - expect_equal( - res2@elementMetadata[w,]$logFC, - res2_tibble %>% - pivot_transcript(b) %>% - filter(b %in% rownames(res)[w]) %>% - dplyr::arrange(b) %>% - dplyr::pull(logFC), - tolerance=1e-4 - ) - - # Treat - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% - test_differential_abundance( - ~ condition, - .sample = a, - .transcript = b, - .abundance = c, - scaling_method = "TMM", - method = "edgeR_likelihood_ratio", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() %>% - filter(FDR<0.05) %>% - nrow %>% - expect_equal(169) + res = test_differential_abundance( + se_mini %>% + identify_abundant(factor_of_interest = condition), + ~ condition + ) + + w = match( c("CLEC7A" , "FAM198B", "FCN1" , "HK3" ), rownames(res) ) + + # Quasi likelihood + res_tibble = test_differential_abundance( + input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + ~ condition , + a, b, c + ) + + expect_equal( + res@elementMetadata[w,]$logFC, + c(-11.58385, -13.53406, -12.58204, -12.19271), + tolerance=1e-4 + ) + + expect_equal( + res@elementMetadata[w,]$logFC, + res_tibble %>% + pivot_transcript(b) %>% + filter(b %in% rownames(res)[w]) %>% + dplyr::arrange(b) %>% + dplyr::pull(logFC), + tolerance=1e-4 + ) + + # Likelihood ratio + res2 = test_differential_abundance( + se_mini %>% + identify_abundant(factor_of_interest = condition), + ~ condition, method = "edgeR_likelihood_ratio" ) + + res2_tibble = test_differential_abundance( + input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + ~ condition , + a, b, c, method = "edgeR_likelihood_ratio" + ) + + expect_equal( + res2@elementMetadata[w,]$logFC, + c(-11.57989, -13.53476, -12.57969, -12.19303), + tolerance=1e-4 + ) + + expect_equal( + res2@elementMetadata[w,]$logFC, + res2_tibble %>% + pivot_transcript(b) %>% + filter(b %in% rownames(res)[w]) %>% + dplyr::arrange(b) %>% + dplyr::pull(logFC), + tolerance=1e-4 + ) + + # Treat + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = condition) %>% + test_differential_abundance( + ~ condition, + .sample = a, + .transcript = b, + .abundance = c, + scaling_method = "TMM", + method = "edgeR_likelihood_ratio", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() %>% + filter(FDR<0.05) %>% + nrow %>% + expect_equal(169) }) test_that("Voom with treat method",{ - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% - test_differential_abundance( - ~ condition, - .sample = a, - .transcript = b, - .abundance = c, - method = "limma_voom", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() %>% - filter(adj.P.Val<0.05) %>% - nrow %>% - expect_equal(97) - - # with multiple contrasts - res <- - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = Cell.type) %>% - test_differential_abundance( - ~ 0 + Cell.type, - .sample = a, - .transcript = b, - .abundance = c, - .contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), - method = "limma_voom", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() - - res %>% - filter(adj.P.Val___Cell.typeb_cell.Cell.typemonocyte < 0.05) %>% - nrow %>% - expect_equal(293) - - res %>% - filter(adj.P.Val___Cell.typeb_cell.Cell.typet_cell < 0.05) %>% - nrow %>% - expect_equal(246) + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = condition) %>% + test_differential_abundance( + ~ condition, + .sample = a, + .transcript = b, + .abundance = c, + method = "limma_voom", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() %>% + filter(adj.P.Val<0.05) %>% + nrow %>% + expect_equal(97) + + # with multiple contrasts + res <- + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = Cell.type) %>% + test_differential_abundance( + ~ 0 + Cell.type, + .sample = a, + .transcript = b, + .abundance = c, + .contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), + method = "limma_voom", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() + + res %>% + filter(adj.P.Val___Cell.typeb_cell.Cell.typemonocyte < 0.05) %>% + nrow %>% + expect_equal(293) + + res %>% + filter(adj.P.Val___Cell.typeb_cell.Cell.typet_cell < 0.05) %>% + nrow %>% + expect_equal(246) }) test_that("filter abundant - SummarizedExperiment",{ - res = keep_abundant( se ) + res = keep_abundant( se ) - expect_equal( nrow(res), 23 ) + expect_equal( nrow(res), 23 ) }) test_that("filter variable - no object",{ - res = keep_variable(se, top = 5 ) + res = keep_variable(se, top = 5 ) - expect_equal( nrow(res),5 ) + expect_equal( nrow(res),5 ) - res = - keep_variable( - se_mini, - top = 5 - ) + res = + keep_variable( + se_mini, + top = 5 + ) - expect_equal( nrow(res),5 ) + expect_equal( nrow(res),5 ) - expect_equivalent( - sort(rownames(res)), - c("FCN1", "IGHD", "IGHM", "IGKC", "TCL1A") - ) + expect_equivalent( + sort(rownames(res)), + c("FCN1", "IGHD", "IGHM", "IGKC", "TCL1A") + ) }) test_that("impute missing",{ - res = - input_df %>% - dplyr::slice(-1) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - impute_missing_abundance( ~ condition ) + res = + input_df %>% + dplyr::slice(-1) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + impute_missing_abundance( ~ condition ) - expect_equal( SummarizedExperiment::assays(res) %>% as.list() %>% .[[1]] %>% .["TNFRSF4", "SRR1740034"], 6 ) + expect_equal( SummarizedExperiment::assays(res) %>% as.list() %>% .[[1]] %>% .["TNFRSF4", "SRR1740034"], 6 ) - expect_equal( nrow(res)*ncol(res), nrow(input_df) ) + expect_equal( nrow(res)*ncol(res), nrow(input_df) ) }) test_that("differential composition",{ - # Cibersort - se_mini %>% - test_differential_cellularity(. ~ condition , cores = 1 ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% - expect_equal( -2, tollerance =1e-3) - - # llsr - se_mini %>% - test_differential_cellularity( - . ~ condition, - method="llsr" - ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% - expect_equal( -2, tollerance =1e-3) - - # Survival analyses - input_df %>% - dplyr::select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_differential_cellularity( - survival::Surv(days, dead) ~ ., - cores = 1 - ) %>% - pull(estimate) %>% - .[[1]] %>% - expect_equal(26.2662279, tolerance = 30) - # round() %in% c( - # 26, # 97 is the github action MacOS that has different value - # 26, # 112 is the github action UBUNTU that has different value - # 26 # 93 is the github action Windows that has different value - # ) %>% - # expect_true() + # Cibersort + se_mini %>% + test_differential_cellularity(. ~ condition , cores = 1 ) %>% + pull(`estimate_(Intercept)`) %>% + .[[1]] %>% + as.integer %>% + expect_equal( -2, tollerance =1e-3) + + # llsr + se_mini %>% + test_differential_cellularity( + . ~ condition, + method="llsr" + ) %>% + pull(`estimate_(Intercept)`) %>% + .[[1]] %>% + as.integer %>% + expect_equal( -2, tollerance =1e-3) + + # Survival analyses + input_df %>% + dplyr::select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_differential_cellularity( + survival::Surv(days, dead) ~ ., + cores = 1 + ) %>% + pull(estimate) %>% + .[[1]] %>% + expect_equal(26.2662279, tolerance = 30) + # round() %in% c( + # 26, # 97 is the github action MacOS that has different value + # 26, # 112 is the github action UBUNTU that has different value + # 26 # 93 is the github action Windows that has different value + # ) %>% + # expect_true() }) test_that("test_stratification_cellularity",{ - # Cibersort - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_stratification_cellularity( - survival::Surv(days, dead) ~ ., - cores = 1 - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% - expect_equal(3.35, tolerance =1e-1) - - # llsr - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - test_stratification_cellularity( - survival::Surv(days, dead) ~ ., - .sample = a, - .transcript = b, - .abundance = c, - method = "llsr" - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% - expect_equal(3.35, tolerance =1e-1) + # Cibersort + input_df %>% + select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_stratification_cellularity( + survival::Surv(days, dead) ~ ., + cores = 1 + ) %>% + pull(.low_cellularity_expected) %>% + .[[1]] %>% + expect_equal(3.35, tolerance =1e-1) + + # llsr + input_df %>% + select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + test_stratification_cellularity( + survival::Surv(days, dead) ~ ., + .sample = a, + .transcript = b, + .abundance = c, + method = "llsr" + ) %>% + pull(.low_cellularity_expected) %>% + .[[1]] %>% + expect_equal(3.35, tolerance =1e-1) }) @@ -498,28 +498,27 @@ test_that("test_stratification_cellularity",{ test_that("pivot",{ - expect_equal( ncol(pivot_sample(se_mini) ), 4) + expect_equal( ncol(pivot_sample(se_mini) ), 6) - expect_equal( ncol(pivot_transcript(se_mini) ), 1) + expect_equal( ncol(pivot_transcript(se_mini) ), 2) }) test_that("gene over representation",{ - df_entrez = symbol_to_entrez(input_df, .transcript = b, .sample = a) - df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) - df_entrez = mutate(df_entrez, do_test = b %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) + df_entrez = aggregate_duplicates(input_df, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) + df_entrez = mutate(df_entrez, do_test = b %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) - res = - df_entrez %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_gene_overrepresentation( - .entrez = entrez, - .do_test = do_test, - species="Homo sapiens" - ) + res = + df_entrez %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_gene_overrepresentation( + .entrez = entrez, + .do_test = do_test, + species="Homo sapiens" + ) - expect_equal( ncol(res), 10 ) + expect_equal( ncol(res), 10 ) @@ -531,22 +530,22 @@ test_that("Only reduced dimensions MDS - no object",{ - res = - breast_tcga_mini_SE %>% - reduce_dimensions(method = "MDS") + res = + breast_tcga_mini_SE %>% + reduce_dimensions(method = "MDS") - expect_equal( - res$`Dim1`[1:4], - c(-0.2723808836, -0.1105770207, -0.3034092668, -0.0064569358), - tolerance=10 - ) + expect_equal( + res$`Dim1`[1:4], + c(-0.2723808836, -0.1105770207, -0.3034092668, -0.0064569358), + tolerance=10 + ) - expect_equal( - ncol(colData(res)), - 3 - ) + expect_equal( + ncol(colData(res)), + 3 + ) - expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) + expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) })