Merge pull request #237 from stemangiola/SE_to_tidybulk_new_vocabulary

implement new vocabulary
stemangiola · Jun 25, 2022 · b31ffe0 · b31ffe0
2 parents f8d38ba + 6def641
commit b31ffe0
Show file tree

Hide file tree

Showing 27 changed files with 708 additions and 635 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -32,7 +32,8 @@ Imports:
     scales,
     SummarizedExperiment,
     GenomicRanges,
-    methods
+    methods,
+    S4Vectors
 Suggests:
     BiocStyle,
     testthat,
@@ -53,7 +54,6 @@ Suggests:
     Seurat,
     KernSmooth,
     Rtsne,
-    S4Vectors,
     ggplot2,
     widyr,
     clusterProfiler,
@@ -82,7 +82,7 @@ Biarch: true
 biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 LazyDataCompression: xz
 URL: https://github.com/stemangiola/tidybulk
 BugReports: https://github.com/stemangiola/tidybulk/issues
diff --git a/NAMESPACE b/NAMESPACE
@@ -81,6 +81,7 @@ import(readr)
 import(tibble)
 import(tidyr)
 importFrom(GenomicRanges,makeGRangesListFromDataFrame)
+importFrom(S4Vectors,metadata)
 importFrom(SummarizedExperiment,SummarizedExperiment)
 importFrom(SummarizedExperiment,assays)
 importFrom(SummarizedExperiment,colData)

diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R
@@ -723,7 +723,7 @@ rowwise.tidybulk <- function(data, ...)
 #'
 #' @examples
 #'`%>%` = magrittr::`%>%`
-#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
+#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
 #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% left_join(annotation)
 #'
 #' @rdname dplyr-methods
@@ -763,7 +763,7 @@ left_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
 #'
 #' @examples
 #'`%>%` = magrittr::`%>%`
-#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
+#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
 #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% inner_join(annotation)
 #'
 #' @rdname join-methods
@@ -802,7 +802,7 @@ inner_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
 #'
 #' @examples
 #'`%>%` = magrittr::`%>%`
-#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
+#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
 #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% right_join(annotation)
 #'
 #' @rdname join-methods
@@ -843,7 +843,7 @@ right_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x",
 #'
 #' @examples
 #'`%>%` = magrittr::`%>%`
-#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU")
+#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU")
 #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% full_join(annotation)
 #'
 #' @rdname join-methods

diff --git a/R/methods.R b/R/methods.R
@@ -31,7 +31,7 @@ setOldClass("tidybulk")
 #'
 #' @examples
 #'
-#' my_tt =  tidybulk(tidybulk::se_mini)
+#' tidybulk(tidybulk::se_mini)
 #'
 #'
 #' @docType methods
@@ -1353,9 +1353,7 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy)
 #' cm$batch = 0
 #' cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1
 #'
-#' res =
 #'  cm %>%
-#'  tidybulk(sample, transcript, count) |>
 #'  identify_abundant() |>
 #' 	adjust_abundance(	~ condition + batch	)
 #'
@@ -1675,7 +1673,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates)
 #' library(dplyr)
 #'
 #' # Subsetting for time efficiency
-#' tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1)
+#' tidybulk::se_mini |> deconvolve_cellularity(cores = 1)
 #'
 #'
 #' @docType methods
@@ -1815,7 +1813,10 @@ setMethod("deconvolve_cellularity",
 #'
 #' @examples
 #'
-#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample)
+#' # This function was designed for data.frame
+#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
+#'
+#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample)
 #'
 #' @export
 #'
@@ -2014,7 +2015,10 @@ setMethod("describe_transcript", "tidybulk", .describe_transcript)
 #'
 #' library(dplyr)
 #'
-#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature)
+#' # This function was designed for data.frame
+#' # Convert from SummarizedExperiment for this example. It is NOT recommended.
+#'
+#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature)
 #'
 #'
 #'
@@ -2882,8 +2886,10 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant)
 #' @examples
 #' \dontrun{
 #'
-#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
-#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
+#' library(SummarizedExperiment)
+#' se = tidybulk::se_mini
+#' rowData( se)$entrez = rownames(se )
+#' df_entrez = aggregate_duplicates(se,.transcript = entrez )
 #'
 #' library("EGSEA")
 #'
@@ -3075,9 +3081,8 @@ setMethod("test_gene_enrichment",
 #'
 #' @examples
 #'
-#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
-#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
-#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
+#' #se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez)
+#' #df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
 #'
 #' \dontrun{
 #' 	test_gene_overrepresentation(
@@ -3245,15 +3250,14 @@ setMethod("test_gene_overrepresentation",
 #'
 #' \dontrun{
 #'
-#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample)
-#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count)
-#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
+#' df_entrez = tidybulk::se_mini
+#' df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7"))
 #' df_entrez  = df_entrez %>% test_differential_abundance(~ condition)
 #'
 #'
 #'	test_gene_rank(
 #'		df_entrez,
-#' 		.sample = sample,
+#' 		.sample = .sample,
 #'		.entrez = entrez,
 #' 		species="Homo sapiens",
 #'    gene_sets =c("C2"),
@@ -3591,7 +3595,7 @@ setMethod("pivot_transcript",
 #'
 #' @examples
 #'
-#' tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0)
+#' # tidybulk::se_mini |>  fill_missing_abundance( fill_with = 0)
 #'
 #'
 #' @docType methods
@@ -3862,19 +3866,8 @@ setMethod("impute_missing_abundance", "tidybulk", .impute_missing_abundance)
 #' 	)
 #'
 #' 	# Cox regression - multiple
-#' 	library(dplyr)
-#' 	library(tidyr)
 #'
 #'	tidybulk::se_mini |>
-#'	   tidybulk() |>
-#'
-#'		# Add survival data
-#'		nest(data = -sample) |>
-#'		mutate(
-#'			days = c(1, 10, 500, 1000, 2000),
-#'			dead = c(1, 1, 1, 0, 1)
-#'		) %>%
-#'		unnest(data) |>
 #'
 #'		# Test
 #'		test_differential_cellularity(
@@ -4019,15 +4012,6 @@ setMethod("test_differential_cellularity",
 #' library(tidyr)
 #'
 #'	tidybulk::se_mini |>
-#'	   tidybulk() |>
-#'
-#'	# Add survival data
-#'	nest(data = -sample) |>
-#'	mutate(
-#'		days = c(1, 10, 500, 1000, 2000),
-#'		dead = c(1, 1, 1, 0, 1)
-#'	) %>%
-#'	unnest(data) |>
 #'	test_stratification_cellularity(
 #'		survival::Surv(days, dead) ~ .,
 #'		cores = 1
@@ -4138,10 +4122,8 @@ setMethod("test_stratification_cellularity",
 #'
 #' @examples
 #'
-#' # Define tidybulk tibble
-#' df = tidybulk(tidybulk::se_mini)
 #'
-#' get_bibliography(df)
+#' get_bibliography(tidybulk::se_mini)
 #'
 #'
 #'
@@ -4236,9 +4218,8 @@ setMethod("get_bibliography",
 #'
 #' @examples
 #'
-#' library(dplyr)
 #'
-#' tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature)
+#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature)
 #'
 #' @export
 as_matrix <- function(tbl,

diff --git a/R/methods_SE.R b/R/methods_SE.R
@@ -24,41 +24,12 @@
 								~ as.symbol(.x),
 								~ NULL)
 
-	sample_info <-
-		colData(.data) %>%
+	.as_tibble_optimised(.data) %>%
 
-		# If reserved column names are present add .x
-		change_reserved_column_names() %>%
-
-		# Convert to tibble
-		tibble::as_tibble(rownames="sample")
-
-
-	range_info <-
-		 get_special_datasets(.data) %>%
-			reduce(left_join, by="coordinate")
-
-	gene_info <-
-		rowData(.data) %>%
-
-		# If reserved column names are present add .x
-		change_reserved_column_names() %>%
-
-		# Convert to tibble
-		tibble::as_tibble(rownames="feature")
-
-	count_info <- get_count_datasets(.data)
-
-	# Return
-	count_info %>%
-	left_join(sample_info, by="sample") %>%
-	left_join(gene_info, by="feature") %>%
-	when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>%
-
-	mutate_if(is.character, as.factor) %>%
+	# mutate_if(is.character, as.factor) %>%
 	tidybulk(
-		sample,
-		feature,
+		!!as.symbol(sample__$name),
+		!!as.symbol(feature__$name),
 		!!as.symbol(SummarizedExperiment::assays(.data)[1] %>%  names	),
 		!!norm_col # scaled counts if any
 	)
@@ -787,23 +758,22 @@ setMethod("adjust_abundance",
 
   collapse_function = function(x){ x %>% unique() %>% paste(collapse = "___")	}
 
-  feature_column_name = ".feature"
 
   # Row data
   new_row_data =
     .data %>%
     rowData() %>%
-    as_tibble(rownames = feature_column_name) %>%
+    as_tibble(rownames = feature__$name) %>%
     group_by(!!as.symbol(quo_name(.transcript))) %>%
     summarise(
       across(everything(), ~ .x %>% collapse_function()),
       merged.transcripts = n()
     ) %>%
-    arrange(!!as.symbol(feature_column_name)) %>%
+    arrange(!!as.symbol(feature__$name)) %>%
     as.data.frame()
 
-  rownames(new_row_data) = new_row_data[,feature_column_name]
-  new_row_data = new_row_data %>% select(-feature_column_name)
+  rownames(new_row_data) = new_row_data[,feature__$name]
+  new_row_data = new_row_data %>% select(-feature__$name)
 
   # Counts
   new_count_data =
@@ -824,7 +794,7 @@ setMethod("adjust_abundance",
     )
 
   # GRanges
-  columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name)
+  columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature__$name)
 
   rr = rowRanges(.data)
 
@@ -834,27 +804,27 @@ setMethod("adjust_abundance",
       as_tibble() %>%
       # Add names
       when(
-        is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name),
-        ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME)
+        is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature__$name) := group_name),
+        ~ mutate(., !!as.symbol(feature__$name) := rr@ranges@NAME)
       ) %>%
       left_join(
         rowData(.data) %>%
           as.data.frame() %>%
           select(!!as.symbol(quo_name(.transcript))) %>%
-          as_tibble(rownames =feature_column_name),
-            by = feature_column_name
+          as_tibble(rownames =feature__$name),
+            by = feature__$name
       ) %>%
       group_by(!!as.symbol(quo_name(.transcript))) %>%
       mutate(
         across(columns_to_collapse, ~ .x %>% collapse_function()),
         merged.transcripts = n()
       ) %>%
-      arrange(!!as.symbol(feature_column_name)) %>%
+      arrange(!!as.symbol(feature__$name)) %>%
 
       select(-one_of("group_name", "group")) %>%
       suppressWarnings() %>%
 
-      makeGRangesListFromDataFrame( split.field = feature_column_name,
+      makeGRangesListFromDataFrame( split.field = feature__$name,
                                     keep.extra.columns = TRUE) %>%
 
       .[match(rownames(new_count_data[[1]]), names(.))]
@@ -1894,7 +1864,7 @@ setMethod("test_gene_rank",
 		) %>%
 
 		# Convert to tibble
-		tibble::as_tibble(rownames="sample")
+		tibble::as_tibble(rownames=sample__$name)
 
 
 
@@ -1934,7 +1904,7 @@ setMethod("pivot_sample",
 
 	range_info <-
 		get_special_datasets(.data) %>%
-		reduce(left_join, by="feature")
+		reduce(left_join, by=feature__$name)
 
 	gene_info <-
 		rowData(.data) %>%
@@ -1946,11 +1916,11 @@ setMethod("pivot_sample",
 		) %>%
 
 		# Convert to tibble
-		tibble::as_tibble(rownames="feature")
+		tibble::as_tibble(rownames=feature__$name)
 
 	gene_info %>%
 		when(
-			nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by="feature"),
+			nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=feature__$name),
 			~ (.)
 		)
 }