From 1c68731f4f9ead006b40bf0d731eade7953eeb27 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 7 Jun 2022 13:39:46 +1000 Subject: [PATCH 01/38] implementation parallel cibersort --- DESCRIPTION | 2 +- R/cibersort.R | 72 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3742888..d9666716 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/cibersort.R b/R/cibersort.R index fe1b73a5..22f27e91 100755 --- a/R/cibersort.R +++ b/R/cibersort.R @@ -1,3 +1,5 @@ +# Public code https://rdrr.io/github/IOBR/IOBR/src/R/CIBERSORT.R + # CIBERSORT R script v1.03 (last updated 07-10-2015) # Note: Signature matrix construction is not currently available; use java version for full functionality. # Author: Aaron M. Newman, Stanford University (amnewman@stanford.edu) @@ -172,6 +174,34 @@ doPerm <- function(perm, X, Y, cores = 3){ newList <- list("dist" = dist) } +# MADE BY STEFANO TO ALLOW PARALLELISM +call_core = function(itor, Y, X, P, pval, CoreAlg){ + ################################## + ## Analyze the first mixed sample + ################################## + + y <- Y[,itor] + + #standardize mixture + y <- (y - mean(y)) / sd(y) + + #run SVR core algorithm + result <- CoreAlg(X, y, cores = 1) + + #get results + w <- result$w + mix_r <- result$mix_r + mix_rmse <- result$mix_rmse + + #calculate p-value + if(P > 0) {pval <- 1 - (which.min(abs(nulldist - mix_r)) / length(nulldist))} + + #print output + c(colnames(Y)[itor],w,pval,mix_r,mix_rmse) + +} + + #' @importFrom stats sd #' @importFrom utils install.packages #' @@ -248,45 +278,37 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3, exp_transform = FALSE #empirical null distribution of correlation coefficients if(P > 0) {nulldist <- sort(doPerm(P, X, Y, cores = cores)$dist)} - #print(nulldist) header <- c('Mixture',colnames(X),"P-value","Correlation","RMSE") - #print(header) output <- matrix() itor <- 1 mix <- dim(Y)[2] pval <- 9999 - #iterate through mix - while(itor <= mix){ - - ################################## - ## Analyze the first mixed sample - ################################## - - y <- Y[,itor] + # If not Windows + if(Sys.info()['sysname'] == 'Windows') + { + while(itor <= mix){ - #standardize mixture - y <- (y - mean(y)) / sd(y) + ################################## + ## Analyze the first mixed sample + ################################## - #run SVR core algorithm - result <- CoreAlg(X, y, cores = cores) - #get results - w <- result$w - mix_r <- result$mix_r - mix_rmse <- result$mix_rmse + out <- call_core(itor, Y, X, P, pval, CoreAlg) + if(itor == 1) {output <- out} + else {output <- rbind(output, out)} + itor <- itor + 1 - #calculate p-value - if(P > 0) {pval <- 1 - (which.min(abs(nulldist - mix_r)) / length(nulldist))} + } - #print output - out <- c(colnames(Y)[itor],w,pval,mix_r,mix_rmse) - if(itor == 1) {output <- out} - else {output <- rbind(output, out)} + } - itor <- itor + 1 + # If Linux of Mac + else { + output <- parallel::mclapply(1:mix, call_core, Y, X, P, pval, CoreAlg, mc.cores=cores) + output= matrix(unlist(output), nrow=length(output), byrow=TRUE) } From 11e05b391d0bc99a16d46bf544e1d16666a6463c Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 09:49:47 +1000 Subject: [PATCH 02/38] replace if_else once --- R/methods.R | 1 + R/utilities.R | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/methods.R b/R/methods.R index 911e9447..87db987b 100755 --- a/R/methods.R +++ b/R/methods.R @@ -153,6 +153,7 @@ setGeneric("as_SummarizedExperiment", function(.data, install.packages("BiocManager", repos = "https://cloud.r-project.org") BiocManager::install("S4Vectors", ask = FALSE) } + # If present get the scaled abundance .abundance_scaled = .data %>% diff --git a/R/utilities.R b/R/utilities.R index 8cb99cec..e6476b26 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -905,12 +905,11 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, map( ~ .x %>% - ifelse_pipe( + when( .data %>% distinct(!!.horizontal, !!as.symbol(.x)) %>% nrow %>% - equals(n_x), - ~ .x, + equals(n_x) ~ .x, ~ NULL ) ) %>% From 29efe9c341f14e3a1ae7cef02ce992a56b07b7bc Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 11:59:19 +1000 Subject: [PATCH 03/38] make non standard columns not a problem --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/functions.R | 109 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3742888..d9666716 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/NAMESPACE b/NAMESPACE index e28aaff1..f81b5f66 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,6 +86,7 @@ importFrom(SummarizedExperiment,assays) importFrom(SummarizedExperiment,colData) importFrom(SummarizedExperiment,rowData) importFrom(SummarizedExperiment,rowRanges) +importFrom(dplyr,across) importFrom(dplyr,arrange) importFrom(dplyr,bind_rows) importFrom(dplyr,distinct) diff --git a/R/functions.R b/R/functions.R index 6bf6348a..f74b8cb3 100755 --- a/R/functions.R +++ b/R/functions.R @@ -2247,6 +2247,8 @@ get_rotated_dimensions = #' @importFrom dplyr bind_rows #' @importFrom magrittr %$% #' @importFrom rlang := +#' @importFrom tidyr replace_na +#' @importFrom dplyr across #' #' @param .data A tibble #' @param .sample The name of the sample column @@ -2291,14 +2293,6 @@ aggregate_duplicated_transcripts_bulk = ret } - # Through warning if there are logicals of factor in the data frame - # because they cannot be merged if they are not unique - if ((lapply(.data, class) %>% unlist %in% c("logical", "factor")) %>% any) { - warning("tidybulk says: for aggregation, factors and logical columns were converted to character") - message("Converted to characters") - message(lapply(.data, class) %>% unlist %>% `[` (. %in% c("logical", "factor") %>% which)) - } - # Select which are the numerical columns numerical_columns = .data %>% @@ -2315,8 +2309,59 @@ aggregate_duplicated_transcripts_bulk = ~ .x %>% select(-!!( .data %>% get_tt_columns() %$% .abundance_scaled ))) %>% - colnames() %>% - c("n_aggr") + colnames() + + # Columns to be converted + columns_to_be_converted = + .data %>% + select_if(function(.x) is.logical(.x) | is.factor(.x)) %>% + colnames() + + # Count column to be aggregated + aggregate_count_columns = + quo_name(.abundance) %>% + when( + ".abundance_scaled" %in% (.data %>% get_tt_columns() %>% names) && + quo_name(.data %>% get_tt_columns() %$% .abundance_scaled) %in% (.data %>% colnames) ~ + (.) %>% c(.data %>% get_tt_columns() %$% .abundance_scaled), + ~ (.) + ) + + # Non standard column classes + non_standard_columns = + .data %>% + select( + -!!numerical_columns, + -columns_to_be_converted, + -group_cols(), + -aggregate_count_columns, + ) %>% + colnames() + + # Count duplicates + count_duplicates = + .data %>% + count(!!.sample,!!.transcript, name = "n_aggr") + + # Convert to character + + # Through warning if there are logicals of factor in the data frame + # because they cannot be merged if they are not unique + if (length(columns_to_be_converted)>0 & filter(count_duplicates, n_aggr>1) %>% nrow() %>% gt(0)) { + warning(paste(capture.output({ + cat(crayon::blue("tidybulk says: The following columns were converted to characters, as aggregating those classes with concatenation is not possible.\n")) + print(.data %>% select(columns_to_be_converted)) + }), collapse = "\n")) + } + + # Through warning if there are logicals of factor in the data frame + # because they cannot be merged if they are not unique + if (length(non_standard_columns)>0 & filter(count_duplicates, n_aggr>1) %>% nrow() %>% gt(0)) { + warning(paste(capture.output({ + cat(crayon::blue("tidybulk says: If duplicates exist from the following columns, only the first instance was taken (lossy behaviour), as aggregating those classes with concatenation is not possible.\n")) + print(.data %>% select(non_standard_columns)) + }), collapse = "\n")) + } # aggregates read .data over samples, concatenates other character columns, and averages other numeric columns .data %>% @@ -2326,8 +2371,7 @@ aggregate_duplicated_transcripts_bulk = mutate_if(is.logical, as.character) %>% # Add the number of duplicates for each gene - dplyr::left_join((.) %>% count(!!.sample,!!.transcript, name = "n_aggr"), - by = c(quo_name(.sample), quo_name(.transcript))) %>% + dplyr::left_join(count_duplicates, by = c(quo_name(.sample), quo_name(.transcript))) %>% # Anonymous function - binds the unique and the reduced genes, # in the way we have to reduce redundancy just for the duplicated genes @@ -2337,40 +2381,25 @@ aggregate_duplicated_transcripts_bulk = dplyr::bind_rows( # Unique symbols (.) %>% - filter(n_aggr == 1), + filter(n_aggr == 1) %>% + select(-n_aggr), # Duplicated symbols (.) %>% filter(n_aggr > 1) %>% + select(-n_aggr) %>% group_by(!!.sample,!!.transcript) %>% - dplyr::mutate(!!.abundance := !!.abundance %>% aggregation_function()) %>% - - # If scaled abundance exists aggregate that as well - ifelse_pipe(( - ".abundance_scaled" %in% (.data %>% get_tt_columns() %>% names) && - # .data %>% get_tt_columns() %$% .abundance_scaled %>% is.null %>% not() && - quo_name(.data %>% get_tt_columns() %$% .abundance_scaled) %in% (.data %>% colnames) - ), - ~ { - .abundance_scaled = .data %>% get_tt_columns() %$% .abundance_scaled - .x %>% dplyr::mutate(!!.abundance_scaled := !!.abundance_scaled %>% aggregation_function()) - }) %>% - - mutate_at(vars(numerical_columns), mean) %>% - mutate_at( - vars( - -group_cols(), - -contains(quo_name(.abundance)), - -!!numerical_columns - ), - list( ~ paste3(unique(.), collapse = ", ")) - ) %>% - distinct() - ) - } %>% - # Rename column of number of duplicates for each gene - rename(merged_transcripts = n_aggr) %>% + dplyr::summarise( + across(aggregate_count_columns, ~ .x %>% aggregation_function()), + across(where(is.character), ~ paste3(unique(.x), collapse = ", ")), + across(non_standard_columns, ~ unique(.x)[1]), + merged_transcripts = n() + ) + + ) %>% + replace_na(list(merged_transcripts = 1)) + } %>% # Attach attributes reattach_internals(.data) From 758e08b71be8bc495dea58665d030580ca823069 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 14:21:26 +1000 Subject: [PATCH 04/38] polish and make more robust also for SE --- R/functions.R | 3 +- R/methods_SE.R | 110 ++++++++++++++++++++++++++++++++++++++----------- R/utilities.R | 10 +++-- 3 files changed, 96 insertions(+), 27 deletions(-) diff --git a/R/functions.R b/R/functions.R index f74b8cb3..d50fbe11 100755 --- a/R/functions.R +++ b/R/functions.R @@ -2330,7 +2330,8 @@ aggregate_duplicated_transcripts_bulk = # Non standard column classes non_standard_columns = .data %>% - select( + select_if(select_non_standard_column_class) %>% + select_if( -!!numerical_columns, -columns_to_be_converted, -group_cols(), diff --git a/R/methods_SE.R b/R/methods_SE.R index dd65d816..8a25f803 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -789,6 +789,23 @@ setMethod("adjust_abundance", feature_column_name = ".feature" + # Non standard column classes + non_standard_columns = + .data %>% + rowData() %>% + as_tibble() %>% + select_if(select_non_standard_column_class) %>% + colnames() + + # GRanges + columns_to_collapse = + .data %>% + rowData() %>% + colnames() %>% + outersect(non_standard_columns) %>% + setdiff(quo_name(.transcript)) %>% + c(feature_column_name) + # Row data new_row_data = .data %>% @@ -796,14 +813,19 @@ setMethod("adjust_abundance", as_tibble(rownames = feature_column_name) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% summarise( - across(everything(), ~ .x %>% collapse_function()), - merged.transcripts = n() + across(columns_to_collapse, ~ .x %>% collapse_function()), + across(non_standard_columns, ~ .x[1]), + merged_transcripts = n() ) %>% arrange(!!as.symbol(feature_column_name)) %>% - as.data.frame() + as.data.frame() %>% + { + .x = (.) + rownames(.x) = .x[,feature_column_name] + .x = .x %>% select(-feature_column_name) + .x + } - rownames(new_row_data) = new_row_data[,feature_column_name] - new_row_data = new_row_data %>% select(-feature_column_name) # Counts new_count_data = @@ -816,49 +838,91 @@ setMethod("adjust_abundance", if(is_data_frame) .x = .x %>% as.matrix() rownames(.x) = rowData(.data)[,quo_name(.transcript)] .x = combineByRow(.x, aggregation_function) - rownames(.x) = new_row_data[match(rownames(.x), new_row_data[,quo_name(.transcript)]),] %>% rownames() - .x = .x[match(rownames(new_row_data), rownames(.x)),] + rownames(.x) = new_row_data[match(rownames(.x), new_row_data[,quo_name(.transcript)]),,drop=FALSE] %>% rownames() + .x = .x[match(rownames(new_row_data), rownames(.x)),,drop=FALSE] if(is_data_frame) .x = .x %>% as.data.frame() .x } ) - # GRanges - columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name) + + + rr = rowRanges(.data) - if(!is.null(rr)) + # Gene ID dataset + gene_id_dataset = + rowData(.data) %>% + as.data.frame() %>% + select(!!as.symbol(quo_name(.transcript))) + + + + + if(!is.null(rr)){ + new_range_data = rr %>% as_tibble() %>% + # Add names when( is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), - ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME) - ) %>% - left_join( - rowData(.data) %>% - as.data.frame() %>% - select(!!as.symbol(quo_name(.transcript))) %>% - as_tibble(rownames =feature_column_name), - by = feature_column_name - ) %>% + .hasSlot(rr@ranges, "NAME") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME), + .hasSlot(rr@ranges, "NAMES") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAMES), + ~ stop("tidybulk says: I don't know how to aggregate the GRanges, as the slot ranges@NAMES, or range@NAME do not exist") + ) + + # Through warning if there are logicals of factor in the data frame + # because they cannot be merged if they are not unique + if (length(non_standard_columns)>0 & new_range_data %>% pull(!!as.symbol(feature_column_name)) %>% duplicated() %>% which() %>% length() %>% gt(0) ) { + warning(paste(capture.output({ + cat(crayon::blue("tidybulk says: If duplicates exist from the following columns, only the first instance was taken (lossy behaviour), as aggregating those classes with concatenation is not possible.\n")) + print(.data %>% select(non_standard_columns)) + }), collapse = "\n")) + } + + + + new_range_data = new_range_data %>% + + # I have to use this trick because rowRanges() and rowData() share @elementMetadata + select(-one_of(colnames(gene_id_dataset))) %>% + suppressWarnings() %>% + + # Join rowData + left_join(gene_id_dataset %>% as_tibble(rownames =feature_column_name), by = feature_column_name) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% mutate( across(columns_to_collapse, ~ .x %>% collapse_function()), - merged.transcripts = n() + across(non_standard_columns, ~ .x[1]), + merged_transcripts = n() ) %>% + distinct() %>% arrange(!!as.symbol(feature_column_name)) %>% select(-one_of("group_name", "group")) %>% suppressWarnings() %>% - makeGRangesListFromDataFrame( split.field = feature_column_name, - keep.extra.columns = TRUE) %>% + as.data.frame() %>% + # Add back rownames + { + .x = (.) + rownames(.x) = .x %>% pull(!!as.symbol(feature_column_name)) + .x + } - .[match(rownames(new_count_data[[1]]), names(.))] + if(is(rr, "CompressedGRangesList")) new_range_data = makeGRangesListFromDataFrame( + new_range_data, + split.field = feature_column_name, + keep.extra.columns = TRUE + ) + else if(is(rr, "GRanges")) new_range_data = makeGRangesFromDataFrame(new_range_data, keep.extra.columns = TRUE) + else stop("tidybulk says: riowRanges should be either GRanges or CompressedGRangesList. Or am I missing something?") + new_range_data = new_range_data %>% .[match(rownames(new_count_data[[1]]), names(.))] + } # Build the object .data_collapsed = diff --git a/R/utilities.R b/R/utilities.R index e6476b26..af9499dc 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1494,7 +1494,7 @@ combineByRow <- function(m, fun = NULL) { # Shown here #https://stackoverflow.com/questions/8139301/aggregate-rows-in-a-large-matrix-by-rowname - m <- m[ order(rownames(m)), ] + m <- m[ order(rownames(m)), ,drop=FALSE] ## keep track of previous row name prev <- rownames(m)[1] @@ -1517,7 +1517,7 @@ combineByRow <- function(m, fun = NULL) { ## combine all rows and mark invalid rows if (prev != curr || is.na(curr)) { if (i.start < i.end) { - m[i.start,] <- apply(m[i.start:i.end,], 2, fun) + m[i.start,,drop=FALSE] <- apply(m[i.start:i.end,,drop=FALSE], 2, fun) m.rownames[(1+i.start):i.end] <- NA } @@ -1528,7 +1528,7 @@ combineByRow <- function(m, fun = NULL) { } } - m[ which(!is.na(m.rownames)),] + m[ which(!is.na(m.rownames)),,drop=FALSE] } filter_genes_on_condition = function(.data, .subset_for_scaling){ @@ -1595,3 +1595,7 @@ fill_NA_matrix_with_factor_colwise = function(.data, factor){ .[rn, cn] } + +select_non_standard_column_class = function(.x){ + !is.numeric(.x) & !is.character(.x) & !is.factor(.x) & !is.logical(.x) +} From 442249f6a49830ab252d92722a2bee8d50446170 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 14:41:51 +1000 Subject: [PATCH 05/38] fix bug --- R/functions.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/functions.R b/R/functions.R index d50fbe11..a682b9bd 100755 --- a/R/functions.R +++ b/R/functions.R @@ -2330,13 +2330,13 @@ aggregate_duplicated_transcripts_bulk = # Non standard column classes non_standard_columns = .data %>% - select_if(select_non_standard_column_class) %>% - select_if( + select( -!!numerical_columns, -columns_to_be_converted, -group_cols(), -aggregate_count_columns, ) %>% + select_if(select_non_standard_column_class) %>% colnames() # Count duplicates From 948256a1705a2454e2f9778b36df96388746bf54 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 23:10:20 +1000 Subject: [PATCH 06/38] fix bug --- R/methods_SE.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 8a25f803..5b9ace49 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -876,7 +876,7 @@ setMethod("adjust_abundance", # Through warning if there are logicals of factor in the data frame # because they cannot be merged if they are not unique - if (length(non_standard_columns)>0 & new_range_data %>% pull(!!as.symbol(feature_column_name)) %>% duplicated() %>% which() %>% length() %>% gt(0) ) { + if (length(non_standard_columns)>0 & new_range_data %>% pull(!!.transcript) %>% duplicated() %>% which() %>% length() %>% gt(0) ) { warning(paste(capture.output({ cat(crayon::blue("tidybulk says: If duplicates exist from the following columns, only the first instance was taken (lossy behaviour), as aggregating those classes with concatenation is not possible.\n")) print(.data %>% select(non_standard_columns)) From 558cf13478ec777dd3e56b95f2f9c6884da0145b Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 14 Jun 2022 01:55:26 +1000 Subject: [PATCH 07/38] simplified aggregate for se and add test --- R/methods_SE.R | 123 +++++++++++-------- R/utilities.R | 2 +- data/tximeta_summarizeToGene_object.rda | Bin 0 -> 3000 bytes tests/testthat/test-tximeta_GRnges_IRanges.R | 39 ++++++ 4 files changed, 112 insertions(+), 52 deletions(-) create mode 100644 data/tximeta_summarizeToGene_object.rda create mode 100644 tests/testthat/test-tximeta_GRnges_IRanges.R diff --git a/R/methods_SE.R b/R/methods_SE.R index 5b9ace49..0f5fca24 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -805,6 +805,10 @@ setMethod("adjust_abundance", outersect(non_standard_columns) %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name) + # when( + # !is.null(rownames(.data)) ~ c(., feature_column_name), + # ~ (.) + # ) # Row data new_row_data = @@ -826,6 +830,11 @@ setMethod("adjust_abundance", .x } + # If no duplicate exit + if(!nrow(new_row_data)% is("data.frame") if(is_data_frame) .x = .x %>% as.matrix() + + # Gove duplicated rownames rownames(.x) = rowData(.data)[,quo_name(.transcript)] + + # Combine .x = combineByRow(.x, aggregation_function) - rownames(.x) = new_row_data[match(rownames(.x), new_row_data[,quo_name(.transcript)]),,drop=FALSE] %>% rownames() - .x = .x[match(rownames(new_row_data), rownames(.x)),,drop=FALSE] + .x = .x[match(new_row_data[,quo_name(.transcript)], rownames(.x)),,drop=FALSE] + rownames(.x) = rownames(new_row_data) + if(is_data_frame) .x = .x %>% as.data.frame() .x } ) - - - rr = rowRanges(.data) - # Gene ID dataset - gene_id_dataset = - rowData(.data) %>% - as.data.frame() %>% - select(!!as.symbol(quo_name(.transcript))) @@ -864,15 +870,15 @@ setMethod("adjust_abundance", new_range_data = rr %>% - as_tibble() %>% - - # Add names - when( - is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), - .hasSlot(rr@ranges, "NAME") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME), - .hasSlot(rr@ranges, "NAMES") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAMES), - ~ stop("tidybulk says: I don't know how to aggregate the GRanges, as the slot ranges@NAMES, or range@NAME do not exist") - ) + as_tibble() + + # # Add names + # when( + # is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), + # .hasSlot(rr@ranges, "NAME") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME), + # .hasSlot(rr@ranges, "NAMES") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAMES), + # ~ stop("tidybulk says: I don't know how to aggregate the GRanges, as the slot ranges@NAMES, or range@NAME do not exist") + # ) # Through warning if there are logicals of factor in the data frame # because they cannot be merged if they are not unique @@ -888,40 +894,55 @@ setMethod("adjust_abundance", new_range_data = new_range_data %>% # I have to use this trick because rowRanges() and rowData() share @elementMetadata - select(-one_of(colnames(gene_id_dataset))) %>% - suppressWarnings() %>% - - # Join rowData - left_join(gene_id_dataset %>% as_tibble(rownames =feature_column_name), by = feature_column_name) %>% - group_by(!!as.symbol(quo_name(.transcript))) %>% - mutate( - across(columns_to_collapse, ~ .x %>% collapse_function()), - across(non_standard_columns, ~ .x[1]), - merged_transcripts = n() - ) %>% - distinct() %>% - arrange(!!as.symbol(feature_column_name)) %>% - - select(-one_of("group_name", "group")) %>% - suppressWarnings() %>% - - as.data.frame() %>% - # Add back rownames - { - .x = (.) - rownames(.x) = .x %>% pull(!!as.symbol(feature_column_name)) - .x - } + select(-one_of(colnames(new_row_data) %>% outersect(quo_name(.transcript)))) %>% + suppressWarnings() + + # %>% + # + # # Join rowData - I DON'T KNOW IF ALWAYS ROWRANGES INCLUDE 100% OF ROWDATA + # bind_cols(gene_id_dataset) %>% + # group_by(!!as.symbol(quo_name(.transcript))) %>% + # + # # If I have rownames add them + # when( + # !is.null(rownames(.data)) ~ rowid_to_column(.), + # ~ (.) + # ) %>% + # + # mutate( + # across(columns_to_collapse, ~ .x %>% collapse_function()), + # across(non_standard_columns, ~ .x[1]), + # merged_transcripts = n() + # ) %>% + # distinct() %>% + # #arrange(!!as.symbol(feature_column_name)) %>% + # + # select(-one_of("group_name", "group")) %>% + # suppressWarnings() %>% + # + # as.data.frame() %>% + # # Add back rownames + # { + # .x = (.) + # rownames(.x) = .x %>% pull(!!as.symbol(feature_column_name)) + # .x + # } + + #if(is(rr, "CompressedGRangesList") | nrow(new_row_data)% .[match(new_row_data[,quo_name(.transcript)], names(.))] + names(new_range_data) = rownames(new_row_data) + #} + # else if(is(rr, "GRanges")) new_range_data = makeGRangesFromDataFrame(new_range_data, keep.extra.columns = TRUE) + # else stop("tidybulk says: riowRanges should be either GRanges or CompressedGRangesList. Or am I missing something?") - if(is(rr, "CompressedGRangesList")) new_range_data = makeGRangesListFromDataFrame( - new_range_data, - split.field = feature_column_name, - keep.extra.columns = TRUE - ) - else if(is(rr, "GRanges")) new_range_data = makeGRangesFromDataFrame(new_range_data, keep.extra.columns = TRUE) - else stop("tidybulk says: riowRanges should be either GRanges or CompressedGRangesList. Or am I missing something?") - new_range_data = new_range_data %>% .[match(rownames(new_count_data[[1]]), names(.))] } # Build the object diff --git a/R/utilities.R b/R/utilities.R index af9499dc..366f1c86 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1517,7 +1517,7 @@ combineByRow <- function(m, fun = NULL) { ## combine all rows and mark invalid rows if (prev != curr || is.na(curr)) { if (i.start < i.end) { - m[i.start,,drop=FALSE] <- apply(m[i.start:i.end,,drop=FALSE], 2, fun) + m[i.start,] <- apply(m[i.start:i.end,,drop=FALSE], 2, fun) m.rownames[(1+i.start):i.end] <- NA } diff --git a/data/tximeta_summarizeToGene_object.rda b/data/tximeta_summarizeToGene_object.rda new file mode 100644 index 0000000000000000000000000000000000000000..5a07bdd3d5b31127ad424038c67eaf0629722f05 GIT binary patch literal 3000 zcmV;p3rF<*H+ooF0004LBHlIv03iV!0000G&sfahET;>5T>vQ&2UKVgRpfklJ zQo|O*OH9P@$%I2o_kN^aB9Yp+atPtW(G=tf5ha+O4mE0$U zOa830lyj-|EjJBRmCZ}O<_8eq z98N(VTntICuPwI|%RqC&BFzmBXIIR-Cf6o(f|?u)d;MEqFtiY@qr=d6 z)jC(${gSC3@;f%ju(M;)qS+hLeUeBJytj#R2xU*}|GC?Ho(O&P*J7`XnKGIolqspu z&&3PI!A5o-UJBe*+|eLoc#|#^viQ(X5>!pGmrav zw*3V|8MCnQD7KuSWXC|`^K6l_iWe3~y&Xz=27W@+@O$f0m~Vs3Fg|l6gw^ zMk^!`4Gj$9xH(azbgjH(4*SVVEvAFjo385<>mAgDL3@F1k{)djvD6( zmj?8QSU+8bLD_U~8SyD+U>!Jb*k5LI@;qS^VWZ77`u@mCDxjCHT6fS2Apt$#N1!}I zgm!*AH+iffTwTEAjE=xFL}}KIRp{9}W;RV+h&a$K-T0}-PJFtD)(PJ)r?#WG-^AO# z@>MB;S`acP=dZ7(aw_(;Oj=AaxW)^y6D)5+Pp=~U5z4BPk904+&SeYGN_SOo#^ z5zycuR>lnyvP5kKDQc1IHnGIMTg2|L8SDFvV@3>2_HDW*Ob#!?o)K#r?yZ`dWO&jt z@zj8<$@|ageXSOCNH%F$YwkQlimT$na3Tl$KW&$OkCgm6yCS)(OPC44&Cm?)ck=rK zTyq11@}1(Y_Xi@DlsKKAfNhR=AEFr!;&G*LO@+MCM6Kh5N1!yeJEh>h zChD~|2%ojnun5`@rxheX1TVP4M(tiS@_Z4221phRXT4 zFV(v@G*AI=zeMAOyrTeg1?*-B)};i&#j)>tjBi8-?GRM{;2P5#$5h44F_9>4A7_SR zvbn}wfk?$#Y#9a&2BIhodw3^+-iJQMy^uJV{!SRxY{S)IDMaE_lerNuuF0%q9lI9@}G%CQSIFCYdNG$R>0yf)eY^+Ru+~di?g(rQWJu7 zp;<;IlOCL)ij#ouaG&m|#q@*C4fk>Nk~vZ0{x;RWPylL*@eth!{@xooARYS`Dh z&$t}kSu=j5RAvMChmPE2URQ~Ewb0Z%y!I&H%{$6nBserv7EIyS1Cx+JU1=R8Vgxx!aFef&Q4c)p`eQ{h6Fz)L#C1o`_=#e#B9 z>Ke0RrijuA@OR^R9;kqQAgaltHOu4y%8bX40T?JlA+a3bXSfP!P{w+0(sOeIc)m0M zau^_?_Oc+KCbV~q#NFvLg1#^BR}#3UZr(1onAvmD@^4yV4O7$`=Ay$AUr<0+m6A;+ zbuCaw`+Fsp0S}}m)s~&FkiEgC%UXG$Pub!w5A|147od4q_9MYw5J@JNqdz+;gsXJe zs48!}0G}3OcYDF$dgDnWU%%UfY~C!Pw99XGF%B#2W-wO#dPckY1R*hDMqfvBd$pg5 zAs=XEK~ZJ}s(x@=i??1m219op6G5hPfo!`+M4l$T1Y}c zG7vBF^sojFHXU6-T7L|^R(FeH7`(&@>9&{vs1PJi6^vKsnJfZ(R=wD3|6E&vYP4x5 z+D-E57~wUAnNqD&mtwTLH+2qzOTCk_&l&^$u^k{3{|gfa!ZzZaeuYb&X@XamC`l{x zq~%_;i&b?^T+8j1F-d0PefO6{&ZB$jgLZfjHks)4uNSaz7#^UueN>8oq8k|pRP00N z+Of$^d9+MumrUEiG|F`B%FFZ)>gB73xy}>gh@Y<5C#cY9rc0-x7PKuzavrEt2pas) z&0ti0vufvC&M;OldUq-ZF9Hz#$vLndaw_-*?4K=Vqmit{*9g!ork#VWD!S9^KGh5O zdAY>HqlUyqpC2h*aW^@sx(`Nc@3A?EaI3;sYO9C+fb~@QQ4mR_)Gqc8+cwClrV1$i z2nG&N(~*3@KBCbfQZH_E?uJJGHLoFl|FN1WiJqo2vP zxa*GRE&dA;J5l9;0>7bM>YJ;1=EY@hRM9Ef-7R**5fP}QLY4|b9+c)mdv5(S524w2 zt7+~k9wdmoypY28+j7EpUiUxz=ECv&pgIYrMdc^#ImOH)=loDBr5nZ#g=0#BlHMD4rUFy=n=AY*6n3Y%oH_E%eMU39;LePRt2 zCSF1|yO>!{B-Vi)PIL}4g2S!varEU3ct_0$;OAg`7|G-8vQtwl--a5j)ozN(o_%th zU?qN(<=LS?gVhL2&uj{Cvp!}OHbC!V?J(ri^KWfOVbmdQ0WC9FAbKhq<0@%&J?e{c z!QfVD#_QYVErq0ERTzM!s6|j1!pl3RQD2g;%A|m_j;*;39VoYWQB;M+rOVWP^p+-| zsr(Ki2!MJy*0F>7NZ5XL=g9OY@g%L_a7n1{n1h@7f0Q#bIVHz8iSmca2(@Bjc)PD#`N0h1S~SpWcc-_>D0Fb#_W000000a;o)jMV%9 literal 0 HcmV?d00001 diff --git a/tests/testthat/test-tximeta_GRnges_IRanges.R b/tests/testthat/test-tximeta_GRnges_IRanges.R new file mode 100644 index 00000000..425311a1 --- /dev/null +++ b/tests/testthat/test-tximeta_GRnges_IRanges.R @@ -0,0 +1,39 @@ +context('tximeta and Granges') + +test_that("tximeta 1",{ + + duplicate = tximeta_summarizeToGene_object[1,] + rownames(duplicate) = "dup" + + + rbind(duplicate, tximeta_summarizeToGene_object) %>% + aggregate_duplicates(.transcript = gene_id) + + tximeta_summarizeToGene_object %>% + aggregate_duplicates(.transcript = gene_id) + +}) + + + +test_that("se no features",{ + + # Create dataset + nrows <- 200; ncols <- 6 + counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) + rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), + IRanges(floor(runif(200, 1e5, 1e6)), width=100), + strand=sample(c("+", "-"), 200, TRUE), + feature_id=sprintf("ID%03d", 1:200)) + colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), row.names=LETTERS[1:6]) + se <- SummarizedExperiment(assays=SimpleList(counts=counts), rowRanges=rowRanges, colData=colData) + se= rbind( se[1,], se) + + se %>% + aggregate_duplicates(.transcript = feature_id) +}) + + + + + From 32f915febbc3c353b714eca7902ff611e76089a7 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 15:35:36 +1000 Subject: [PATCH 08/38] implement new vocabulary --- DESCRIPTION | 2 +- R/methods_SE.R | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e3742888..d9666716 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/methods_SE.R b/R/methods_SE.R index dd65d816..edd55d1d 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -17,6 +17,9 @@ .abundance = enquo(.abundance) .abundance_scaled = enquo(.abundance_scaled) + feature_column_name = ".feature" + sample_column_name = ".sample" + # Set scaled col names norm_col = SummarizedExperiment::assays(.data)[1] %>% names %>% paste0(scaled_string) %>% @@ -31,7 +34,7 @@ change_reserved_column_names() %>% # Convert to tibble - tibble::as_tibble(rownames="sample") + tibble::as_tibble(rownames=sample_column_name) range_info <- @@ -45,20 +48,20 @@ change_reserved_column_names() %>% # Convert to tibble - tibble::as_tibble(rownames="feature") + tibble::as_tibble(rownames=feature_column_name) count_info <- get_count_datasets(.data) # Return count_info %>% - left_join(sample_info, by="sample") %>% - left_join(gene_info, by="feature") %>% + left_join(sample_info, by=sample_column_name) %>% + left_join(gene_info, by=feature_column_name) %>% when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>% mutate_if(is.character, as.factor) %>% tidybulk( - sample, - feature, + !!as.symbol(sample_column_name), + !!as.symbol(feature_column_name), !!as.symbol(SummarizedExperiment::assays(.data)[1] %>% names ), !!norm_col # scaled counts if any ) From f4f58229f0daa1364cfc9ae8b70486cc94ec10fc Mon Sep 17 00:00:00 2001 From: stemangiola Date: Mon, 13 Jun 2022 09:49:47 +1000 Subject: [PATCH 09/38] replace if_else once --- R/methods.R | 1 + R/utilities.R | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/methods.R b/R/methods.R index a24c6018..52a4a2fb 100755 --- a/R/methods.R +++ b/R/methods.R @@ -153,6 +153,7 @@ setGeneric("as_SummarizedExperiment", function(.data, install.packages("BiocManager", repos = "https://cloud.r-project.org") BiocManager::install("S4Vectors", ask = FALSE) } + # If present get the scaled abundance .abundance_scaled = .data %>% diff --git a/R/utilities.R b/R/utilities.R index 8cb99cec..e6476b26 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -905,12 +905,11 @@ get_x_y_annotation_columns = function(.data, .horizontal, .vertical, .abundance, map( ~ .x %>% - ifelse_pipe( + when( .data %>% distinct(!!.horizontal, !!as.symbol(.x)) %>% nrow %>% - equals(n_x), - ~ .x, + equals(n_x) ~ .x, ~ NULL ) ) %>% From 60f426082d1607a8b0e6aed541eb0db721fe0ef2 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 17:28:05 +1000 Subject: [PATCH 10/38] allow other feature column for SE --- DESCRIPTION | 2 +- R/cibersort.R | 6 +++--- R/methods_SE.R | 19 +++++++++++++++---- R/validation.R | 43 ++++++++++++++++++++++++------------------- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c71d1b30..6e6539b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/cibersort.R b/R/cibersort.R index fe1b73a5..b21cb561 100755 --- a/R/cibersort.R +++ b/R/cibersort.R @@ -189,9 +189,9 @@ my_CIBERSORT <- function(Y, X, perm=0, QN=TRUE, cores = 3, exp_transform = FALSE ################################### ## This is needed to make the two tables consistent in gene ################################### - - X <- X[order(rownames(X)),,drop=FALSE] - Y <- Y[order(rownames(Y)),,drop=FALSE] + common_genes = intersect(rownames(X), rownames(Y)) + X <- X[common_genes,,drop=FALSE] + Y <- Y[common_genes,,drop=FALSE] P <- perm #number of permutations diff --git a/R/methods_SE.R b/R/methods_SE.R index dd65d816..4ab9e4f2 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -901,19 +901,30 @@ setMethod("aggregate_duplicates", - +#' @importFrom rlang quo_is_symbol .deconvolve_cellularity_se = function(.data, reference = X_cibersort, method = "cibersort", prefix = "", ...) { + .transcript = enquo(.transcript) + my_assay = .data %>% assays() %>% as.list() %>% - .[[get_assay_scaled_if_exists_SE(.data)]] + .[[get_assay_scaled_if_exists_SE(.data)]] %>% + + # Change row names + when(quo_is_symbol(.transcript) ~ { + .x = (.) + rownames(.x) = .data %>% pivot_transcript() %>% pull(!!.transcript) + .x + }, + ~ (.) + ) # Get the dots arguments dots_args = rlang::dots_list(...) @@ -952,7 +963,7 @@ setMethod("aggregate_duplicates", reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .) # Validate reference - validate_signature_SE(.data, reference, !!.transcript) + validate_signature_SE(., reference) do.call(my_CIBERSORT, list(Y = ., X = reference, QN=FALSE) %>% c(dots_args)) %$% proportions %>% @@ -967,7 +978,7 @@ setMethod("aggregate_duplicates", reference = reference %>% when(is.null(.) ~ X_cibersort, ~ .) # Validate reference - validate_signature_SE(.data, reference, !!.transcript) + validate_signature_SE(., reference) (.) %>% run_llsr(reference, ...) %>% diff --git a/R/validation.R b/R/validation.R index 0441ac6f..3cda4fd6 100755 --- a/R/validation.R +++ b/R/validation.R @@ -307,33 +307,38 @@ validate_signature = function(.data, reference, .transcript){ .transcript = enquo(.transcript) - if ((.data %>% - pull(!!.transcript) %in% (reference %>% rownames)) %>% - which %>% - length %>% - st(50)) - warning( - "tidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes" - ) + overlapping_genes = (.data %>% pull(!!.transcript) %in% rownames(reference) %>% which + + if(length(overlapping_genes) == 0 ) + stop(sprintf( + "\ntidybulk says: You have NO genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ") + )) + + if ( length(overlapping_genes) %>% st(50) ) + warning(sprintf( + "\ntidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ") + )) # Check if rownames exist if (reference %>% sapply(class) %in% c("numeric", "double", "integer") %>% not() %>% any) - stop("tidybulk says: your reference has non-numeric/integer columns.") + stop("tidybulk says: your reference has non-numeric/integer columns.") + } -validate_signature_SE = function(.data, reference, .transcript){ +validate_signature_SE = function(assay, reference){ - .transcript = enquo(.transcript) + overlapping_genes = (rownames(assay) %in% rownames(reference)) %>% which - if ((.data %>% - rownames %in% (reference %>% rownames)) %>% - which %>% - length %>% - st(50)) - warning( - "tidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes" - ) + if(length(overlapping_genes) == 0 ) + stop(sprintf( + "\ntidybulk says: You have NO genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ") + )) + + if ( length(overlapping_genes) %>% st(50) ) + warning(sprintf( + "\ntidybulk says: You have less than 50 genes in common between the query data and the reference data. Please check again your input dataframes\nthe genes in the reference look like this %s", paste(rownames(reference)[1:10], collapse = ", ") + )) # Check if rownames exist if (reference %>% sapply(class) %in% c("numeric", "double", "integer") %>% not() %>% any) From fedd878f7f1efa5b64c440bf893870cdf543e164 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 17:55:00 +1000 Subject: [PATCH 11/38] fix typo --- R/validation.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/validation.R b/R/validation.R index 3cda4fd6..3c16b662 100755 --- a/R/validation.R +++ b/R/validation.R @@ -307,7 +307,7 @@ validate_signature = function(.data, reference, .transcript){ .transcript = enquo(.transcript) - overlapping_genes = (.data %>% pull(!!.transcript) %in% rownames(reference) %>% which + overlapping_genes = .data %>% pull(!!.transcript) %in% rownames(reference) %>% which if(length(overlapping_genes) == 0 ) stop(sprintf( From f93a90af07f0bcdf4d13291936f12b5adbc0b7e8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 19:03:01 +1000 Subject: [PATCH 12/38] fix tidybulk() --- R/methods_SE.R | 2 +- R/tidySummarizedExperiment.R | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index edd55d1d..c7545ec5 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -50,7 +50,7 @@ # Convert to tibble tibble::as_tibble(rownames=feature_column_name) - count_info <- get_count_datasets(.data) + count_info <- get_count_datasets(.data, feature_column_name, sample_column_name) # Return count_info %>% diff --git a/R/tidySummarizedExperiment.R b/R/tidySummarizedExperiment.R index f7ab582e..67e7cbce 100644 --- a/R/tidySummarizedExperiment.R +++ b/R/tidySummarizedExperiment.R @@ -95,22 +95,24 @@ change_reserved_column_names = function(.data){ #' #' @keywords internal #' @noRd -get_count_datasets <- function(SummarizedExperiment_object) { +get_count_datasets <- function(SummarizedExperiment_object, feature_column_name, sample_column_name) { + + map2( assays(SummarizedExperiment_object) %>% as.list(), names(assays(SummarizedExperiment_object)), ~ .x %>% - tibble::as_tibble(rownames = "feature", .name_repair = "minimal") %>% + tibble::as_tibble(rownames = feature_column_name, .name_repair = "minimal") %>% # If the matrix does not have sample names, fix column names when(colnames(.x) %>% is.null() ~ setNames(., c( - "feature", seq_len(ncol(.x)) + feature_column_name, seq_len(ncol(.x)) )), ~ (.) ) %>% - gather(sample, count,-feature) %>% + gather(!!as.symbol(sample_column_name), count,-!!as.symbol(feature_column_name)) %>% rename(!!.y := count) ) %>% - reduce(left_join, by = c("feature", "sample")) + reduce(left_join, by = c(feature_column_name, sample_column_name)) } From 601aa4a0dedbe655aed297a1ec112dc47cb2294c Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 19:12:13 +1000 Subject: [PATCH 13/38] avoid human annotation --- R/functions.R | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/R/functions.R b/R/functions.R index daa6988b..823c8b2c 100755 --- a/R/functions.R +++ b/R/functions.R @@ -83,22 +83,22 @@ create_tt_from_bam_sam_bulk <- # Anonymous function # input: edgeR::DGEList object # output: edgeR::DGEList object with added transcript symbol - when( - "annot.ext" %in% (rlang::dots_list(...) %>% names) %>% not() ~ { - dge <- (.) - dge$genes$symbol <- - AnnotationDbi::mapIds( - org.Hs.eg.db::org.Hs.eg.db, - keys = as.character(dge$genes$GeneID), - column = "SYMBOL", - keytype = "ENTREZID", - multiVals = "first" - ) - - dge - }, - ~ (.) - ) %>% + # when( + # "annot.ext" %in% (rlang::dots_list(...) %>% names) %>% not() ~ { + # dge <- (.) + # dge$genes$symbol <- + # AnnotationDbi::mapIds( + # org.Hs.eg.db::org.Hs.eg.db, + # keys = as.character(dge$genes$GeneID), + # column = "SYMBOL", + # keytype = "ENTREZID", + # multiVals = "first" + # ) + # + # dge + # }, + # ~ (.) + # ) %>% # Anonymous function # input: annotated edgeR::DGEList object From 5ddf54a6ded25717193150343405f7daea05c809 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 19:20:56 +1000 Subject: [PATCH 14/38] update docs --- DESCRIPTION | 2 +- R/functions.R | 2 +- R/methods.R | 2 +- man/tidybulk_SAM_BAM-methods.Rd | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c71d1b30..6e6539b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/functions.R b/R/functions.R index 823c8b2c..4b92a025 100755 --- a/R/functions.R +++ b/R/functions.R @@ -47,7 +47,7 @@ create_tt_from_tibble_bulk = function(.data, #' @importFrom purrr reduce #' #' @param file_names A character vector -#' @param genome A character string +#' @param genome A character string specifying an in-built annotation used for read summarization. It has four possible values including "mm10", "mm9", "hg38" and "hg19" #' @param ... Further parameters passed to the function Rsubread::featureCounts #' #' @return A tibble of gene counts diff --git a/R/methods.R b/R/methods.R index 52a4a2fb..4dab3434 100755 --- a/R/methods.R +++ b/R/methods.R @@ -253,7 +253,7 @@ setMethod("as_SummarizedExperiment", "tidybulk", .as_SummarizedExperiment) #' @name tidybulk_SAM_BAM #' #' @param file_names A character vector -#' @param genome A character string +#' @param genome A character string specifying an in-built annotation used for read summarization. It has four possible values including "mm10", "mm9", "hg38" and "hg19" #' @param ... Further parameters passed to the function Rsubread::featureCounts #' #' @details This function is based on FeatureCounts package (DOI: 10.1093/bioinformatics/btt656). This function creates a tidybulk object and is useful if you want diff --git a/man/tidybulk_SAM_BAM-methods.Rd b/man/tidybulk_SAM_BAM-methods.Rd index 13dc3151..7087bddd 100644 --- a/man/tidybulk_SAM_BAM-methods.Rd +++ b/man/tidybulk_SAM_BAM-methods.Rd @@ -13,7 +13,7 @@ tidybulk_SAM_BAM(file_names, genome = "hg38", ...) \arguments{ \item{file_names}{A character vector} -\item{genome}{A character string} +\item{genome}{A character string specifying an in-built annotation used for read summarization. It has four possible values including "mm10", "mm9", "hg38" and "hg19"} \item{...}{Further parameters passed to the function Rsubread::featureCounts} } From 1a1a7580f61118e07783dbf015bfee8de796e700 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 24 Jun 2022 19:45:58 +1000 Subject: [PATCH 15/38] fixed pivot_ --- R/methods_SE.R | 48 ++++++++++++++++++++++-------------------------- R/utilities.R | 7 +++++++ 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index c7545ec5..230c5207 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -17,9 +17,6 @@ .abundance = enquo(.abundance) .abundance_scaled = enquo(.abundance_scaled) - feature_column_name = ".feature" - sample_column_name = ".sample" - # Set scaled col names norm_col = SummarizedExperiment::assays(.data)[1] %>% names %>% paste0(scaled_string) %>% @@ -34,7 +31,7 @@ change_reserved_column_names() %>% # Convert to tibble - tibble::as_tibble(rownames=sample_column_name) + tibble::as_tibble(rownames=sample__$name) range_info <- @@ -48,20 +45,20 @@ change_reserved_column_names() %>% # Convert to tibble - tibble::as_tibble(rownames=feature_column_name) + tibble::as_tibble(rownames=feature__$name) - count_info <- get_count_datasets(.data, feature_column_name, sample_column_name) + count_info <- get_count_datasets(.data, feature__$name, sample__$name) # Return count_info %>% - left_join(sample_info, by=sample_column_name) %>% - left_join(gene_info, by=feature_column_name) %>% + left_join(sample_info, by=sample__$name) %>% + left_join(gene_info, by=feature__$name) %>% when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>% mutate_if(is.character, as.factor) %>% tidybulk( - !!as.symbol(sample_column_name), - !!as.symbol(feature_column_name), + !!as.symbol(sample__$name), + !!as.symbol(feature__$name), !!as.symbol(SummarizedExperiment::assays(.data)[1] %>% names ), !!norm_col # scaled counts if any ) @@ -790,23 +787,22 @@ setMethod("adjust_abundance", collapse_function = function(x){ x %>% unique() %>% paste(collapse = "___") } - feature_column_name = ".feature" # Row data new_row_data = .data %>% rowData() %>% - as_tibble(rownames = feature_column_name) %>% + as_tibble(rownames = feature__$name) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% summarise( across(everything(), ~ .x %>% collapse_function()), merged.transcripts = n() ) %>% - arrange(!!as.symbol(feature_column_name)) %>% + arrange(!!as.symbol(feature__$name)) %>% as.data.frame() - rownames(new_row_data) = new_row_data[,feature_column_name] - new_row_data = new_row_data %>% select(-feature_column_name) + rownames(new_row_data) = new_row_data[,feature__$name] + new_row_data = new_row_data %>% select(-feature__$name) # Counts new_count_data = @@ -827,7 +823,7 @@ setMethod("adjust_abundance", ) # GRanges - columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature_column_name) + columns_to_collapse = .data %>% rowData() %>% colnames() %>% setdiff(quo_name(.transcript)) %>% c(feature__$name) rr = rowRanges(.data) @@ -837,27 +833,27 @@ setMethod("adjust_abundance", as_tibble() %>% # Add names when( - is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), - ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME) + is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature__$name) := group_name), + ~ mutate(., !!as.symbol(feature__$name) := rr@ranges@NAME) ) %>% left_join( rowData(.data) %>% as.data.frame() %>% select(!!as.symbol(quo_name(.transcript))) %>% - as_tibble(rownames =feature_column_name), - by = feature_column_name + as_tibble(rownames =feature__$name), + by = feature__$name ) %>% group_by(!!as.symbol(quo_name(.transcript))) %>% mutate( across(columns_to_collapse, ~ .x %>% collapse_function()), merged.transcripts = n() ) %>% - arrange(!!as.symbol(feature_column_name)) %>% + arrange(!!as.symbol(feature__$name)) %>% select(-one_of("group_name", "group")) %>% suppressWarnings() %>% - makeGRangesListFromDataFrame( split.field = feature_column_name, + makeGRangesListFromDataFrame( split.field = feature__$name, keep.extra.columns = TRUE) %>% .[match(rownames(new_count_data[[1]]), names(.))] @@ -1897,7 +1893,7 @@ setMethod("test_gene_rank", ) %>% # Convert to tibble - tibble::as_tibble(rownames="sample") + tibble::as_tibble(rownames=sample__$name) @@ -1937,7 +1933,7 @@ setMethod("pivot_sample", range_info <- get_special_datasets(.data) %>% - reduce(left_join, by="feature") + reduce(left_join, by=feature__$name) gene_info <- rowData(.data) %>% @@ -1949,11 +1945,11 @@ setMethod("pivot_sample", ) %>% # Convert to tibble - tibble::as_tibble(rownames="feature") + tibble::as_tibble(rownames=feature__$name) gene_info %>% when( - nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by="feature"), + nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=feature__$name), ~ (.) ) } diff --git a/R/utilities.R b/R/utilities.R index e6476b26..e3f9cdc6 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1595,3 +1595,10 @@ fill_NA_matrix_with_factor_colwise = function(.data, factor){ .[rn, cn] } + +get_special_column_name_symbol = function(name){ + list(name = name, symbol = as.symbol(name)) +} + +feature__ = get_special_column_name_symbol(".feature") +sample__ = get_special_column_name_symbol(".sample") From a284fc65c15f4572b50be9fb34a4d9f8017e6898 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 10:25:05 +1000 Subject: [PATCH 16/38] deprecate .contrasts and update tests --- DESCRIPTION | 2 +- R/methods.R | 71 +++++++++++-------- R/methods_SE.R | 34 +++++++-- man/test_differential_abundance-methods.Rd | 38 ++++++---- man/test_gene_enrichment-methods.Rd | 34 +++++---- tests/testthat/test-bulk_methods.R | 14 ++-- .../test-bulk_methods_SummarizedExperiment.R | 2 +- 7 files changed, 122 insertions(+), 73 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c71d1b30..6e6539b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,7 +82,7 @@ Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 LazyDataCompression: xz URL: https://github.com/stemangiola/tidybulk BugReports: https://github.com/stemangiola/tidybulk/issues diff --git a/R/methods.R b/R/methods.R index 52a4a2fb..950a712f 100755 --- a/R/methods.R +++ b/R/methods.R @@ -2114,7 +2114,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' @param .sample The name of the sample column #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column -#' @param .contrasts This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) +#' @param contrasts This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT), "edger_robust_likelihood_ratio", "DESeq2", "limma_voom", "limma_voom_sample_weights" #' @param test_above_log2_fold_change A positive real value. This works for edgeR and limma_voom methods. It uses the `treat` function, which tests that the difference in abundance is bigger than this threshold rather than zero \url{https://pubmed.ncbi.nlm.nih.gov/19176553}. #' @param scaling_method A character string. The scaling method passed to the back-end functions: edgeR and limma-voom (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile"). Setting the parameter to \"none\" will skip the compensation for sequencing-depth for the method edgeR or limma-voom. @@ -2123,6 +2123,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param significance_threshold DEPRECATED - A real between 0 and 1 (usually 0.05). #' @param fill_missing_values DEPRECATED - A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. +#' @param .contrasts DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param ... Further arguments passed to some of the internal functions. Currently, it is needed just for internal debug. #' #' @@ -2185,7 +2186,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' identify_abundant() |> #' test_differential_abundance( #' ~ 0 + condition, -#' .contrasts = c( "conditionTRUE - conditionFALSE") +#' contrasts = c( "conditionTRUE - conditionFALSE") #' ) #' #' # DESeq2 - equivalent for limma-voom @@ -2203,7 +2204,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' identify_abundant() |> #' test_differential_abundance( #' ~ 0 + condition, -#' .contrasts = list(c("condition", "TRUE", "FALSE")), +#' contrasts = list(c("condition", "TRUE", "FALSE")), #' method="deseq2" #' ) #' @@ -2216,7 +2217,7 @@ setGeneric("test_differential_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -2227,7 +2228,8 @@ setGeneric("test_differential_abundance", function(.data, # DEPRECATED significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) standardGeneric("test_differential_abundance")) @@ -2238,7 +2240,7 @@ setGeneric("test_differential_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -2250,7 +2252,8 @@ setGeneric("test_differential_abundance", function(.data, # DEPRECATED significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) { # Get column names @@ -2278,6 +2281,15 @@ setGeneric("test_differential_abundance", function(.data, } + # DEPRECATION OF .constrasts + if (is_present(.contrasts) & !is.null(.contrasts)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(.contrasts = )", details = "The argument .contrasts is now deprecated please use contrasts (without the dot).") + + contrasts = .contrasts + } + # Clearly state what counts are used rlang::inform("===================================== tidybulk says: All testing methods use raw counts, irrespective of if scale_abundance @@ -2318,7 +2330,7 @@ such as batch effects (if applicable) in the formula. .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - .contrasts = .contrasts, + .contrasts = contrasts, method = method, test_above_log2_fold_change = test_above_log2_fold_change, scaling_method = scaling_method, @@ -2334,7 +2346,7 @@ such as batch effects (if applicable) in the formula. .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - .contrasts = .contrasts, + .contrasts = contrasts, method = method, test_above_log2_fold_change = test_above_log2_fold_change, scaling_method = scaling_method, @@ -2350,7 +2362,7 @@ such as batch effects (if applicable) in the formula. .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - .contrasts = .contrasts, + .contrasts = contrasts, method = method, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, @@ -2368,10 +2380,6 @@ such as batch effects (if applicable) in the formula. .data %>% dplyr::left_join(.data_processed, by = quo_name(.transcript)) %>% - # # Arrange - # ifelse_pipe(.contrasts %>% is.null, - # ~ .x %>% arrange(FDR)) %>% - # Attach attributes reattach_internals(.data_processed) @@ -2382,18 +2390,9 @@ such as batch effects (if applicable) in the formula. # Selecting the right columns pivot_transcript(!!.transcript) %>% - # select( - # !!.transcript, - # get_x_y_annotation_columns(.data, !!.sample,!!.transcript, !!.abundance, NULL)$vertical_cols - # ) %>% - # distinct() %>% dplyr::left_join(.data_processed, by = quo_name(.transcript)) %>% - # # Arrange - # ifelse_pipe(.contrasts %>% is.null, - # ~ .x %>% arrange(FDR)) %>% - # Attach attributes reattach_internals(.data_processed) @@ -2833,13 +2832,14 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant) #' @param .sample The name of the sample column #' @param .entrez The ENTREZ ID of the transcripts/genes #' @param .abundance The name of the transcript/gene abundance column -#' @param .contrasts = NULL, +#' @param contrasts This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param methods A character vector. One or 3 or more methods to use in the testing (currently EGSEA errors if 2 are used). Type EGSEA::egsea.base() to see the supported GSE methods. #' @param gene_sets A character vector or a list. It can take one or more of the following built-in collections as a character vector: c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), to be used with EGSEA buildIdx. c1 is human specific. Alternatively, a list of user-supplied gene sets can be provided, to be used with EGSEA buildCustomIdx. In that case, each gene set is a character vector of Entrez IDs and the names of the list are the gene set names. #' @param species A character. It can be human, mouse or rat. #' @param cores An integer. The number of cores available #' #' @param method DEPRECATED. Please use methods. +#' @param .contrasts DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' #' @details This wrapper executes ensemble gene enrichment analyses of the dataset using EGSEA (DOI:0.12688/f1000research.12544.1) #' @@ -2911,13 +2911,15 @@ setGeneric("test_gene_enrichment", function(.data, .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera" , "roast" , "safe", "gage" , "padog" , "globaltest", "ora" ), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL # DEPRECATED + # DEPRECATED + method = NULL, + .contrasts = NULL ) standardGeneric("test_gene_enrichment")) @@ -2928,13 +2930,15 @@ setGeneric("test_gene_enrichment", function(.data, .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera" , "roast" , "safe", "gage" , "padog" , "globaltest", "ora" ), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL # DEPRECATED + # DEPRECATED + method = NULL, + .contrasts = NULL ) { # DEPRECATION OF reference function @@ -2945,6 +2949,15 @@ setGeneric("test_gene_enrichment", function(.data, methods = method } + # DEPRECATION OF .constrasts + if (is_present(.contrasts) & !is.null(.contrasts)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(.contrasts = )", details = "The argument .contrasts is now deprecated please use contrasts (without the dot).") + + contrasts = .contrasts + } + # Make col names .sample = enquo(.sample) .abundance = enquo(.abundance) @@ -2987,7 +3000,7 @@ setGeneric("test_gene_enrichment", function(.data, .sample = !!.sample, .entrez = !!.entrez, .abundance = !!.abundance, - .contrasts = .contrasts, + .contrasts = contrasts, methods = methods, gene_sets = gene_sets, species = species, diff --git a/R/methods_SE.R b/R/methods_SE.R index dd65d816..f1044e8e 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -1061,7 +1061,7 @@ setMethod( #' @importFrom rlang inform .test_differential_abundance_se = function(.data, .formula, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -1070,6 +1070,15 @@ setMethod( ...) { + # DEPRECATION OF .constrasts + if (is_present(.contrasts) & !is.null(.contrasts)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(.contrasts = )", details = "The argument .contrasts is now deprecated please use contrasts (without the dot).") + + contrasts = .contrasts + } + # Clearly state what counts are used # Clearly state what counts are used rlang::inform("===================================== @@ -1098,7 +1107,7 @@ such as batch effects (if applicable) in the formula. get_differential_transcript_abundance_bulk_SE( ., .formula, - .contrasts = .contrasts, + .contrasts = contrasts, colData(.data), method = method, test_above_log2_fold_change = test_above_log2_fold_change, @@ -1112,7 +1121,7 @@ such as batch effects (if applicable) in the formula. grepl("voom", method) ~ get_differential_transcript_abundance_bulk_voom_SE( ., .formula, - .contrasts = .contrasts, + .contrasts = contrasts, colData(.data), method = method, test_above_log2_fold_change = test_above_log2_fold_change, @@ -1125,7 +1134,7 @@ such as batch effects (if applicable) in the formula. tolower(method)=="deseq2" ~ get_differential_transcript_abundance_deseq2_SE( ., .formula, - .contrasts = .contrasts, + .contrasts = contrasts, method = method, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, @@ -1431,13 +1440,15 @@ setMethod("keep_abundant", .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera" , "roast" , "safe", "gage" , "padog" , "globaltest", "ora" ), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL # DEPRECATED + # DEPRECATED + method = NULL, + .contrasts = NULL ) { # DEPRECATION OF reference function @@ -1448,6 +1459,15 @@ setMethod("keep_abundant", methods = method } + # DEPRECATION OF .constrasts + if (is_present(.contrasts) & !is.null(.contrasts)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(.contrasts = )", details = "The argument .contrasts is now deprecated please use contrasts (without the dot).") + + contrasts = .contrasts + } + .entrez = enquo(.entrez) # Check that there are no entrez missing @@ -1495,7 +1515,7 @@ setMethod("keep_abundant", ) my_contrasts = - .contrasts %>% + contrasts %>% when( length(.) > 0 ~ limma::makeContrasts(contrasts = ., levels = design), ~ NULL diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd index d2be4bc1..1efbc70e 100755 --- a/man/test_differential_abundance-methods.Rd +++ b/man/test_differential_abundance-methods.Rd @@ -16,7 +16,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -25,7 +25,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) \S4method{test_differential_abundance}{spec_tbl_df}( @@ -34,7 +35,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -43,7 +44,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) \S4method{test_differential_abundance}{tbl_df}( @@ -52,7 +54,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -61,7 +63,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) \S4method{test_differential_abundance}{tidybulk}( @@ -70,7 +73,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -79,7 +82,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) \S4method{test_differential_abundance}{SummarizedExperiment}( @@ -88,7 +92,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -97,7 +101,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) \S4method{test_differential_abundance}{RangedSummarizedExperiment}( @@ -106,7 +111,7 @@ test_differential_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, method = "edgeR_quasi_likelihood", test_above_log2_fold_change = NULL, scaling_method = "TMM", @@ -115,7 +120,8 @@ test_differential_abundance( action = "add", ..., significance_threshold = NULL, - fill_missing_values = NULL + fill_missing_values = NULL, + .contrasts = NULL ) } \arguments{ @@ -129,7 +135,7 @@ test_differential_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{.contrasts}{This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} +\item{contrasts}{This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} \item{method}{A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT), "edger_robust_likelihood_ratio", "DESeq2", "limma_voom", "limma_voom_sample_weights"} @@ -148,6 +154,8 @@ test_differential_abundance( \item{significance_threshold}{DEPRECATED - A real between 0 and 1 (usually 0.05).} \item{fill_missing_values}{DEPRECATED - A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} + +\item{.contrasts}{DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} } \value{ A consistent object (to the input) with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate). @@ -221,7 +229,7 @@ DESeq2::results() identify_abundant() |> test_differential_abundance( ~ 0 + condition, - .contrasts = c( "conditionTRUE - conditionFALSE") + contrasts = c( "conditionTRUE - conditionFALSE") ) # DESeq2 - equivalent for limma-voom @@ -239,7 +247,7 @@ my_se_mini |> identify_abundant() |> test_differential_abundance( ~ 0 + condition, - .contrasts = list(c("condition", "TRUE", "FALSE")), + contrasts = list(c("condition", "TRUE", "FALSE")), method="deseq2" ) diff --git a/man/test_gene_enrichment-methods.Rd b/man/test_gene_enrichment-methods.Rd index b91308fa..e6f283d8 100644 --- a/man/test_gene_enrichment-methods.Rd +++ b/man/test_gene_enrichment-methods.Rd @@ -16,13 +16,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) \S4method{test_gene_enrichment}{spec_tbl_df}( @@ -31,13 +32,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) \S4method{test_gene_enrichment}{tbl_df}( @@ -46,13 +48,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) \S4method{test_gene_enrichment}{tidybulk}( @@ -61,13 +64,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) \S4method{test_gene_enrichment}{SummarizedExperiment}( @@ -76,13 +80,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) \S4method{test_gene_enrichment}{RangedSummarizedExperiment}( @@ -91,13 +96,14 @@ test_gene_enrichment( .sample = NULL, .entrez, .abundance = NULL, - .contrasts = NULL, + contrasts = NULL, methods = c("camera", "roast", "safe", "gage", "padog", "globaltest", "ora"), gene_sets = c("h", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "kegg_disease", "kegg_metabolism", "kegg_signaling"), species, cores = 10, - method = NULL + method = NULL, + .contrasts = NULL ) } \arguments{ @@ -111,7 +117,7 @@ test_gene_enrichment( \item{.abundance}{The name of the transcript/gene abundance column} -\item{.contrasts}{= NULL,} +\item{contrasts}{This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} \item{methods}{A character vector. One or 3 or more methods to use in the testing (currently EGSEA errors if 2 are used). Type EGSEA::egsea.base() to see the supported GSE methods.} @@ -122,6 +128,8 @@ test_gene_enrichment( \item{cores}{An integer. The number of cores available} \item{method}{DEPRECATED. Please use methods.} + +\item{.contrasts}{DEPRECATED - This parameter takes the format of the contrast parameter of the method of choice. For edgeR and limma-voom is a character vector. For DESeq2 is a list including a character vector of length three. The first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} } \value{ A consistent object (to the input) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index ed44cd17..9bfc1bda 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -338,7 +338,7 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ .sample = a, .transcript = b, .abundance = c, - .contrasts = c( "conditionTRUE - conditionFALSE", "conditionFALSE - conditionTRUE"), + contrasts = c( "conditionTRUE - conditionFALSE", "conditionFALSE - conditionTRUE"), method = "edgeR_likelihood_ratio", action="only" ) @@ -502,7 +502,7 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ .sample = a, .transcript = b, .abundance = c, - .contrasts = c( "conditionTRUE - conditionFALSE", "conditionFALSE - conditionTRUE"), + contrasts = c( "conditionTRUE - conditionFALSE", "conditionFALSE - conditionTRUE"), method = "limma_voom", action="only" ) @@ -574,7 +574,7 @@ test_that("Voom with treat method",{ .sample = a, .transcript = b, .abundance = c, - .contrasts = c("cell_typeb_cell-cell_typemonocyte", "cell_typeb_cell-cell_typet_cell"), + contrasts = c("cell_typeb_cell-cell_typemonocyte", "cell_typeb_cell-cell_typet_cell"), method = "limma_voom", test_above_log2_fold_change = 1, action="only" @@ -753,10 +753,10 @@ test_that("DESeq2 differential trancript abundance - no object",{ # .transcript = b, # .abundance = c, # method = "deseq2", - # .contrasts = "this_is - wrong", + # contrasts = "this_is - wrong", # action="only" # ) %>% - # expect_error("for the moment, the .contrasts argument") + # expect_error("for the moment, the contrasts argument") deseq2_contrasts = input_df %>% @@ -767,7 +767,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ .transcript = b, .abundance = c, method = "deseq2", - .contrasts = list(c("condition", "TRUE", "FALSE")), + contrasts = list(c("condition", "TRUE", "FALSE")), action="only" ) @@ -779,7 +779,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ .sample = a, .transcript = b, .abundance = c, - .contrasts = "conditionTRUE - conditionFALSE", + contrasts = "conditionTRUE - conditionFALSE", action="only" ) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index d10faafc..a272a352 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -302,7 +302,7 @@ test_that("Voom with treat method",{ .sample = a, .transcript = b, .abundance = c, - .contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), + contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), method = "limma_voom", test_above_log2_fold_change = 1, action="only" From 78c6838a3d0956e88adf16c95d9944c6c1a4399f Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 11:04:23 +1000 Subject: [PATCH 17/38] temporary save to work on other branch --- R/methods_SE.R | 25 ++++++------------- .../test-bulk_methods_SummarizedExperiment.R | 12 ++++----- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 0f5fca24..4bf0dff7 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -860,25 +860,16 @@ setMethod("adjust_abundance", ) - rr = rowRanges(.data) - - - - - - if(!is.null(rr)){ + if(!is.null(rowRanges(.data))){ new_range_data = - rr %>% + rowRanges(.data) %>% as_tibble() - # # Add names - # when( - # is(rr, "CompressedGRangesList") ~ mutate(., !!as.symbol(feature_column_name) := group_name), - # .hasSlot(rr@ranges, "NAME") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAME), - # .hasSlot(rr@ranges, "NAMES") ~ mutate(., !!as.symbol(feature_column_name) := rr@ranges@NAMES), - # ~ stop("tidybulk says: I don't know how to aggregate the GRanges, as the slot ranges@NAMES, or range@NAME do not exist") - # ) + # If GRangesList & and .transcript is not there add .transcript + if(is(rowRanges(.data), "CompressedGRangesList") & !quo_name(.transcript) %in% colnames(new_range_data)){ + new_range_data %>% left_join(). <<<<< + } # Through warning if there are logicals of factor in the data frame # because they cannot be merged if they are not unique @@ -927,7 +918,7 @@ setMethod("adjust_abundance", # rownames(.x) = .x %>% pull(!!as.symbol(feature_column_name)) # .x # } - +browser() #if(is(rr, "CompressedGRangesList") | nrow(new_row_data)% purrr::map_chr(~ { + if(.x %in% c("LRG_239", "LRG_405")) "BLAAA" + else .x + }) - rowData(se)$bla = if_else(rowData(se)$bla %in% c("LRG_239", "LRG_405"), "BLAAA", rowData(se)$bla ) - - res = - se %>% - aggregate_duplicates( .transcript = bla ) + res = aggregate_duplicates(se, .transcript = bla ) expect_equal( dim(res), c( 99, 8 ) ) From 0f31e395f515b10f0b4a34eb6fb74e4d48d0dce3 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 12:44:06 +1000 Subject: [PATCH 18/38] get as_tibble from tidySE for tidybulk() --- NAMESPACE | 1 + R/methods_SE.R | 33 +--- R/tidySummarizedExperiment.R | 340 +++++++++++++++++++++++++---------- R/utilities.R | 30 ---- 4 files changed, 253 insertions(+), 151 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e28aaff1..e93b80ba 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ import(readr) import(tibble) import(tidyr) importFrom(GenomicRanges,makeGRangesListFromDataFrame) +importFrom(S4Vectors,metadata) importFrom(SummarizedExperiment,SummarizedExperiment) importFrom(SummarizedExperiment,assays) importFrom(SummarizedExperiment,colData) diff --git a/R/methods_SE.R b/R/methods_SE.R index 230c5207..a214d3e7 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -24,38 +24,9 @@ ~ as.symbol(.x), ~ NULL) - sample_info <- - colData(.data) %>% + .as_tibble_optimised(.data) %>% - # If reserved column names are present add .x - change_reserved_column_names() %>% - - # Convert to tibble - tibble::as_tibble(rownames=sample__$name) - - - range_info <- - get_special_datasets(.data) %>% - reduce(left_join, by="coordinate") - - gene_info <- - rowData(.data) %>% - - # If reserved column names are present add .x - change_reserved_column_names() %>% - - # Convert to tibble - tibble::as_tibble(rownames=feature__$name) - - count_info <- get_count_datasets(.data, feature__$name, sample__$name) - - # Return - count_info %>% - left_join(sample_info, by=sample__$name) %>% - left_join(gene_info, by=feature__$name) %>% - when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) %>% - - mutate_if(is.character, as.factor) %>% + # mutate_if(is.character, as.factor) %>% tidybulk( !!as.symbol(sample__$name), !!as.symbol(feature__$name), diff --git a/R/tidySummarizedExperiment.R b/R/tidySummarizedExperiment.R index 67e7cbce..443b394e 100644 --- a/R/tidySummarizedExperiment.R +++ b/R/tidySummarizedExperiment.R @@ -1,16 +1,12 @@ -change_reserved_column_names = function(.data){ - - .data %>% - - setNames( - colnames(.) %>% - str_replace("^feature$", "feature.x") %>% - str_replace("^sample$", "sample.x") %>% - str_replace("^coordinate$", "coordinate.x") - ) +eliminate_GRanges_metadata_columns_also_present_in_Rowdata = function(.my_data, se){ + .my_data %>% + select(-one_of(colnames(rowData(se)))) %>% + # In case there is not metadata column + suppressWarnings() } + #' @importFrom dplyr select #' @importFrom tidyselect one_of #' @importFrom tibble as_tibble @@ -18,71 +14,60 @@ change_reserved_column_names = function(.data){ #' @importFrom SummarizedExperiment rowRanges #' @importFrom tibble rowid_to_column #' -#' @keywords internal #' @noRd -get_special_datasets <- function(SummarizedExperiment_object) { - - SummarizedExperiment_object %>% - rowRanges() %>% - when( - # If no ranges - as.data.frame(.) %>% - nrow() %>% - equals(0) ~ tibble(), - - # If it is a range list (multiple rows per feature) - class(.) %>% equals("CompressedGRangesList") ~ - tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - nest(coordinate = -group_name) %>% - rename(feature = group_name), - - # If standard GRanges (one feature per line) - ~ { - transcript_column = - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - lapply(function(x) rownames(SummarizedExperiment_object)[1] %in% x) %>% - unlist() %>% - which() %>% - names() - - - # Just rename - (.) %>% - - # If transcript_column exists all good - when( - !is.null(transcript_column) ~ tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - rename(feature := !!transcript_column) , - - # If transcript_column is NULL add numeric column - ~ tibble::as_tibble(.) %>% - eliminate_GRanges_metadata_columns_also_present_in_Rowdata(SummarizedExperiment_object) %>% - rowid_to_column(var = "feature") %>% - mutate(feature = as.character(feature)) - ) %>% - - # Always nest - nest(coordinate = -feature) - - } - ) %>% - list() +get_special_datasets <- function(se) { + + rr = se %>% + rowRanges() + + rr %>% + when( + + # If no ranges + as.data.frame(.) %>% + nrow() %>% + equals(0) ~ tibble(), + + # If it is a range list (multiple rows per feature) + is(., "CompressedGRangesList") ~ { + + # If GRanges does not have row names + if(is.null(rr@partitioning@NAMES)) rr@partitioning@NAMES = as.character(1:nrow(se)) + + tibble::as_tibble(rr) %>% + eliminate_GRanges_metadata_columns_also_present_in_Rowdata(se) %>% + nest(GRangesList = -group_name) %>% + rename(!!f_(se)$symbol := group_name) + + }, + + # If standard GRanges (one feature per line) + ~ { + + # If GRanges does not have row names + if(is.null(rr@ranges@NAMES)) rr@ranges@NAMES = as.character(1:nrow(se)) + + tibble::as_tibble(rr) %>% + eliminate_GRanges_metadata_columns_also_present_in_Rowdata(se) %>% + mutate(!!f_(se)$symbol := rr@ranges@NAMES) + } + + ) %>% + list() } -change_reserved_column_names = function(.data){ +#' @importFrom stringr str_replace +change_reserved_column_names = function(col_data, .data ){ - .data %>% + col_data %>% - setNames( - colnames(.) %>% - str_replace("^feature$", "feature.x") %>% - str_replace("^sample$", "sample.x") %>% - str_replace("^coordinate$", "coordinate.x") - ) + setNames( + colnames(.) %>% + sapply(function(x) if(x==f_(.data)$name) sprintf("%s.x", f_(.data)$name) else x) %>% + sapply(function(x) if(x==s_(.data)$name) sprintf("%s.x", s_(.data)$name) else x) %>% + str_replace("^coordinate$", "coordinate.x") + ) } @@ -93,26 +78,201 @@ change_reserved_column_names = function(.data){ #' @importFrom purrr reduce #' @importFrom SummarizedExperiment assays #' -#' @keywords internal #' @noRd -get_count_datasets <- function(SummarizedExperiment_object, feature_column_name, sample_column_name) { - - - map2( - assays(SummarizedExperiment_object) %>% as.list(), - names(assays(SummarizedExperiment_object)), - ~ .x %>% - tibble::as_tibble(rownames = feature_column_name, .name_repair = "minimal") %>% - - # If the matrix does not have sample names, fix column names - when(colnames(.x) %>% is.null() ~ setNames(., c( - feature_column_name, seq_len(ncol(.x)) - )), - ~ (.) - ) %>% - - gather(!!as.symbol(sample_column_name), count,-!!as.symbol(feature_column_name)) %>% - rename(!!.y := count) - ) %>% - reduce(left_join, by = c(feature_column_name, sample_column_name)) +get_count_datasets <- function(se) { + map2( + assays(se) %>% as.list(), + names(assays(se)), + ~ { + + # If the counts are in a sparse matrix convert to a matrix + # This might happen because the user loaded tidySummarizedExperiment and is + # print a SingleCellExperiment + if(is(.x, "dgCMatrix")) { + .x = as.matrix(.x) + } + + .x %>% + # matrix() %>% + # as.data.frame() %>% + tibble::as_tibble(rownames = f_(se)$name, .name_repair = "minimal") %>% + + # If the matrix does not have sample names, fix column names + when(colnames(.x) %>% is.null() ~ setNames(., c( + f_(se)$name, seq_len(ncol(.x)) + )), + ~ (.) + ) %>% + + gather(!!s_(se)$symbol, !!.y,-!!f_(se)$symbol) + + #%>% + # rename(!!.y := count) + }) %>% + when( + length(.)>0 ~ bind_cols(., .name_repair = c("minimal")) %>% .[!duplicated(colnames(.))], # reduce(., left_join, by = c(f_(se)$name, s_(se)$name)), + ~ expand.grid( + rownames(se), colnames(se) + ) %>% + setNames(c(f_(se)$name, s_(se)$name)) %>% + tibble::as_tibble() + ) %>% + + # Add dummy sample or feature if we have empty assay. + # This is needed for a correct isualisation of the tibble form + when( + f_(se)$name %in% colnames(.) %>% not ~ mutate(., !!f_(se)$symbol := as.character(NA)), + s_(se)$name %in% colnames(.) %>% not ~ mutate(., !!s_(se)$symbol := as.character(NA)), + ~ (.) + ) +} + +subset_tibble_output = function(.data, count_info, sample_info, gene_info, range_info, .subset){ + # This function outputs a tibble after subsetting the columns + .subset = enquo(.subset) + + # Build template of the output + output_colnames = + slice(count_info, 0) %>% + left_join(slice(sample_info, 0), by=s_(.data)$name) %>% + left_join(slice(gene_info, 0), by = f_(.data)$name) %>% + when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) %>% + select(!!.subset) %>% + colnames() + + + # Sample table + sample_info = + sample_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(s_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + range_info = + range_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + gene_info = + gene_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + # Ranges table + count_info = + count_info %>% + when( + colnames(.) %>% intersect(output_colnames) %>% length() %>% equals(0) ~ NULL, + select(., one_of(f_(.data)$name, s_(.data)$name, output_colnames)) %>% + suppressWarnings() + ) + + + if( + !is.null(count_info) & + ( + !is.null(sample_info) & !is.null(gene_info) | + + # Make exception for weirs cases (e.g. c(sample, counts)) + (colnames(count_info) %>% outersect(c(f_(.data)$name, s_(.data)$name)) %>% length() %>% gt(0)) + ) + ) { + output_df = + count_info %>% + when(!is.null(sample_info) ~ (.) %>% left_join(sample_info, by=s_(.data)$name), ~ (.)) %>% + when(!is.null(gene_info) ~ (.) %>% left_join(gene_info, by=f_(.data)$name), ~ (.)) %>% + when(!is.null(range_info) ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) + } + else if(!is.null(sample_info) ){ + output_df = sample_info + } + else if(!is.null(gene_info)){ + output_df = gene_info %>% + + # If present join GRanges + when(!is.null(range_info) ~ (.) %>% left_join(range_info, by=f_(.data)$name), ~ (.)) + } + + output_df %>% + + # Cleanup + select(one_of(output_colnames)) %>% + suppressWarnings() + +} + + +.as_tibble_optimised = function(x, skip_GRanges = FALSE, .subset = NULL, + .name_repair=c("check_unique", "unique", "universal", "minimal"), + rownames=pkgconfig::get_config("tibble::rownames", NULL)){ + + .subset = enquo(.subset) + + sample_info <- + colData(x) %>% + + # If reserved column names are present add .x + change_reserved_column_names(x) %>% + + # Convert to tibble + tibble::as_tibble(rownames=s_(x)$name) %>% + setNames(c(s_(x)$name, colnames(colData(x)))) + + range_info <- + skip_GRanges %>% + when( + (.) ~ tibble() %>% list, + ~ get_special_datasets(x) + ) %>% + reduce(left_join, by="coordinate") + + gene_info <- + rowData(x) %>% + + # If reserved column names are present add .x + change_reserved_column_names(x)%>% + + # Convert to tibble + tibble::as_tibble(rownames=f_(x)$name) %>% + setNames(c(f_(x)$name, colnames(rowData(x)))) + + + count_info <- get_count_datasets(x) + + # Return + if(quo_is_null(.subset)) + + # If I want to return all columns + count_info %>% + inner_join(sample_info, by=s_(x)$name) %>% + inner_join(gene_info, by=f_(x)$name) %>% + when(nrow(range_info) > 0 ~ (.) %>% left_join(range_info) %>% suppressMessages(), ~ (.)) + + # This function outputs a tibble after subsetting the columns + else subset_tibble_output(x, count_info, sample_info, gene_info, range_info, !!.subset) + + +} + +#' @importFrom S4Vectors metadata +f_ = function(x){ + # Check if old deprecated columns are used + if("feature__" %in% names(metadata(x))) feature__ = metadata(x)$feature__ + return(feature__) +} + +#' @importFrom S4Vectors metadata +s_ = function(x){ + if("sample__" %in% names(metadata(x))) sample__ = metadata(x)$sample__ + return(sample__) } diff --git a/R/utilities.R b/R/utilities.R index e3f9cdc6..6a2bcab9 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -1460,36 +1460,6 @@ rotation = function(m, d) { ) %>% as_matrix) %*% m) } -#' -#' @keywords internal -#' @noRd -#' -#' @importFrom dplyr select -#' @importFrom tibble as_tibble -#' @importFrom tibble tibble -get_special_datasets <- function(SummarizedExperiment_object) { - if ( - "RangedSummarizedExperiment" %in% .class2(SummarizedExperiment_object) & - - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - nrow() %>% - gt(0) - ) { - rowRanges(SummarizedExperiment_object) %>% - as.data.frame() %>% - - # Take off rowData columns as there is a recursive anomaly within gene ranges - suppressWarnings( - select(-one_of(colnames(rowData(SummarizedExperiment_object)))) - ) %>% - tibble::as_tibble(rownames="feature") %>% - list() - } else { - tibble() %>% list() - } -} - combineByRow <- function(m, fun = NULL) { # Shown here #https://stackoverflow.com/questions/8139301/aggregate-rows-in-a-large-matrix-by-rowname From 7b4b3102bdb6c7e7a62d663cdc220a71e03e1322 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 12:58:41 +1000 Subject: [PATCH 19/38] fix tests --- tests/testthat/test-bulk_methods.R | 8 ++++---- .../test-bulk_methods_SummarizedExperiment.R | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index ed44cd17..b3fc096a 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1913,14 +1913,14 @@ test_that("pivot",{ test_that("gene over representation",{ - df_entrez = se_mini %>% tidybulk() %>% as_tibble() %>% symbol_to_entrez(.transcript = feature, .sample = sample) - df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) - df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) + df_entrez = se_mini %>% tidybulk() %>% as_tibble() %>% symbol_to_entrez(.transcript = .feature, .sample = .sample) + df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = .sample, .transcript = entrez, .abundance = count) + df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) res = test_gene_overrepresentation( df_entrez, - .sample = sample, + .sample = .sample, .entrez = entrez, .do_test = do_test, species="Homo sapiens" diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index d10faafc..c2b5f988 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -15,7 +15,7 @@ test_that("tidybulk SummarizedExperiment conversion",{ expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 21 ) + expect_equal( ncol(res), 13 ) res = res %>% tidybulk:::tidybulk_to_SummarizedExperiment() @@ -33,19 +33,19 @@ test_that("tidybulk SummarizedExperiment normalisation manual",{ res2 = tidybulk(se) %>% identify_abundant() %>% scale_abundance() - res %>% distinct(sample, multiplier) %>% pull(multiplier) - res2 %>% distinct(sample, multiplier) %>% pull(multiplier) + res %>% distinct(.sample, multiplier) %>% pull(multiplier) + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) expect_equal( - res %>% distinct(sample, multiplier) %>% pull(multiplier), - res2 %>% distinct(sample, multiplier) %>% pull(multiplier) %>% as.numeric(), + res %>% distinct(.sample, multiplier) %>% pull(multiplier), + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) %>% as.numeric(), tolerance=1e-3 ) expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 25 ) + expect_equal( ncol(res), 17 ) res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) From 4a867ff378ad1e735616d165f57c2cb7247f18ac Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 13:22:33 +1000 Subject: [PATCH 20/38] fix dependencies --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d9666716..1bbdbd10 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,8 @@ Imports: scales, SummarizedExperiment, GenomicRanges, - methods + methods, + S4Vectors Suggests: BiocStyle, testthat, @@ -53,7 +54,6 @@ Suggests: Seurat, KernSmooth, Rtsne, - S4Vectors, ggplot2, widyr, clusterProfiler, From a76efa9199cc0213721e0f90c91942da173b96e6 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 16:42:49 +1000 Subject: [PATCH 21/38] fix examples --- R/dplyr_methods.R | 8 +-- R/methods.R | 63 ++++++------------ R/tidyr_methods.R | 32 ++++----- data/se_mini.rda | Bin 31269 -> 43019 bytes man/adjust_abundance-methods.Rd | 2 - man/as_matrix.Rd | 3 +- man/deconvolve_cellularity-methods.Rd | 2 +- man/dplyr-methods.Rd | 2 +- man/ensembl_to_symbol-methods.Rd | 5 +- man/fill_missing_abundance-methods.Rd | 2 +- man/get_bibliography-methods.Rd | 4 +- man/join-methods.Rd | 6 +- man/nest-methods.Rd | 4 +- man/symbol_to_entrez.Rd | 5 +- man/test_differential_cellularity-methods.Rd | 11 --- man/test_gene_enrichment-methods.Rd | 6 +- man/test_gene_overrepresentation-methods.Rd | 5 +- man/test_gene_rank-methods.Rd | 7 +- ...test_stratification_cellularity-methods.Rd | 9 --- man/tidybulk-methods.Rd | 2 +- 20 files changed, 70 insertions(+), 108 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 80f8a1ea..f03b8028 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -723,7 +723,7 @@ rowwise.tidybulk <- function(data, ...) #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% left_join(annotation) #' #' @rdname dplyr-methods @@ -763,7 +763,7 @@ left_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% inner_join(annotation) #' #' @rdname join-methods @@ -802,7 +802,7 @@ inner_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% right_join(annotation) #' #' @rdname join-methods @@ -843,7 +843,7 @@ right_join.tidybulk <- function (x, y, by = NULL, copy = FALSE, suffix = c(".x", #' #' @examples #'`%>%` = magrittr::`%>%` -#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(sample) %>% mutate(source = "AU") +#' annotation = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% distinct(.sample) %>% mutate(source = "AU") #' tidybulk::counts_SE %>% tidybulk() %>% as_tibble() %>% full_join(annotation) #' #' @rdname join-methods diff --git a/R/methods.R b/R/methods.R index 87db987b..02ae9a1d 100755 --- a/R/methods.R +++ b/R/methods.R @@ -31,7 +31,7 @@ setOldClass("tidybulk") #' #' @examples #' -#' my_tt = tidybulk(tidybulk::se_mini) +#' tidybulk(tidybulk::se_mini) #' #' #' @docType methods @@ -1353,9 +1353,7 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' cm$batch = 0 #' cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 #' -#' res = #' cm %>% -#' tidybulk(sample, transcript, count) |> #' identify_abundant() |> #' adjust_abundance( ~ condition + batch ) #' @@ -1675,7 +1673,7 @@ setMethod("aggregate_duplicates", "tidybulk", .aggregate_duplicates) #' library(dplyr) #' #' # Subsetting for time efficiency -#' tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1) +#' tidybulk::se_mini |> deconvolve_cellularity(cores = 1) #' #' #' @docType methods @@ -1815,7 +1813,10 @@ setMethod("deconvolve_cellularity", #' #' @examples #' -#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample) +#' # This function was designed for data.frame +#' # Convert from SummarizedExperiment for this example. It is NOT reccomended. +#' +#' tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample) #' #' @export #' @@ -2014,7 +2015,10 @@ setMethod("describe_transcript", "tidybulk", .describe_transcript) #' #' library(dplyr) #' -#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature) +#' # This function was designed for data.frame +#' # Convert from SummarizedExperiment for this example. It is NOT reccomended. +#' +#' tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature) #' #' #' @@ -2882,8 +2886,10 @@ setMethod("keep_abundant", "tidybulk", .keep_abundant) #' @examples #' \dontrun{ #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) +#' library(SummarizedExperiment) +#' se = tidybulk::se_mini +#' rowData( se)$entrez = rownames(se ) +#' df_entrez = aggregate_duplicates(se,.transcript = entrez ) #' #' library("EGSEA") #' @@ -3075,9 +3081,8 @@ setMethod("test_gene_enrichment", #' #' @examples #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#' #se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez) +#' #df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) #' #' \dontrun{ #' test_gene_overrepresentation( @@ -3245,15 +3250,14 @@ setMethod("test_gene_overrepresentation", #' #' \dontrun{ #' -#' df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -#' df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -#' df_entrez = mutate(df_entrez, do_test = feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#' df_entrez = tidybulk::se_mini +#' df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) #' df_entrez = df_entrez %>% test_differential_abundance(~ condition) #' #' #' test_gene_rank( #' df_entrez, -#' .sample = sample, +#' .sample = .sample, #' .entrez = entrez, #' species="Homo sapiens", #' gene_sets =c("C2"), @@ -3591,7 +3595,7 @@ setMethod("pivot_transcript", #' #' @examples #' -#' tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0) +#' # tidybulk::se_mini |> fill_missing_abundance( fill_with = 0) #' #' #' @docType methods @@ -3862,19 +3866,8 @@ setMethod("impute_missing_abundance", "tidybulk", .impute_missing_abundance) #' ) #' #' # Cox regression - multiple -#' library(dplyr) -#' library(tidyr) #' #' tidybulk::se_mini |> -#' tidybulk() |> -#' -#' # Add survival data -#' nest(data = -sample) |> -#' mutate( -#' days = c(1, 10, 500, 1000, 2000), -#' dead = c(1, 1, 1, 0, 1) -#' ) %>% -#' unnest(data) |> #' #' # Test #' test_differential_cellularity( @@ -4019,15 +4012,6 @@ setMethod("test_differential_cellularity", #' library(tidyr) #' #' tidybulk::se_mini |> -#' tidybulk() |> -#' -#' # Add survival data -#' nest(data = -sample) |> -#' mutate( -#' days = c(1, 10, 500, 1000, 2000), -#' dead = c(1, 1, 1, 0, 1) -#' ) %>% -#' unnest(data) |> #' test_stratification_cellularity( #' survival::Surv(days, dead) ~ ., #' cores = 1 @@ -4138,10 +4122,8 @@ setMethod("test_stratification_cellularity", #' #' @examples #' -#' # Define tidybulk tibble -#' df = tidybulk(tidybulk::se_mini) #' -#' get_bibliography(df) +#' get_bibliography(tidybulk::se_mini) #' #' #' @@ -4236,9 +4218,8 @@ setMethod("get_bibliography", #' #' @examples #' -#' library(dplyr) #' -#' tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature) +#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature) #' #' @export as_matrix <- function(tbl, diff --git a/R/tidyr_methods.R b/R/tidyr_methods.R index c9693c4e..dc103381 100755 --- a/R/tidyr_methods.R +++ b/R/tidyr_methods.R @@ -42,8 +42,8 @@ #' @examples #' #' library(dplyr) -#' -#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -feature) %>% +#' +#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -.feature) %>% #' unnest(data) #' #' @rdname nest-methods @@ -57,16 +57,16 @@ unnest.nested_tidybulk <- function (data, cols, ..., keep_empty=FALSE, ptype=NUL { cols <- enquo(cols) - - + + data %>% drop_class(c("nested_tidybulk", "tt")) %>% - tidyr::unnest(!!cols, ..., keep_empty = keep_empty, ptype = ptype, + tidyr::unnest(!!cols, ..., keep_empty = keep_empty, ptype = ptype, names_sep = names_sep, names_repair = names_repair) %>% # Attach attributes reattach_internals(data) %>% - + # Add class add_class("tt") %>% add_class("tidybulk") @@ -84,7 +84,7 @@ unnest.nested_tidybulk <- function (data, cols, ..., keep_empty=FALSE, ptype=NUL #' #' @examples #' -#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -feature) +#' tidybulk::se_mini %>% tidybulk() %>% nest( data = -.feature) #' #' @rdname nest-methods #' @name nest @@ -97,26 +97,26 @@ nest.tidybulk <- function (.data, ..., .names_sep = NULL) { cols <- enquos(...) col_name_data = names(cols) - + .data %>% - + # This is needed otherwise nest goes into loop and fails drop_class(c("tidybulk", "tt")) %>% tidyr::nest(...) %>% - + # Add classes afterwards mutate(!!as.symbol(col_name_data) := map( - !!as.symbol(col_name_data), - ~ .x %>% + !!as.symbol(col_name_data), + ~ .x %>% add_class("tt") %>% add_class("tidybulk") )) %>% - + # Attach attributes reattach_internals(.data) %>% - + # Add class add_class("tt") %>% add_class("nested_tidybulk") - -} \ No newline at end of file + +} diff --git a/data/se_mini.rda b/data/se_mini.rda index 518d41dc3e801bdd0c756003530351acb83201f5..678bc2c518ecb560b5555e026ac8747323c59e19 100755 GIT binary patch literal 43019 zcmeHwdyt&fb!VIYzMg?Nj2aL+^mO+;te&1$cT0=}m=Tgd z0zC#j011pgA@MQ~OFV@909qh~u)&rE#=_Xpe#8dH2_k+Jwd-UjwJ9%Ur?OPmRZH!9 zojv!S`#bmR79>v9{y9^nd!F~+bI(2Z+^x4+B(-eJ4$AJFiY%{zBlvlHjM zTjYHvrY^g5{l+a@Tu1%X@Ye0ywy)o_Ya^t);=2E4!^UkJc5L3YdHtnZ_iWs$h*)z|Ppd%{38hShr`V;v3f) z@BC(9_I_vMmW^9CZrjx{ZHn_?L9*OfGobx@PJ3qKu5}wsAbNXK?`W*5*V$8jqZr=m zHO<`fU!Oc&zh&J{4xV$&tfAvnbz%pH@__2l!n*ZWt-E5-%5&TK)f?CE+P=etAde0s z9R4T2N|OJSNSw?6OxFQJgz7oJC%lk(HZ;vdvn9-zNeBwrJl0K^&E*TD5R8qg;2d2x z=M+>qlP(ppn!@a8c5F;*7SdQ$$&6M=W?~xoJo-sy^F>#Y(X^tYK*K^NH;VSc(Xr8N z)+wd3yy!Dn<6$wKFTsYelrH7eIzynjWUh$*!a}K##(0u>Z*)F~E|PiQ%IH{W44nl< z-*K|wTVxxGYHOj8&Z7CCRLH5Im>9k8( zb$H1SoL!j1z}ba5^NjnzV;s#hzXpq@K5B=VOh%gu@@cg?Ii_tU$I|dmax9}R9LvCZ z=E8VJ;R8m|dzj;3NB7(^RCpQr1rL0$2{Y+z zjIZe@IW`8{f=pWHCmhRWb%t1=V`036{tb?JOpO+@Y4vYVh^0mj5;!nm93C`TP(wZ} z6bpH@#ae1#FjmaMO+1FYE|@UOaf9K9oS!whgvETJ1PQkl8(>yvmBV;Utt#;%aLYW% z$I61sy$@#F-r?E?-#VQfD?wc zQ_N{lutf#+Wx%7+;j@xXS3awWG2rZ^4KWTECyTlUsAl z#j*egsdkc1_ES=Sruaag=Uju$ z$Zab*AGd`Q2u5SXZ%lW7&sHp9Z0pDvS;V!ki<4TqBf9KbE|kUvs-_YSNX(Wwy8DA1XIkU}vuCkjz~oY~q&0aK{5c|@@$sI|#_Zv32ey^? zh70#_Fvcpxj!}H@@cTnP=T8WHUq$T{A7;ih_3~8!Mw{Id?qW9d`K*xjI%BbokgvvS zUOsk&De&zEe&k7xrD47WV_BrK1-@-SE#S=n5?#laC#6^L*k|kHqdJ z=5XU9uE6ntmVQ? z*pa-D@w-AHls_#*&6sL&eb9O@lWO#9<{A$Bom z9MchSmVmiIDeq^7A7Y@@fN|)U_2v%&oX>Sh7x-?jK49;}@EG5pm-rMPYc%cSILi_Z z1YYWxGLBZh$*6Dmu>cG-`Ay?Ea)U@KFwK;JqAGcwaC0cqzuV^`gJ>aOCo32&3imuD|mY{i5Z; zj_HI4td-SN*q;*3-sru`&ptE&iX6PIn-pJy_ymnk3kA+cAow|xZbvE3U%cx%QNWKO zF%v=2KhWb#vVR1~0zHj~r97_~G|V|noov%QW`*nLyfaG~-W#+Ihk~k^N1rg0ZPBph z#(2;$vRL$+X0fQx=Yp}4rsAMDs=nlqik*~sA^N~A#-4I=2mVnU8>sE_NippgG9Qrr zS*++Yeo)HzEBK~(6 z7sF5%c(*c++`#KO=MQ8Be$J0*4n||CnNPE5$+}7VE18p-y9+U1&~+|6b2HIHWmw{b zh>nBNystUxZ?Z;7gCQ&y*a?_Xv+tl|erm0$Vl;L*8y(g83`R%w3LfwyXjCo>7av0s0-L-~Oe~I|mzm zySF_4oe{rAGH<)ierd{Y!QyXB@gECqUKa2ledmP0VG9x@-3ejj%N_qS@39CSarFcaPk(3 z{JlbhFBG=}#6i&X%qx~dubyLBs7J78dnEGi2L$esel5WTMUn7-^qj!ITGcZM9UCz~ z|LDk~#fje#n?E{VAWFL9v_;8WVq$;fa5Bq=|H#}J3>>k*zi8!{{OA5KuD8>nWg>wd z?4wTaEvwKiGJ#YUF>PAx)1wO<|G{T0LSJ-r#lgqieD?lCKEE1~D+Wdm7#^-tQ8TF|LKwm7+bN4wq-`piUR5V}t6 zuuMa{cmxedD;MJ3-H^vKW2# zjN=Hv{DVm@zbbu`IFcL#Q03t7T>#5M$_hzm%qN%FzPlF%{_jrLAoMqXNETic8IXV- z^hcIFdw*Nu_#+Mq&=T_q2KGU^&Ben^1d)a0DEiU6Evo)TyzK8;4XQdt2k861hzzR! zee8W!LgN-B=wH}JMT_gd2UtJUeRDdp0{T;tD?Bh|85l3(Q)##E>3Wj-gLwk$>nbN- zI4OEJGzHp{?OTvIp)aI_ziC9^l_cD3Yrv*_KjGCwmOLQ^R*X+ zf70cnYf0$c0xiMCVE(~;L9XL}{wP=$(rnaEUoYbVlm1@zRnXcF@|L(ILD$Y(6`h|4 z-W2+3$KTTv`W9I)+RmrugfD3?@wRq%$m*fMK1OJNMkLT0Y=`#ETLsMnw=4>M@_K>C zR|}MI<#XqNA6#5mB`&CY6Hux5v@^hQ5x?q>6_M!e-z0FkDAE@lf7H>-1frxn->@is ztY5DFJ9NSdl)iFN^sz3`kE<`Ou>C+B)b>yRt>w^K5zuFPLXSAw*++kQb!0g7)GjNa zGrN11K3*5PWI=-NoD#&98s8U^cRYIZ?wFM43wC|HXZiH- z-wS;1X971_!297rL0n0f_AEwsI6JXU?s-Z0ZhgRi>UY#Xa-ZmJ>aU==%?8n0Yv%e77h%7`w*9xZj^zrK~gD4XCy~rYDDQ<5>5cSIk z9sRjCEkfP*3Uv7bex-ptA+>x7J#e)BXTK-yoq10LnSm8hw`s5qJ zKjYv}-2VHh*lUGc&=FaORE;c)Z0QTS+xf+vf3^J)(V>!qA8-)s>iv$cM1s6cNpHgk z8{^e}my3kfK%*bh?atoG$O_2i1FfGIefr#!B7gF@KzF~Tqdyn=TR)8qLMwX)=pP;W zhQ;aAZs1OWb*KC0U+*L%qWPWy`h0;7eb>@zk9+Pwmdkp-e4=YmapTf@7r6cg@}A1ePVV|uzDx^9h3E$m_S09~_(nuk$0z9~K?h~Q(Um=` zfcm1%em*MkLHEDYvgnz=6F3nGJa|IjlYziNdv~OTe@Nz=E{!aW0_O;&Pi?fQK8M`X z75dCjWDvTJ3`j`HaKunPu3$gOaaPaM)ayh>k50YY0Dlg^JoKu?jbi{ zZhg?DB>d~T0^}*X=;wu`A6+iAI|q~#TkS~c?_ORs0$SfQ zz@J}bnP@y&p^?k9oRwc8AB6h+;#QF$4xp$0{pl-~rQ=xZ^!K|wT#Hb}Li&LisP!NF zmGC93&wB6CS2;#d3^%Cuf4&Ixo`n32c)aO#tDyRS;`Bf0ptw6hpOpDHD}MjT>EYbB zvuPQ$f0IBFCupau|HZw+ze)tOwJY>1Uylr;`-ywLOW^86D2OSzl$T~8AS8Z)lJJMcTR#5;!o=@ zIK8xJ&>gPdQ>&!@vE>5YK1etAMA4Ol{-y5E`sGjm!qVE`1tQjS`3grL=vf4Lg1+GP zHN>x*C(-6{9y%@hQ^LWxRDSPwEvo%)TC_CYKWP3RxHq7_)@zc~`_?xigV0(F67-Ov zn08^S?fv;Jk>!6$78d>ZcZGgz!mty1PnNK?Jj>r96HFHh3++Nd?YZw6%ToG)IEFsa zvwT`h0zYhl7Q*HJKXKCXwEodYEKNVSM|BNC^NW_Iosqyn|3A@xu%kng$n<^G^`= zF7(sl{shT(o1h0J;OT|`X!Yo-uE6K|4qdgZpVOA~=T4b^e*Q5KH@g?5-gc)yDi9@I;`rY@X%X5fw2u4R*G0mSQr8Rm zd;A6~pxZ=^zWjBepZvE1kNtzdtrjHcD-IrV^Y!_r$Zxt%1g@lyMZzBu23L+SdgF-VjtSWy@FPf=p8vB@EKWO)bFN* zK2N_Q620XXoORw4?%Czw{s23miTC5!Kb4R1Z4(aKp_30-2JWl0Ekoj}0Vz+`m6NCGzdSK6@~^n(W8c~v8HV07 z-fPAFys*sC-~a2#V7ksQes`Y5xsYD{N3wdTcSX;T`lsh^#C4mjOy}`c@SgwXo)uA7&M)+x zvB)rJ&H{tlKib(7*c51qSV3QUSB&EN7e)QFixV*Ewb!iz_5mHgQ_%a#QfhD*gIesRzp9hbLUP&}`UEn;n->nD8u};2rjje$Gu}*1cSK6C(?Hvw;-bMod z=PO-<_Hh zig47fwXU5Ddlo@H60R>b-y)AVeL(mWRMHX%*1NeE;8G-lSCUDR%kBfnNu0GxqJ=oidgtjHO=gui~ z|A^!C1L`LegP+&a@jlY%(?p&D6YUJn2hIb~8-&`vyT1?0>G)svu~gsXA6i9ya(e1D zOXG6}g!ulTsL?{7y2g7ym;L7Q0{VkoKTqSHw_%>4NB6k-M!aEOK<{#P6diQ!X!}op z+@e&C1fF@F3_>oBwV#bW!%om=WnlER-xvA=79{8@68a;Ke(t*=KR(n!{&RggOAJ`QvYc4yv6A&zfoQs z!SiE!aCw}q`!JY<=SM3e%c3K$e>Xomjz^yqIr8~omoFb}TE6OSu%s4r@9T;iMD-AV zhzs1OFfVty^#VP>!_uDS1Mm;@`ud@AWNFRgO%gh5K|((-ds$Z)u0;NzTU?yo>Gapn zI}Jx)-n9tbv&BLk*JV!bp1;ruX+Za*Z@n`z8nmrvfd0|JO%~VpxhfHa?wAt!L-TzS zy6?Nfze4!>KIx*K@V|W6LbdPq3xtn#{=f;LUv>$=Qf%M4dEsB_ApHFBM}&W1PT+_I zXBFf)dd}R{wH&HN0^Rzd#|5FIF9_ewza_ZP&jG$qIJ)ocS+ooiwqHLr#{mD9*DXft zsZXHKi9YM{2=s>qp_#}c>Ni)Po|bczQ%}(I3l^n=vd-w~KM>k2J0j7<=K*QYvOc*; zeX0Fj+4amAru!@6+1U#f)CUa+J|NDUNu3SV@{d(7v(C+>OCFC>E z+D|7EeSDq}=e4J|3x5|0v?b4fHFB5$%nR-9Sq80-1Y+J^d6#AAI7Y14=N-iT>Yf!~ z|HAVP_=PTsME?C>S%`veJScoOzsh$AtsfEjr6jO!fxdr3yTax1Ax`i)KG27rk@}A> z2>g-^Q2mI!`56-VG7J1T-_E`hZ-_qpCDr)H^7NlNxh!+lp{N>80L zSVj0xx;(xj5_(^M9MCN!{Jkdye(T1_AZNwrKz_5zMsHv~l>dx7Ke_pZ{e*S$>~X6~ zkNi~NGq(%Gb3E4&z4)f^F<)OMp?_G63__3IAaw6yp(_?7=yN6Ee^U&gKTiui0lC|M zzPe5LBIf`%9jaW9@B?t+bPzd|Jxz9@U909&N7o%UDkKGGaU-`S{rM@kFLA#s6$9%izs9NuX zqDy!6M9;;UaLqi)xIzE6P{)n)+XC6DbcM8_`49Q=)}F}AA|EW(-hJX2`iX4N`n=#^ zB=x`W?*%^e9)Sn06Zn}E0tff&WtOe;Arz0h_uoYSa8KZ+BB5VAw`efB#kGt2oyf5D zkFaicoU}ZRE6gj#EywqSK3@~FHGk{4pSn`o!8*Zuh)TkDcEAAC10>{+Bd7cH>2j$- zpYEZm$ZvCe$E6J;!r!Og8{d1e&=0tA3($Q{$Lsu|>jiqYHqRgY481G5((ZdjLEFLlST20pHZOXp z54p>;*eB@lL8;(MiiZ1X>^tc`c7i%WQ$JfjM*=pj>jAZp_FX4%f47eh0{3&JFA$7w zS+od!@i!0&?aZZ<|5a#=@57=-&7Sao`MyQrNU87dUVc^N&;NIU?|1Tff&WrT58h@~ zXk%C4rbU5R&zD6)-|qCT61in^K?{*ZX{B(eLqdQ3>jJ;~q`;Tmz%0e}90btw(o=6* z7UZ;dL}+}@>Z+d5E-%w}LZSb}0$tB^;$6aUB~3Z`4Lys{j;6rfZwNfKTHu#n6S#F= z;HsX58mIlk(D$rXi3=UaohQMvkPc8k?c!P6ZHpW?*2 zyCCo$fiV@~I|k1CbgU(OBo64<&qZ&KqrdtSp~W2u`uq)+)^Q_$?`ewO z_wEq*(-$Ly=ySBXCHZrG|7h6yhxfrc{`K=#rtLdTJm+_0f_A4ZN_$8~L{21>ed(Q@=2xmFbDp)JB6b+nk1px54vgfqau!P&du;KxNx{o^Xpt#4S2 z{`QFD2%w95!gtT#wBFjt@|A`i3tiFATX0rErL$O~kLMCsb%h={_t8sNMn<6bwxbiG zhy1cYLT~H}TsJRpgM(6?@0rUYBcPiX3`meIW6uSz5C(m#7depyI!@#n=`IN*{eRvci=_TfUg;SG=K%Pb4!Asu z=LwNjOVFM(M%};HxG)kLeu6sSeowd34Hi`fsaK3uUQbdeqP|$|ChjxJ%N`-0&f_x5QU!WT)fg{J%}vS zd*CB(U%i}!KPGBI32sU{a?%nypLd=S8tZ%i0KM-vD@UQ{`pHP@Jt7N*Uf3b@9kTxL z`2*cA4jmKuhz0-Z0)7V6absVrMsL|BiCO;n=Iw6$?-6zSgc~pB0r69H@*}^ph`x84 zcKnYz_&ACDGj6^w9}#-XqQF13fIo-O6#D4L1pcw(JNt#=`8o9=kw5C}`tAclwf!vP=UjL0N2|K8eq=x&&w)l9-_48G!@9l6`RC?2E7Ei8?XL;_)DHw!P*WgX^M>&E zx%iahf-2D-k%K{pyP{tb4nF5W?mR;`T_AF-r}sO(Lkq$elJAi{p^yHJz^#sc)a8}$ ztrz}(eu@mD{yXN@<+jMM`E#Sr?)^uEKJ~0X#P`0Jgx;SPIJiD=KDfu#$NI`xb^RQz zP~4y6>pZ*959;UZ-17nG1M2q%%tn?)pCWb?52oh0S0c~3}Vt+sUL?rYo5@<`7Uo43BbL`t9 zmvGR1UG2m<;nsc!kZTWmfI5B`pUS`QhpvVI?1Ny^RU;NfoGO3!GT{pVTOm0Nc>)6M z_g>LZyb|J@@RN&ARM&K4ex_Qp#DG8i+Yx!@0UhIF5jT3-%y#{ zuz4#BewY89V}D}I{?1&$xT*}Z3V!zO-J+LR7W+A}usmKF$JfGI0^cJVmMi5ZzO^?f zSEg$CI?{5biLZH0m8<0${YKnjLse@P)EY8$r>)-)TCTRn@jb9Xd3pkRjL(3dD%WS{ z^hL!q;2w9WFO#$7_{b*_jM_65n{r^13&R349l7 zP+r-D2Zqb7smXFvfBq~iw_4-qKPX4LxuOE4sZBVY{f{Z`FXTxG+r#&E3sWF=;G)qkTh>&F~Zjs3tO3-n8n zjTUS*IJ}XnRQda|V>an`5wn(lCAz^;!{9OdaCA@fSS4ovD4OJ6^_%5+4o1f?6idYX8=nvT@@1}FLIs1z^!5ACxnroX{grG1 z6!$RYzZjhfT~yrE3XEr{QsZ1cgIIDToEcVXGny!pm5HXC_*7+TI;TrXD1;_cyzt?= z&`|J9vQkGFr5G*e(B4p`&O3w7GYe~#EN0PCc?`=^KCM;i(-rg_RAw~!r7E-aQrSyp z>nk;An`I0)=~q-(Y1PJIb*j>uZa1ODqE2~6hu&H zsh555b=nBGpwekysC~?F)7-XE$~+Uzb&ZCI^M=zxc+wFYlPh~reX)~r_0dr+NRp|i~4 z+SW0L)u}oC<)pCMtTkbNP;EBVo7J|SF2d@nwpXf^bz*|r__X?~Hqj_+@3l$Iu3@cS zZXj`lwdT0r9c#@gbxBZbHq?+>rww-mwa#kvF*H%0$z<~wu_Z_Q$uV8m;Y7Ku`6rwh zpVM6-HBqUxG&hHaLeDH>%h7?Qa-f2VN&{ALrB)r4k`uMI3$}1#q8w|i-pQ#6sfqe( zze0kE*)|#uCuZk#eFPIt4K9PD6NAGK$%z&Y52=aPOjXx&Xei{HU}CNAA_hn8p~-SR z*VYqV$fWOaa$*`|3??T#I-!%(%@$VRWMdLZD41;2*C@YP(eRn{qevOt|E`dsV4@Z2 zj|hcEq^pdHEV?%m|4RasEj}x{PKPE}wa1$p+`;6UCSom^Tswm$U~stG;8SYz+8Mag z@R693Q{}o-NlsOuk({a`<%UzW_MCcZYN|Ge7N@3l;hZ})&Dnb@TZTzsj3ZGcr&j2+ zOx4RDs?Ng7VN0&`>tm`h({X)-Q?qj_2&bBpRm5K~)oi1)V5;Sl%2c~q#x@mBb$pBm z(-m!saescEuFhh7sp*O7d>#&As!`U28Zd>?1k;mK*k}x{mM{*_GJge}n3|rf*LB|t zrzhJJYU6Z0qw8?U5dKCJGO3$ZiiwiOuhA@VcwjP^)DU1C&NY=$_)8%s7|5C)zis@Fis-trxR;G~cIGL+zDhpTCV>d4*^!8#Ty1Wgc7eqtozHNv} zIMWb%)e2UuX{^G$)XS5I`>QkJrUENfDy57=L ziclD)rSwW1)~9D`F!J2`bfc0^`yZ=zjM$H-)?2y*2X%iDOV&Gx4n`3hVZBqy>w2uO z(F16*z6NtVG*h0el{IT6XUdr7)J(aN(FGYYRYMB$%1gfK1M;!DIw8YxR^h+$fjp zWxp_jM#Z0G8nG*2W1@!6&uvUM+YR=+HrQC9n_t+dw=`IT#>{xjxs34|mT=IR^}BFm zb`B#68_lvFa)uhs>E^he^OKDxl5N;%`OycBPQ%yl#MahqOE;b5Y#X^YY-U@!Nm&9p zDaDdCn!|-c%R*u0m}+a_j<8v-PJ2mtrJi;J9t~oc@j1+EsyRM4t+$?3ldn#CN(}kD zs*^TsiMnPr7S@V7yUnUb`cQMSzNV#34mGD&nE>S_s@ie zn)TX>DNQRvYK97$GZj5BG#j(ts^&O$nxSTMVww*kh=8U~i9xd!8&#`Sz}yDSc1+Nj ztRj9=&CaAhlZH%9Lp0RvG+X?<5R220*nY$2>ee)fU}dv{?3rqn$Frjd;h|Q!KG|&R zoVLnsU0*EYr)~apC8S0MlkEwef3`!zG1;m$U=yS2m#|gyT?MU44f)XEzTrl>t&1+z zG8A^D^3EP>bMs2EHo@x6-eY?`o zX9}q{Ki1R(L(s0()r5AfsR!fG5LoM6r!`r_1H4pwD(fEs4Ym2Ob$k5^+{jQUg2{h;~HM6Ht&-fu0p|xp?1BJ@eeK?*(WXNd~?-GRu6_omoKSlH<5;% zEyOI~r^1-Ap*9nBO^acBw&JIjpNHymppXxqlL(oh-JC`krrOQfie7Yswm(h}wL3Y! zlpp}kYj=uFHp+VZV1-h9I%ZfW?wU@GH%2#vRJ$`b-b71bXME0|^g5LZWT~*j4+V5r z>C|+EcX;n}O$D9F8KlpkQ`aMl!O@Pv)zh7s+Km6v$YDcf3;IN9sMDAzPpfI?GTGwC zA8B+AhMEP2`h3_`LFcALZURF$BsT~$?C@5G*-Ler?Xqr+L8nzlw;|)&gdsE-hnZNN z$$kinPw3o-;2+SCr;NUOd&p3w4`C)OIi_uKN8`GIu{r*l)oHO4kys27YrZz)6Md&O z?T>ezR&&OWn+bFqblOubZLL$$2c5~z9Coi{XAM+_R*je2Q~GivwQ91H(Ff&2Ol8OQ zmsP`6%@wK{te)0%5v*=#hA}t>U~rv})lGh21}eO;(b2~`hS1ASxVGH%k3-k`{U)_` zyfdx~*HZdJp+jpYnz^F7Dz&yT!9RbE1!5?@S`RVBKW>lR#i(YpFxU!9diXNJoc_44 zq4NAg%<_wKp=8y=vqa_xyfv z=~ZI}&*u1*7ZNw=za-y=~d&>t44kwsc&1CUNtVgYFv8Nxb&)V=~ZLfoh6rE zH7>nsTzb`bF5kVEUNyRuu=J|YIb!KmV@p3wzVxb*-$E|EYBY&5Tzb{$5=6N4s&VO6 zVQdG6?fi0@xEgIza9S>`m8r@Z}T?oEnnlkjjnm?Cjb5) DDIml6 literal 31269 zcmV)hK%>7OiwFP!000001MPhYcwEJiaF1@w2QoGgAcVny53m(mmM_3&bX!=G2T3+I zNAOr0*#n~)F*EYTC36!3F-Hu90OkmggTx#m5J)h&fSe`?37gY7Hs>DMCI2S7*~S0s z*H!(h`yKOUJhFu|`o2<4zwWNCuCA`GuI_o$-MnUg^_ptNSSc&52(Z#}<0C6y)!RB} zF=J&VMk2t<*f;}MrlS`R#u70WC_Pros)(f1k*#T`oZ2nTkxaz+RK~>Le*-Ro5>tDI z2L~gm*tTeY%a);NDmEBRWU%(U+IjW0^NdgL7%;_v6X3H6WoldHwdrVlz+$3wzk&B_ z43+oA4Q7qzYskZ6dNxMl@#N;7*x*n++7U}<8bpUQ2@FO0E{$x68XswG=N=F~y+G^3 zgxXYepoz2C9360w=Y)Y+G~RC(i1J=cG4oV@s;zG5olistc{QbYS-%*dceKJsm`#OK z$xJe{b;zuSWB8wLBT`sU*b!_7Dogb~nQS`uNhvta9QiIo0@>tMxnI+dqx+)+k>Plz zF_!3$B{sAUC;Bq6WI~j#+ON^89E@f*Ci~M?N6KwlxG54J=6w7e3RRHIz~$!jP#>1W z1{|JN4#v`HgCmhOc|TB7l(u%xG8kc~edK;ZiBc;)p=`8|{Ognj)G|^2bNxQ9h}Ajd`H4 zSlZCFR-ZUfx-*)IKnJ8b40NsP=m_Y|4_YNj^ENfKDXLBN#`V;%j`n4eDWZIiv#HuJ zGo5m1J?6D)&5ibxsf@!ctQ?GNZ8B^q@KcHvqFc-g(9aFgjA^>GsjJTOgnfo$yP7RT zQ|U0L$43*Jx*=PuR+An!jI@~}B%oDwll}xLGH#mvPJP>YCyRg#=GQLdD_j}s@!xe* z0e#>zwQ|3M!qXW|cnO?X-EK-P9W;&2>|iQ76p!>7)*_hM7!3|ZQkh_K!2H}doJtuA z8%(ah#L%AVzNC>%4<$`w7tADsCJVutv4LPDwPBd+Hav=@gJ6NGV;P>xL@tdcc$M`A zRbFJK+4!tkwZUMo(UPb(IGoP#63Ilg8dxxd8#Q?7iw4c%=@ToQ&S0NlB$=wtY>Z$s zmf`FE?BM$03@@ArZcPpc`;x=){@^9Nq2#7$Dz!P5iJGKDA7|B8Gid~;P=hF~@JZIg zsZd}#mAlPCpEZY!j+1>2oT3#7XwyUzo~~OoAU9L5S=EQWf}J62y16p%*z=O<)198i zfX?YoCjMxgi`~&7LwAg^iij1`QB;AMP7Pdfh4plX6JzDEbZsKylrb2|^lc>Fv^5wK z569{9JHp_*N7{p^gW0sv8W-y~x){lye{y06~r7^lh+s3@qux$=} zO^T#zjht92nE(e=w-jy!EBX9sp+px!kqVFM)r3dAf7MDedc+YBM?9*H@?wY@hjPcn{R`9G-(xx8|B3iS+pA* zmK)xo#{Dkh(1ka#R*(!w#3U$pFgL0zHO$v=bO@z;cM|$HOr)pj{*GGDEOWA6#_o!p zq8DKWb-7N}=XGgqZ7oywT8<9WW5I4YQb05~oHi!L`mJJ|#6!aeiUzrpYJ5tY1B^~l zx;hv&CY3S7mvPPS4;lkJoDiQ4|EFC{tG;BvQ6&+HZ%rFChc-sirl!Oa>5Sn(gL_sj zd^E9?Lc_CRwi^1wrz^6!ITnv|XDXKHix2mUmdyDcO{a}@DiW{e{uop<8s4gQ&Y0{G zLt}aCiT+5cKe#!vH8?X`yPa;1F-8gT&Ez6c+gk`;|7g>!+!K- zj57U6Q~4879;w?2+|vqn;q1*M9H^rK5Tj|3XUOtwkfWg^H#}yq~SR6O>B!}NnhNdVpS$( z?BJvuMYkJh3$?{El`_|Hg~)&mjqr*}g#e9rlxWz8A7oAEzKBz>;1mp1ao3C(U04~# z*2cSul}<*TSOrRo&smXnV6ocF>4>b4f~e1-t0bi!sMd{DIyz)mCr|&a!c^t7Rl>=a za&*lI)g?AH8HkA|-Go|JX;qp5q2M%;)#Fs8blMrv>2n<>j}HO~XR z$sV8KAXMqu`yEV%!zRknQfT>O!&>m7Z4gXA%i(A($`i)gg_ViWd-toNo=rsEMm~@8 za^Wh66PZ+`FPeitA*+m0VX9HVJ|%`k1C$_SmygtiSBUcH}iq{kb}Hyb`mRM>T%ao&CbbQDi>niM5ArT4Lx z%HYP>8q(?7{BRtr)z#h6%y24^21htUUTDW_2e8TOb&)R0;EuFsWiRLblv%ewbJop3eVVYlMcy*UoM=KNzx zbPqm{e6FM1xX7*EZGmuDwS>tDl zgWw%!;FP|sp|_ zF&9F2u^mUMM+8UAP>C7wklmR%%M2yKNLmAzOvJaE=Vv0gGLT9R1|vKa5>0`-=wr~z z`jSIiJzO|&*=S*xP>7W`b|g=ZF2v0p=Iwtw9;5W*so6NqtT?Ut7sBVL$3kq$Y?I?@ zN+A|~uf>}>08tMIz$u8-Po6b7wBfbTV|w5qI1e_5!CfDn#H<|C7%)qF-bcHjYm~=w zj5KVbaBwm8>niHSj94O;X%+Dt|2C4=8e>ycH60RKj~FS%oQKO^WbBL=cTJ>0bn)~qAtKeUDyWf$ zTmwMmV?O+Io^;hjb6)X-z|LTMA{|X-f+FD4A6-AZVFQmFh_gS#F&m2Z#f+sGk8|Lf zj66}|9)dUvN=F8xJV1msO^SvKff0A^c*6bE;I#FTbaWcfQlm-L#69F>_(2vPR6;CD z9Nd`n`)8A*EW-tBvc6hSu3gQ*SCL^Ku27_Gy04Nj(}89VQ3vw}tz_lhzbBZ+Et zUzf(J=s{T*hG_g{4Y4|Wbl2|p4vQV(6c%$;W)c<~JIXwbC^OzTLnU@H$oejl8U)Lh za*Y$u&LrihktI>L!)^Wfe0X%$KcFhs)k& z-2O28`9THWm7DiINTA*v)}ugWCK@oAvnhFE32u>RwVzq4PM)hel((3XoK9Y8wx+Pn1%Uk9&_saL zt28ZvdDD{!CF3L%G@YX)43FlmiG5mTaOkwr=f%*|Qd3t)sKod+Z~^5mP)eZ00$lVI zK5C*{csMd`cpd5XM1OP(a|n85qUP23Qin)Eg}h~j>)lbn21iq{#AY#Wyoa=DcVh1)}7Poy=5IF`kI_z05KhCxH5JTxq~`putT` z6_l$6dc4d8CQQskD7OsCl|f!5pPUYT&p+Pi;j*xtei`)13-}&tjjUYsc{{_@!>i#)hcJ| zM*uj)@~G%#BUU+bj_g<2{j|GnG}@&G+@gVtt})7{{r(GZLB{P07dx(uTZtwu9G_fz zTUx;0dhkD-3;)Q^$r?YU8n_vAgmD1lvjCRU>2s*EAaC~~+@x4P}bIj`e@ z*As!)V}P#{fd6SA+v&jnnauL3xJft;>Qq9VaZsn4(ByTip#Jep>@{!^p8)MmVmU41 zQ}ldo8J?l%YfERXX0r69(^;d3XP3Z6AbeBo{e4xzKc z4Ei;?8NtmFUU62n2alFiXfMmvUN(63(%rDw1njTz3XjHlekJqVWa8e%&0lK7d>VUk zDCbBo07`eLy`>sgNunP^{JUk;lR%DZm})Af0f*C|+zHStn&xUTo%FC0=$47}*~mTR zY%EOzcd4ONs$8T(dtC;-E*I$rr(SzZ3RHnAPyvs>yc}q~3nzGVp@MV)>!Uk%9F)bl z5dVtn3W+OJm%WcU_j9!5hjeTF@m#QWr7r>P2Eat=vOE#;OR@mE))fS* zd_si=6D-Kpm$_2&_4u=^us+D>YFY-Kjqqph3K!WPB+tiBcJlRH{p2#{9K6XV&U-X@ zRs-YjMrSRRFnwwkLZf4h7ZOZuVS<#OgwNdVVP3{O?Nz&HYAtuDqa z3}|BBF$?}46pULO4da6A4oglW1;=ypxiQMt>K)f8B9G0!SUmVF9)!eTAQ0%d~cY? z3VKnNAhyQ=xKfNN7Gvt<%VTBJ+1l>OmU@v>Z6 zn^(7-5301tjGF-ULnYsbQx)EH3UWAwH~<5UE4+_jisBKBRxm}=^8gGyiKr+s9eQF^ z7O|c8UvmJuOjbfghix3I)jV3)PkHib9YaYv7tV)_5FC<+jGVb#!e$lh&>S)rvR$gV z5H>Q8-yFi3IUg%lkdeuyf0YI7-+>$}QvQ*jCg3MqtOJ)51l=!l^T!Zq>;rwii3BDA z)M#*I7C0KGU}qWkEq>FYX%>?{G|r;osN{$4scr56ad~;FV>cv}hmTy)p5N)@|YFq$mc7z1DJ^U%IyG}L^pK7h&ql@fyt;%qhC4D{zU=@SMy_CojfgaFGTIC$VCn&`!P>C_JD*p<8}UG0Haj z%@?C=2NGpN*J9>~$Qx@xBjVB&13;`804fH6oY?Z*KNAno)+Q?ke~Q7Mu^jxN%3IdX1@B4&1@*WAo>bG24(^DiMhfWm1^Ue@_}22g;aSKOBo^VhK!cjz%su>^7S4 z^;@Lj&wO|}5Kz`f#_3&?gn#sLQ$)F8 zQ(oe_Dc4N(7LN|PZkDx9&Lp32*{|`0bv0hIE*;FzIx$LL1b5L%Rr2a!8PhZ~Z2p$B z+l!vY4Z#l!8k$);#VRmf>#LQIYuGwdWxi=6mf31-{dKLZ zi1hVEO?spYHM-z!9Qm*TS!SX|J3YLFO zJ!Wp|U)mQR^497P@6T0qK0x)0@r(g~=1eaQE?-ei=#dA|c| zWy9%cIJ>5s#$l$5R21JyacZbdNoeU?9hM`me!7*peZOTp@6c$}*mo0z6+gD{PCu@Y z>XN$cV{&X6{h@1f@Q3j2>KI)pB}1BztK!slKCX%zU%p)yHs)OxO$V^tT@D(%(}C;4 zuY?ZMknC^{d>Z=L!&r+}W^m}ho5We3(~O5LvCpYaNgdjuqphB$Em^q5R%Coc%5bu3 zQ_;R;qAwQbMw`xd`Wq6~a$xw$L(xg!ZV7-u^mR(Ed|4~M99!xS8qjDbM zqUU<_3=d;7Jg=OoFSV|KGF7BZHM3r7Jx$Oz%Z`V#lc4MrNKc0JRLEPw#Je^*Pivsu ze4u?a(0&iI&h4f{9B;d}#TG3$KsG$=QlS?`BC-GB?A9Pcw-8{CNn`M^+q>4Nqs&h3z_(0=HH;ti=eMd0H2}tc`@|)Ow#8wq0hXHWd`ih0r-ivZ7tzLuQDoON&_vtC5+q=QjHuMg-M+qJ+F zptk{VOlzO(#-&j25_rb*Gs$Rf}KV|5vfQ5tXt#wrDRFbw=| zGU{|QRgbqYXFc9b+MxDqE3@Xs!HkbF0^k1nm0qan@G8QY@k8Av`5$GfesO$1 zwD(bF>DdG1^T(j>gQTuoou$CTF;Irfm(P_)4B)XBJ~tjU_~Y~bLZG=?JTvP&#;m)j zPZ;nCCTuU-1=OjfFrK{5rgD}cWWyndOfYb!7Nbtv;1^zU^j`zrAF z4FgSpvfqTV--a^Z0(#$Jc3a7xp+4?~a&Hjc`Dboxzi;$)2YFV*g#9;s3HAdf)~)Yr z_4PFa>z<_X@G{}yE#|zI{1YkjLy*mnp}n6t@cT1^U#>S7LVmA!W{$%z4EVW`|0AaA z+b;qC3gosA+W9re_%}wKH<_w$zh%z)_Rmb{2j`Wny(KR(^$r>G`#U4=xA5#I(C+Vn z@83Dd^)#l+lgs6gz^7p=O2?7$<@Eo>p#Q%hk2ip>@yxUfT>jk7{F_m)oRnEDo|!h` zWznC~Y9@8R^k_{M|KTEwKa>8SPx{a6{wLJ^3*i3;b>4Qnu{5KOm z>HmNXjQve1-UUSd{tsyXFYvF=fAraTePeG_iuN+w8fNULO1p_%HVb|O0cP11;5Ser z>I4iZ-6-e>$^_j&g=i-L`>FumcLcjr8e^{ZDqxIv=_clAuS|Y`x3W1*S~r{N0+ zjJ8UjVu~LWcrWmDJn+QFXRRp9{pNtN=PLao(Hmn;51dG7^L6I}QBL-S%j(OjjAg%! zdZj;5^(laBsHX3MX+Y~urtW95=uyD%sY-w2FjijcA+KlnU}Z4(%E1S=kiOkvY)L`=Om$;2r%j?1zu}P&S+8G#^a5xyW~} zQS=JXg?TXUbIk7C?Ut`&NcF3UzXsi%u>DnQbHE0{(yCY^d7+WkZ^5(62Gt3N%4pvzBMrvOufx z$neMc`eEkxif3l|Hd4L{${YLgvWK+t?HYY2znkh$2l)(p;y|a>mPv>6&G+zztqT}? z>_C{Y@)GEKH}t(1@G6i&571f-w2UzftOc5@$++>g>=X@71cSE(9GhpzOCy zi5CPu0Qh2P`y!~b4#s;8jCVwY-b}vNL!ExWQK-xJ4#d|FY+!QFX5Izjycu-^mq1zK z@0SOd;;WRs$kaFi?$dF9eIk6OV{#&V=6&aTc!OqOkR8zdcp1-}+>?nU`Xj0Suys?w z>*Bn1J+D7H5E+hVXgx;Cy`#}7@n&dH??9pI->RDHGfHeUWm zk2akesZtxU{a(^IHGhttU#;fbeotwi&S#Q&%fcoR8N0ctU2=)?cU9|CHbPbgW8ep5CwH)ciWH`Bb^Oy}Nf+ zLq{I0c&)t0tNHrZpU!dWx*xjQH{x6sxx6nfZgA1cCopHzH-1A@*gLff;nTOOzK?mU z>WR!#g`RaY6hY=~LQi7;P3Sp^B$!Zs6m9+(*FbVO(Qo`u1od=;gUJDDcr`AjF;8=Lj$7|e_O3V0Md~@5!Bdz&?U|6-7)#?ysL?o8rSY`W2YT;x*gEf?i>MJDOAtiE5Rpe&n(?g?*iu1}7~C8C>G;*YLV0`r*n5Y{ngJ6m>Y zEODyZ*ymk6k=lm!iDYWf`l5~A6|X^=@3V_lu|$6?W$$6VOApLlw-cfF)FLHh+qxa2 zch_rd7BKfQC(l2pb?8N>*~VOSVl;JYA@dacQ~E%7eRbR~rtL3c-m1A+&DUM9d!6Ly zrD#jCw0DM@Z*$eoRP#MuHMs{S1qSmq=Vvj`f%>FgekpUA5q(<0Wop3~5^~zfJBpjR zL?mbDInSyU)0|CbM0hA5b*NgD;!n_A+k;TBYXi@#+`yK0W8<_XB+Q{6e70-_`cVo))FJbPB1JA#dxi1ntKYjpm zApP}R_n8leSUYL9Im8Jy-_s%9X863gf&|4ZH-oV9{AzP7vk^1W@nnWZTxSQ3X=4Uu z#S`LCr=ph)$5Mu~97$m9%*IHDAAPyRYm+M1HYa*0%jjHIfYEW1cFKbw{~hb z&AewrhI!A1Vdg#?nBBF=s&$)~x7KZD{mO7|+2Zo$q5 zulDBmS-t#-T3(N->U?B9``gaEt;aQLeytaO{I7<{*!fGOHa5J(bFH^{tZ$AQ(?1;z z)9->(GS{hOEG^g&&F~9Jnif2pO={QsWUJa_OK*WwxIv|$2lrmj#=(u=9DGw3vp&NbIs(zUHA6et?+in(a2lKWBw=-`M-=XH)yy`pEd_B1NnwI{K z8=rT1(|O&Cj*cnakEnEQe#=fZ-_vjDYgR(Wra#Obl~{s!EfMp!0tTAX!h^P)7hZZ} zG!@Gj-p)WOIjGmv*t=V0FXUI8_o(%J-vYbWqaM%ysP`~=4!2W#_j&W_dQIbFS@^u) zo6lU;DeqAE11vX#{vgZEpzmVt)0b&x!Hds_n75gDSk2cDE5GSBPLFu>-=m+p9=Xzg z)SHez&foNshZFseGjH?p81rvFX3D!vaKy&h0%|0Xi`%i>%q18y)j+HSqsa}Mqe@>j|`=w4uwl* z2MM>?FzwB=%w3B-Ht;!L8u^^Rc1HhXDi@}o^I)3if1bHmj99vszj2rOvk1>KPdjEi z?R>$l+5Tj0UnCx9?job_1$E_3$F%J)c!=Zx0zRV>(D54D7rooJ4kht>Gxn?1FR3JE zdrc81QGU@U3lsY`M*A*xIT`(uO2oFUdYL)vN_8x~#(G^VY4%OOny`ntYl89jL!FaX zyu15D7frH#gW=0+zS}n#Z1ujP=G(rc^i?(Aw#WaPny>Hi^~=E4@YR}^BCNZSUPApk z^B&Gu)%;hqMm-15slC@!dh7J|URU$=(~dW_wByyhT=@A0%T1}isg}1n72i_xJ)Mdn z>nl^ycx*##eLNb>B>7jS_~C%{1uJ7m9x)0Tn`?BXeSfve+@mLwrdMKfzrU^WI%;2x z`i_T;59w0Onfq>Gn9JEe+3V3R&wqn?TaWJ@fbhz|S8$TY_nG$u{ehbAxeL)TrSm3p zUMDIyM^gN|fFA6pEl1p#K9L~r4QM+5#;Y-zqWp!M#ysgu4#)e=i88o8wqZCq%pG$h zoyiz;$(U-xX~UNvPV||9FG+RgES?wHl#KNUGpXdTF)wR<^vZ`8eZP=i3MuykuhGuS z1qYEhf}^b0j#cb|Tok0(Lt?^Eg9Jb_=j zbfwZTU$I0+ckX}XQoJg$ft#x>x*z^)H%X<32eqQV$s(zLE}*2`=JPVmG#fsRWySlG zrW4ecOr@gfp=6?;+h6k#g4fbQDLUn|^QW8!(oVzsTNnA3n^707@bBCPqqhHCt?2L7 zqIz8S$NJ#>AW+E*-t>;z&HH`!!j=g!5-_28dc}mqU2v|6rcFrhh-f@z*9P zz5mQn8gF~IKbUL#fAJ}dl7wyo|DRgZc8T`CSpIrL>HJltqo0f4ptt)swfrx=B;v>G z`EQ>Rs^aR>C4Q5F*8d;1{(W8}jQ#PF$KO?YHvi*))qI=(@qcQ*Ej*#gV}ZusZ@gSg zU*AC5@6CYe%aHsRvM}&#pNbUFUWVlNhiG*N(I$W_Z{9KA=;nCXkV$Rrif-0QYp+hy z)#X=S>O!7-ejUY2N@Z&M_&rFps%YSF`Z`WyLmE%==WjFlE!j_FT1bF>2GhL1_{*UD z9Yy-@Xmj4CTlLJBM7<3-0Lzt+Ul#r3eo}8@fqq#O=~Mw7kH4Jg2EdTN4gC+mr35to zx%9W9fc>puF4H{#(Hh<$f7=%Qp>}BSDa@|~JPz=9z)=2aEH}|#NAIQO3)N8{?XDe- zb~(KXfU89PW!TOn!q)_*PLyd(D9Q=I-wD7woj4Uh=LnHL8+oF9jh(kt=xCuFC4K^VhdgvusmX;q(CM1|Ueu=^x3g*IIK7I=v3_ z-y_nWf_n4t`LQBhhw0;(-I9^_gMopTL+G)!_xp-5Tl@`3)8`bw4U!MQO)8!~UZg*V z=@a~o&N;OFp;+$<*=_(X+H<<^1$-jl_W=&F!mN8IG52+^u&)-&$5u~cIoWEd1=9*R z8e?hgIypCa$hG|BcY!<(*4oA8ajHN0$ys=60p*vARXND*-a8uY2H^E^obD-rX9Au9 zcsk%&fad_74fu4xr!mVf;OX~^^ye{ME7C7uy3Sv}KTpdaM0tNfX^1s_u%s;GLY5G*-%e|0i_1J9O^fV^zK~aG0p5C#3!(UYG}a6->6xf zrUG`D(`y9W0Jwz}#$Re>?*3BY-i?)yZ_vi* zFj1vB1guOR+aWa2yrre_kSOQ75u z2WH7%uJhw7*#2t3=L22~_(H%J0KN$D#iIV#u>J=CN5u12@%eg@{yL`D0qz6b&peOB zhxbFrv=d3$x6%vFn&VJ~q`wa7$;d*~ndEP2J!H)Q6bB+fzPD?{NgMZLSJ6;7OO(}1ss$r_$Rmk_Ad9ug-gV!^eckmj1&q!`k_x}joO-V z2`ikTL%IiREzf0Pi50M)oZd#j8vqZo!fYoJ%zZm?Xx}WBkH?y1c8@jJ*)zn{*>f2i z-PtpiJpPn}{QPKpiWNP6I$MYR$xpg}!v&OIF5_0nb~(Ka;56V(tT0w;Gjq362j@+6 z`OKy*EW0(zb`ZUlS=K0U5Ducpk~P}qAU{7^y^R&E5uKHn1HMA^|234;m4L4T%vs6h zkaL6`RI9aiG}`6#t`_xbk^VLQ_<5F=AFa*1%bWUmv)cvvZy+1j<;eDR1!Ow_@9E)m zuLXRAc>Ya%em&ru0N)JwM!+8ed@B=IUbWu`^?aL)$JvkazNNQ;Zazmnw}}3H8~gJ? zz#kURzk|xNkUqHF$GJgu$?z?sVAl)PV^Ig#P`8YfXco*OY z06z@)A;6CUeuTLnB{KUN7jBE>QR1V(sMnDsb9t?+8y%kJuU{=h4Dd1L?xyAPTGo7Q z!Q(7H-{Nt0c-qD%a+8N#E1q~4$m3u|=8XRCMY(*E$T8Ol@ux(3{K1M4=ga4(RX$%E zZ9W6=7Fgc)CjfsIc%KNoF?jwNz)u3+%?e|)o?`AcE8iFG<}+WPX8GB#r`gz_uh^fO zEaQafY0&o*nefr5yzooV&xrIJPUB|2J=lj3rSd&fKq2bPi1*Y4;4L=TN=#Bm|c3x`{H~I>1~u)p@x*xdmiu?0KepqA9>>S zQUUo1z`I8|-4{jrN0@%uA75|M@`aSsmqfiEBKb=G5c^nn! zFPeQzOZz@5cr`~ky;@*7eI4*?fL{mv3gB-5{w8zZ+v@hyX>a>2=Dxliig&)|vo?O4 z<+L`+Irr@Xj@fAYvZEQU{|?~q0)7MV_x#CI-hugE0p(evmBZJ-@^-!t_y>UZ3i|(q ze7xyU{}8>0m9z6hz;6jQe~fJY2=G4v{t4ipigG`}az6w7bHKk~xxYm(9~1av!28&N zh0lH|(m$os)9r%oG-kPaZZ7$+MEYlx-my&G${Yvujt4qZq5etmxe`9}cDSiInTajW zIA(2sN|}8tgzwb(wMhRQwnFj^3cnF)`38l5X4aSoU@YYJ1;${s#}cRHu}}{GN&(9A zfKme5-vLeY1Aq0VPO$*5+&oy$vERwT_dRZt()(!rOc>(Rdl0XgKLO?|jnXM5|)zMC&EIR0A)+XUgP}lk5*non(JxhkkM$KVtEZaS8kz zg^4yMe3Kfjoby(8v}es|dq@Fz7arF%{z}(;qi6pr(ytz@C?9Y0Pk{ejw7CuY@o#|t zLp=W#KL0adg#QHm{{a7&DF16L{};f26VGqJ=YJLH`!M}K%$Fy*jg~)%CucW|M!Nxc zqa~;N-+=#DJYRs%wUe6sUI|JI(6=s(=5yvzo-s=S|4(4^v5$d3ftlqGq5}MSG}`6; z$ASL4sQ=H{P6<08pE>}1-{*77ixb?ecL*j{LE9Z|gP+4iW^)`1s4oGicLlG*Z_sk% zKn%ncnnB3v6}sez*W{b|@2{Mf6$R|CN5Ax=?!5ZN(`6RFXW)V2u1F+0Y0<{2hbA(y~ zNUNT}Y=DItEcOX!5jYcIDHAUQLO9zeEYqODC$ti10pM?GSq{)n0Q1iwumXU;sG<{~ zi$E9v%Y_Ma6X?wX=Mh*7u!g|70GPgjz=Z%FNNFeC}Ljc%zDhmt)Y$CAPC(t^)t*rq33tN{Hm<(_Qfr|jH z1i10{934l;=)>y9v1U z@g9J?HMkexJ^~*Dc)$ZbMqn2KuR0F_JnRA=C-5KurXP2KhX|lfeG=f)SpdtSTtAxy zo(6b^z_UJq)@C5=d`UF~My#{oievtr%mc9f)^XfH-1MDGyYwRl;d^rnz zjlio};2Q+K2jK4n{?`Tmt^xJg-yx7~4cl*l zx!uCgB_2>ppgapy0hFph?Nwl$1-iXb@}N(FY8RMD;0PC(3^0YjkpNQ(90Ty4Jm44t zM-w>S1x^6Kaz_z(AHazK?<0U^f&f!Bz9DH(B})(Tj04*2b&XE2(W;_Vt^$8 zXXF7(2rMRmZJb45sZXGFP!=H4uso*E zA+Q3V!vj_j=pul0R|1@?K~EMqkHGl{02czRA#hO^SO&iOg(wQdlJ3T5|MJpOMI@vD^{3}M-m%`BO9=p@zKPlSSp#|mpbI5 zEe2DGNPH+GJGOo}7SF_xNTvDX60&0@J@xX_eB_~PRY!+4AgSTL%y25|H4s1{lST^s z>Ii-|q=>BrF2|y=0sM5<9F)s|9E|eK8)H(M6{+Y@G=e=0%(T?z99`vjAKaCzJTV-P z8`i3fuPFcLtc|ux6}9FqO|2okisy$y)h1P?$BhXUbLCwEkFTHLqueL~m*MTk0Ka#? z_RN_#jN57iW>qJ7C!f3$xn zo*e7f9#vk8#-wOW4uLVzlrS%|;-JWft$`V5ejO_n$&B^8ON7y)X(^hPLtt8{2Igf~ zbQL~ajEZ7VN>gM08l=f$(TEg{$RRKyLih4J3`!kt7Z9C5F(%dV*!ol?wYB}+?lEpt zD5pi!QZy}xz_chTn3sjIDe>WEXcXg8$@h4vX#W_uERxNlSt**8Lvi9P^)D}*qN?xV zVo(%ABHluzSP{yhult#ThVvA!Svka+i^N;;ElZ;@)p7bB?5Q|~e4ar**TUz?_$-S^ zq@9QQ%LuKrooL*|e;1R#oM>1K{u#j;k@U>u`b(mHnc34$O;4K@JZnyHQzSkdoxy*G z(D)SV4{qKVO$0?D^Jk;wZ%8K-@vXtwKyXGZJ=3f-J7^Ly>-Ki9Y6;GqVHOs-#`Db) zUU62n&;CJQ8l3t9>LdG~E3FDR9j2y_sCuK+ml2`YMAle-Oq<92Cf=kZNUqZ)SDbb> zfgDzo&fdFxr(AYQhx>$6{gDz^Q{&`pCHeeXlBF}g6yMPfjiIk6Azud`*(uQl{Bo;v>ufoFzSrcCExPojEyXq0L(L1n=K52h!(i$4(y_yJ<#iJ3cJk~Z zUC@pKD{&aHckF~I!R<~29Rx!s5~Ven(s3D_yxfOed<1GW$B>HvY!%K!&`p4SzU1sD zI9ip%#a*zu45fo(|E*es#kXn|-GUt64C1aYhk-9pd^5;_tfBh4E_5ZQrS%`+%^-t? zc&pan;r3Q7W!RVJ zRNuxiZdfFnMWa$QDu=+RSo)WjK~dHBa55(Ltq|JIvHu-7!fDa06wS(^*a}e<%x^2C zEAe4%T*R%7zOjCnL9*#=R*F{#|KUwa-nw%boLLmF4j!Op+s-X4oyqC!;venmU|%7x z4)z^pcL9`jc9Q%J#M;DgJU+&kTb)~H%bCOPIv%oxSO4+K;@az>?xS#p(3c_|21{@@ zJu9xld91=V?a65|*3HuxT8{~9gfESS?` zkj>7|m{^rvRIUI)HmQDj85Bi%4_iZ0HaIqZ6R_0l)fa>AKl0}5e%T?jAJ)CXqUrEZ z=}~xFiigS^tlQ$F;=sFWR6$*L&+Em9h0_iil9Q`$8Jk1mr01vZhrzBjccnkP@7y1% zNqCU%;2+$%TE&?4e3ZP}Qp%L2Ob71jmMRi*Y$QMLmspg;$@g{ify>R%!5DCYy<*+K zq_d6B&iloxW zRHC*olE&a%r5|;+885w1URKPhrNhabP22C|cceZ2lZ3~Mh~tsOhT+JD(YecrM&e!Z zL$*W|`sVRn{S&sdU(g5VAFoX)xA}b2s9^s{KU?dfEndb})){}+s;=gi*7mNJ=4Jj2 zo!dP2VZ40&R;`C!@ZVPPw`%R9K8E!cM;!}{0XMy5O#B{>*Hn>56h();=#cxLYlYa~ z=N}^UbFTKTa)$kYuUT~z9~&k%Jo5|~Z@@8-e*)u?Tf^(-U4V4C0VfJ=Wz#`(dG9Ns z46R!Yb;m<`f>w8&N8KZ!ZVjX-Yjr1Sb!Wn;1fWk9&_70I8&Cg5JfrzDX+9$dGFU#K zmcPiWJky45e9y*6Jf7U#9LYqOWw4lqWwmz*887xra%8{sK-iBaHg!h_aOM_*{jwa{ zFFyeGC)67J$5P3}pfSU8;QuesCTVwmmUc^>=;=5B{J+gCO`Qlb;j?5al;wG)0;gzs zqim^Je#9zAtfwvVP!rw4%?mf*>Gni_bPMoN!#uUr2V3dUU<>KeZZp88If|=vG{vRP zid@81NVYh=62Rp-il=-O#iP%Skp!k# zkc%w9vt_QH?NLllWphl%Lwf<}do@#i2b5(g$P)f?fF4#p-N z=4!hfSb70akg;4}lRE>BQ?;kZ)!vUSqcSzrAsA}ITneHcH?2J`mHs!EbA@VUE0mRf zK8li7y@Pb`aO|$@PL(jQ_#k_x|7T?W$jQ4y#I7P_>YHnUugRWA%@?Vz-zGlW!GW0M zOplY6h>*(fp^3sC^oPHVW3$|E&-?nZm19bajPweNT)LvwFCG4=(>kppTmIxE?XZ37 zcIy#q@n+J37)eB1oXicB*p3z9)~K3Nx>LsAmXuIV(Ak$z7pS+{61bLsH-QTc<_$X* z4T>C=Rpq9vF!E5>jNMp=HTRZGkK>>JcOn2vf7#v}ahyt&r__v5YrRgDL9zYY!kuy# zmm#-PxX=mXW=t8SGNkSlx#6Uor;$)}mH5COC(Wwbwd@J>KV!W4!dds@!Q{3}?9czV zonUHbXs*fVeWbmZO5K#&E8aXV+;ct6a6PZiyV7dvHHk|j>p`hekxPA*u|(IjRDQn5 zs`I4j^5X(I{^Q2UpYD`gO*;C+o!c~O+i6=FCMUNWBj|Tqi+ohPcxKD5a{BN8pDqQu zL@TNB1gKwRL?!I~$jF$QQ^5WD8=6>YD+U;9{=d-G;KT9Qq)kg#dpZjBjL8Y1)XfBD z9qe}S!kav7T@ig&s~Xm_sZ&2YEOGm55lA*jZ==vxnpQL{&Wvh1=*5GM+PKL(ioke- zq_lAws$XUI4ItLC7v@Q1rFs-Bs#L0=`nhEi=|2}u7v<9;56}n1P^#H|WLs)&X5w?` z%b7AR+|ld$|8NY%BE9HQ`_yC$R~HGG#^)4%tr_~w(T)oyj>vw>bNfzR)FzQ6v|QK# zI6r?0g((HTN8V#{I1+ntBsN3&(f#J~8x>wZtxs*XvUCF;G{1tQR6zU6L)6%9Yr5Zml-`)hN;h|B*ZL*(oA&KXb}T-DzEp8O456qG`qC>YXHH z({37E5Q<2Ryn3qqA8}k8x&51%O!jZ5n4Q)0Om+tVla`#c|CHiqD2`D^0&zm|vY_siJZBd0mzKL7nm zb={FY&Dg_{LKt>)L6@C)MA}7r-QBWiCQZg=3+UYvEm1csa!NU*udAv^je@nHLhMyq z1XyM1CXlMNX#rks3Oq>9O!v59Gs#Lp_Q$^O-B#|YH#Hy~gj`%f@RH{Ol@f^>z&)q+ zYJ;1Ba#>F$vgV}=%&4{i(pBj+xC!20jXmCq#$bUY*@)Cv<<{NZE`#d z4`pMgtn(68%(scFJEBN0Akh}*PwLCb3|htB5V&LuZ~5WF8Flv)?RgW-g?vcPMCFhl+P}F??2=nXZyF*?RFKwqe79EsKaVB7r1;r~ds zbkS?RM`;sLkDzs_ogjBzo^5SDdKB`8YpRHR-ROrj4TfQw!cqCDGV@rd=Np81;ZksE zq$*F&c?EzOVSf$Zg-ObSqMnh1si#I&=ujjmBhDltGU%G-A$=tFnr2X;J>QpgJPzYS z>6s4MbVLeL`?=`s-;!IyNxnWddK5v9pv4TU&l3vA&CD`K$drOn6;?slQr)TZ(qvyr z#}aw8$i@?A2ExIm{jqLEOO0=swmN2Gv$R^q;9=sJDT4*WXkVfIR$W*S@3~sS7j&#f z=xd+$*uxAu;=3}7Z?{tMffBO9YeDleaA?Z6kGJHn|a~3%<#ByrxY!B zr_{W!TK86<&DDN9^UE=`i;rcWu+06P_o!g3qs%r8gO+sC zmK#^m%^^4OB!OlSxVtcVaX5E>kz>?Pyi^xoS-4!86dJ+pE1IdD$LriHQRe{p;X9aY?h5(;CrrqxY@SQ5|AIgczIhw3EZjh~&y-)XhWo)}@K zG*y)I4#VFZdN+Sb71W~G_y`Gl+szYNv#~YWkQWJKmYdxS>h`1WKA_P!ps;=CL)9}G zj@;=b7{*$uT$UfKM(W^gwa(iUl+n{&OfC>;-yg`$@P|I33=32ncx}#n0E=1{@8fU{ zwuPwoNvgFYI2^0+9URKt+h zNrz}=WNl#S*(Tzqv- zyzREblQp6{F~FSE31YK(Zim%%`}?)^QItJBY#A>*qutTxY3iIQg$+dtHYw#nsmX5* zQZ*OpN$0oIAC|w_iZ>R+)0)rYfjV74-7cUuOw%K}zsSr_!k z`en6md*>qF7V-A#!5m$nHCgk$&I<(QUyi>kA+}1RbDEO&a=M85UJL9AF~BP(&kDko zE3`{gIdZ6;>e2#*b$-1>N$jia#+i4pXZK9FW<-2||JnA!iB#a+RgX}!^fwZ}nOxtUY>WCms(ftq>CXUgrKX|b0gt%h#j6rA?$qNVUWfn8 zXD*~825qRkr2 ztfM>5S^*g4>0e;v{a`H%w+?P_><3YEu|MA;oG+bokCsv_-1E+ZR))ZG3o_@aNtGw$ znKX@zc`wC`Jg!Ip_Tj$B6$Jjfch_V%bbHZnKza6qt3aau&2 zWAiK5v=_q68_ho&@z+)}IRZVr<3+|tl6^MIjl6S%KU_{O8kvB{j=p>>FdCVEfOu|G zWHehIEv?+8nV7pNhL3wEv2PkC$BgYHRG*>M6jefO&hu(F4}WJNFL=*UD9#@!>A>C8 z__F&wx1UJ8eG-#U3-^(#T?{uSh=Nf%W>}H7(j={?^CG!KF zB7XumVHoApKBM}@E|iZQX8b6vQDeN5c>VC$Ia(0)y^MZ@Vi(^DX%ambI~5oh9%2iSFH5Ms-Zqz zIV;zlW}idqQ9E5}xA9{sMfs-6pM`QJ$yF{ZU(2J#Ps<|7kqVdYkulJ(mqF@1z_GLD zrH-*Aa~FoAZ$H5XH1wbE%WA&gnRokO;zV~ZE%U!Arn|YXZo86=)v6WYog%w*nTvW$ z+0dtk5$*li^TCa_yoxMFC30P|SbA$u$}&!&2|B6gJVV^w?Jp#2qv}!ymb_^b33~;% zKkk1NC9UJ=XD`~_o2SY(9jQ#uIh3}_e*c_(94S}a??%n(?&2{dqRW0)_cXm|w4&+|tnu<)Qxa zODnt&M6Cq`6|j;++@2~!q_H-GTi6T4dgR38g8a#(Dqs^TE+yi~ObT&taq zqb$=Nb2A{m97fXTVC}gC?m7nN`Kt6AmHBF=gQI!V2x3iLL+E6}vDzPLyr0`=*Ir zDXC}UPs>m@a4WY>m|u85oPuTxX|mrKGNs78Z4&nS$ED3vz*?fT5i~_q0x16J;O-|4 z&gpc0m82bRxcM)vA2{ajY7qrM@;FKcQn6|jla5i3P31sE9^qwUQ#lb_73NEJ?wY7%biFN<5<&D8Vg*V===b#|scVbPc?Qevq6imkkZ$1F3 zbiB)y{xZJ#AmM)yva=XCR*$6qXPn53lD%a4U6i&T7#Z?CAFPO^f&?n1W@`QyDEg{{ z@_2J05}p{i;XbLI5nqz<)+cM)?pk>L4+x{*&vPs3_tM|HA}8=k)p{}C9MtVbN+7NO z)j4wM8|%ab3Pnmkx~1!FwEjY?sMtjc zD0MjPYN$Y#>mMkSAPD@HDuy|d8xu-3M55tJ-+84b7Hd-W2%6kbdMtI~ta5Buauh{{ zF~W;eliT$cg6je&Yo#ookskaXinH^kiPW)mPP$sv+hA0O7XPC#ozQmrnG~>Z$GrrD zFVnEtzSt;=ux}_TPABD<$M6J=nqt%qn9~WBqw)Cs@fZ$O+21>8-D#hX(wdao9Hap~&FzT`R53jHPsZ5&w#U5iG#8K`YT3 z*AvST^N+7^vJg@tr9~e2^{RGq_`~`p1+M`>!aR2W1py?oRXz+#^#?*BT?>c%EIqBp zD|GGnHc`&6MEVVW+)FrsqBF%6ii)wnc}tzVJAJMS#%;y*|lD<7oY_a zn^+5|y!|w~KiQ-3=_iMJ=l#Qv8!?YTIo#}ISy=3CFXPR3Y z8T@Bx6LVntB{}-6j75BxqOtae!f1s4hIq-mT7WHwqD;G(ZO|*BM#Lv>HLfdgQ?8rlP|W9S-K&vyT( zX%WJ>XhYRPtJ`qo3C*gY0fnEKe|G|f`>(5J5H-ed_Ekwe{`KtkmnO6kyOl!2!Ih7! z4n%yN0E4cFF5{$8*QJ)a-ejJ#*4r3VY%3){9k|4!hk3Dw3NuSS&&1V8-9bin$UW}3 zH?i_wHwvF(O5r!|-){9G^p?|w?I%FL=vnnsnE9<)wFT<-!t}&W5TAZT)XtW+U+X1H zGtceh&}FID&Z>S{p}G0=26&iNM-nEQadkho_DlrmIbSPX01ld5*J&BqaiBk^gDYNF zInK)WbD{0hdz-+84r&5S0o$u%Nz4?`15#sNK5DkdtD5(4&^Q)!I)7P0c#Mz#5ylDl z9y))uF?7p}kIv$ihe`3Y8`Rrhh~g9X17aUgaqw%!gjs;0aN6yTZV}ln;596D`n9L7 zb0DxF?>gn6m#Z@GA=RM}OK@B|L+&o6+_hDvFyXizifoN!icHNsicBbDEW(>%ls21Q zCJPRx!cp8{kf5;EbCRVfMck>QiG|uj^_}j+AH+7wD=kv%&~Q5x=*KHlRMUK=lY%T>GqFaQN8vyr&X7yZA{>j7jh2ExVT^2mdr|pPY*DUan{8Hgr~pHhbJR}D-4Zfd z6cNi5EF)CbOz6Qzt6I2M4S0I{8axC(x2EohEV2j}$|#{*$n zC3G8ML4l-XZgRZ9CWzg}v(nLZzhg5Com=p9FM^s#Sm}grDr%R?9Bn<97Wd~C;k}bg zO|#Ovm`v`pI!RvEz8IKRXn&d-2cS6kz<5Q1@YM&?#gON1djG%8u)=ga-%joFv%1V( z_NDSQZ1;z+mg6)C{;*kz8xqx~Iz0Q=7GOmy`taH`vOMoPbceZ&U;o%soyk!5f(w2q zfD@~$>V3JN>qlEns@Ho1*NFdmZro%{;J?U(0}1co%TMt)Nf|3>AJDYaM0rf1FOA&nw+9f*Ra5PD2F#$d+L;8$c`j=+dH*GaPU}01ZuuBp5G`#t%M?14 z0ldiF45rc4zG#KWi@%d%s6c#@Dt*ob4Yx(DQ^(cIz zE^W$WyUR=Yw|nXn>61DqwWePsYMH9W-mP^088?9L^_}Dm;=qYJJjQtTI3Q;E`b9BX zIdd^29v*hUa-QQHXJS4nY5&a9_?UhG}x!yJ#c9UGEHGytzQBQZ_tz#`l`~HFwp4&XbDS--%sdq z({s`@iP+O?;4vwj@;EYKg*Ci=LHZkCWGDxw-pbeg>zrSDX181E6T>a`qZR>N1m9{G zJ8icw6n;7F9eVc!dzNFN%b6q!e&a=nMlW86YrAXsV7cdhP8b(3=LwC2A&`%&)Toaa zf&2V%-xS(w$&(UyP=o;4pSsCu;|(cKR1QNTQn1B*=hg!+z@Q zv@49K9&!@^1CxSoH;8{H2GO9&T#m9#`LQ!{R@*Ccfc9zBXqlyhEDvyndEyJyO2Gs=7z3 zs1?nvKzdsC)j&1=*jiQ))BI1T@@eHJznd%kI)6sMWvob ziJz=TV#=OF{Fk01{o?VMRV#~&$^wAZm<8T}J{U-G$-h6e)enD@kB}+rnZPB^o(Q#; zdv2hqqhg_uxX>6enWa`H^$y+YFxf9XwCBHKR*t^*LVY0#xI>uH>b6webLr(hwS{Fv z$DmO}3-WXDga*7lo|FY%p(n*WsKH30=_x@h0RQEHb)n##@H<<-Q=(>wi=j8vIHy$m zsJsKO^`9|-6aIw1ZArXdh<56VVI(3)3c#k1Rw;i1w_mGMf#@#E1>*Xi9fW2x*Gb>( z0v9fji&*H-7g}i#xSwMcsSQXm;6%7U!C6Rs0JAgx&MG&&?%>@)d_=tw7gZByAXr|t z@bd3Q>Isz44v#{r{>z3g2w?gaX;xIew?yHtN;y^)ok`IfAg`#iSd#ZGoGv${a8XOD z9l%DD6v{R)mun?DFZ?!o5xSBu*<;{swWzo_jJ!!~`4BqE|L2tQEZTO}R8v@H52VXX zvRt)_@z8)re=LR&jHn>ijpiQG{>wxgl~?~lzH1C{>a;>EQ1notJk98rSs$ePL@>OB zu_m&bOzJi$r@W`mg?7#m2Wr%uE^L|e2JzM&p!?{14(kF9csV#ds6(|mgmo~Q z-|5d_g2C%2>NfoNsdXDGW(IJXOmHCJ4PJp8fXJ1S8L;z@o*+F2$2dNUCR=C0r6R=^ zbIKj zeWv%dG3}!{$Qk=MP=eB2ztELOW=Y=p0arq_9I%ChN^E4C?%kcT=D%=7UwXlnYEMr- zUYMFbKmW2`XXvw`ljTjc=dc`cXfj;IdqthQCxRSh?_vyd%%uP0XSGgDTx<7eRD@Ei z>uPR4O*Sq5wjzSrmvL-EbSd4%WI9}}S(|qk{CP6}g1|CBE5h;GNe!h+b}Dv%4Jk(Y zbn;9(J{C&&3|~zc3iWCVb&meD!{a>@y8Q3W{DCF9P_0Mvj?6hFqtC>i&9-9gpM&FO92(N|<8O4ctaW}ITRGVDebT6&n4)E#W?v?To!CO>oe-?=2FVsc_t zDz1m{mX&=yTpO&~g#NEtJr^<`e;Ko*5+|n}2Yhof4S2JqKm}T?oSNKgAB*`H4)(0P z;3ZOtci%R{Udl)DjkW#_c@If=SZxkH>}~6FED&E{SiXr@hz03s8b;Tg2?ejcALbl~ zj`ThQj?DktE))<|M)J(!H90lEhJQ){x~@a3CeO4$;THh@Gp4gwsLWH`lT+6 zi#R*zl4iGX`z)yr2cIEjn+e)Wj^j6Sr5Cy>FJGxIrFI^Fwvh?$76n979OFnQiXax0 z2K82{X13@Q0Az8Mdr3yVNQQ5f2m2D2a8+NsnTT-lvuj`tOyyt~D-dQ^eL2xb1zBfm zCbmaYl^N2^p$H7K^*5H8&~ghIBVi2SX^j%iK>mVdn0Er4S+jJ?j+)d)cad=1A3l_L zDK2xc<#_%cjsUrtKnlM#M^EbNdbQJGBH?F!_>C+Rj&rkZ#ElASwq5F^82N>CRXFr( zZZkZG|Jq=wijRS0}E^(qCl8FMb1qj`*&Fi2Y zALTf|i1vyy1sfHLtfy~X*U&kGYoKi2hgH68_{Yps08lcuKec-p<8ak{0N8)tb;?V~ zdbaZ>6e;P?SS-Zd^J7plcCwaPhkMR9qNeWgt2g;`M4cICHO^krnc>gw0?kx3_b`h6 zpO*N^f4>GD|GGUCJ-gl3qX(fR9PJ|XlylwxK-2~Yf@T%Ta>X-?b%y(z7t259xi0$x zz!DO52dOla$oA*aHH(x>t-Zud1c3%nD6Ofavty*J!5q-=lO^aPeTz6xo#)UCVTgA9 zP|#}NEzOAPjmq7LcKiIbTdPvjLHu&=P+wIhrG>z{o#4XTY$@qJ?TrBy=QHjCbMv# z*Ysqt#-V1@r3(QvTNsSsesa~2bx#e+aP)}=Lplu$yN}agChG$*$)VfA6P$IeQz@O+ zk{ClcWDZBS{x3|s=iwoX(lrDwC_1;+zZO2{}QGI70fAys{U& zAM+wO6%KeCJ-?O8!8Om*DIJGKkveE^IU820tF?A<5DN+g;-;@&F(3iHdO36&;LwTa zKZKCksn)?y=xH6xGdcpGaK5ydMz!J%^hIhQ*uwrx8WHT41y>hn1_LH8$u;W2HE~-2 zde~2OI#?<=JkM5Bc&e~$#n}YHrOC8hOSkQdrCv7_J&Y=d<^udpHC*MT-^ZKOWu?BE z4J2`sR3Wv+A)3AI(0W}b-Yn*>lFW1hg|_JeHJB;j$}Y>7qZLR#qHgL5_p6J^vSl7T zde;S~FDA^Dqv-UyCinuHfo-6-=SJHMN}Bx<In zrF;Lx3mN~x)J%JWmJtUp@Ex}|*~hURKamiJE)kZG6kSFXMY(|gKC-qm%`M&uBI=S+ z=Wd}4No4UF^VGhG16>y696}$%U%=n`r`^=IjLtvN@{(>Mt2^z27q?fW`a(sC?2j-W zX53B4pFDWnd3AL$VWjhwwNdQXMhJ6u-e15ZL_4%`L>5?6`vYNfYJL%mIwF&_B%UG-;)>QUJ3z*Db$OLpziRtH!b_DT!(T0+M>gz z-uD8Zc>{1U3Nwb9H*q|;n}{$VDRF^pp-MA{P-i{+K!+kT&8HSTv6|CW{}~0&?&dqd zs%hNCxX&o1=UScVs46@i3tK;Acn%Le+!WW*J^~vJ{Cx((MfpOdInJpbxu5>Pdz2l; zVkI!NmznYe#~=u-iuB9xFdUlhBZV(gi0VvQxQMXJ9R-U zI{+W9)O*(am2B5mkP;Z_YyY@~4Ak&RI1gdLvjKFhnPrd9`uC}d0C9-s@Lb;1H36J* z470u~LX`6s7XgxE$xM14>menAysf%4I zQVs<7IEntZEP|0!d~l`6eeZ0EVp@Y9Ih||V9)Rl3!|?~}GEl(Vwg)w>s&I`}b?r_N zSR|KuiIFtWmddYKIVqE=tARVT^Z9eH?o?ezS<`=s4Lx&_U5mU#!r~`;Cf@HV{SI-L zGKZd3fQPp($Vv@{#vuCW&QQ*vi^k@7LRcq=d_M%}-roTckU3>uXW$AupT1A{5*Z$I zcELl{bFLaEE7d*zNnN$)Om~U?5cN-4YJ3YPM1{PGlCtQ9q{Nv!7hv7vpzjEm z&^Vlze;)iFy0j(g&gYuNGf&MXyXY8)N5;D9XaZQ^p5%yv$GL4<-+%*E5ik*Z@Ba21 zGa)gO-zO&Znu8e{<62|X&187)*@Bl8c0 z93-62nph9|YrZ?yCG}z7H9xXJBP|Ft2C9iiB>GiyLugdK$E`R+PaJQgVc2e9<} z9CtleyGAhBaNBU&po?HA>D&*S1 zWI?nxgl(Qa34o*Sz5+2y3?^VPkBZ~E_vkN*YWHpz?k^sw7C=Z}B7X`u5uRMx19Fqo`_nen%BC}jj~Xg z_uAQ4EXPgO4q_)0`t=y1@882nuAY6@e7_!``m1C108ehjUk(9kylr!hj=xdaQ=e}x zQx)0QE{V&eP#-6r-8zViy16a#kHoE(5TkY%(UjL}9cI$f zf2fENse3@c;e-jMa-n&POEC&>x*PNZaPeR-6%$o;(YMe9WIr8QJ4j}dBT78JCevUK6d2TV+@s*a-inIag&yU6e8Pd@=G!X zm7cLoT|~3zw$8_bYv?0F`>@{%$?5(;lS_WG@K1jaf@54r4;T%dJsH2+BlW>3z$#+s z@QNtt4cRyqi2KBJ&mSRi3^Bvwtv)d_z{6pD(ZQEhHg5e9EyxoL&Xckg>=0>yf+f`IrUnM z?^!z9LgTLHaa5zs0f-o!`T-Zk*0Jg+(QWu>9&yO@?RMcWjZaLySn)!&+^vK(&^dRp z&(@@LC0KguTu*b&V2s&Kiln1*K|ZVjmE*j#zjnWnNr@DLGS4ez&f~RI;~;bDlWDe0 z`!27t;1?Q)>;Wa}w`)J2Y3JhWjugy|7Y`(Nk@0tDm}-=LpdUltxu`SU3dhlKP-ba|gJBlcsXXHIz;`?hV?`Y859AM^6m?lup`bJr7M& z&E2xzA`xC(F#iBAF6ei#&+qj{75?!{shau|(A?5YrJ zv>4d;LRev=pKNhOcm=G4!Q0|E7=!>nu7`Eho0xfmnt!y0%{~6eCq6%Fcc+r;zWC&A zzkHFAkmk2hXjdZ#@%C?=5@|_qtRA+=kAWtpd_S9oyAFcCT8r8!D|YJ5?K-Z^vD8$i z({^<5Z2i_VwQK!c7dgS>xa1Kq*HP+yG|d0_l4#LjStKy4B0kytdLy{@(+T%CmCZ5l zyXEb4l}_e^-|2EZcDyosy?pjR^;!LG>vV}Q>awWp>R{*qF8rz2(Kx(0U!8{N+aC&c z3Dd2Vm|kwwVS4FeTC3mGTB+-$Gu;{Sdj|dwHl)RFDwX!p?Pl`5&Qe-is^3`;74H0O z5O%@1=Dg!CkFzi~9y7*n&M6GPe%JI$dmRh@`lIZFzI=VQaAum`vi;W|MIZ3RW0A)Z zl=#l~U}4{21@-n}0z|J#hum=W98@R!NOHQPJn*P}Uw=@(s}-E*2tQ%;4w&4M3V zt5xq=FFB#A_LQt*m=qro+{S(!mmXvqz=9P*TF>4{q9>LYCp=xNeSMnr{!Ty9w3~^v z-3qh2-8HATx9?H+rM9&!8LVO2Qa>OgACxhpcF(omt@z6?s;|~MY`>Cf3YQ&SY?$So zJqH&2mj&-X`_z=#=H?PD_@Sa2xL8buAQK9wCw{OM!UVe50 zhJT;U=SSQkp}cQB%JX4reS?BOOz1QFI;L)HnQWnMS(y3G5*_5y@x{u>+bZ6ttKYU{ z#7>2~-S(B>l1Ful8riqR!D@mZfcVz zsyf+wqvF}*!pxMUZ;-O-tc?xbRiJ$UgBRjju4eT79PE}D@_p6_e zs)fBx(8K(R(;40yL{V=>yO#cQdqa(poVkTA9kcxdpPJT-m&L{UzUn=&6mX`QkN0*q zQb@f$P{_0S&)kAvv>!TydSQ+~Q?9PM()SY%=2A`M^{Z&rF#5_t#0uV5&bNKqcneWx zWAx~|ALV0-CbnfpGd}S}OPu_>EgQ93#u5FBed0LXG3*oA(L|&64s=o`LkhH=2KZK7 z*3x;~*bjf4nXk|=iyE<;8$#>{{!6l3@v}Zeg)mAk#2dh*q<{hmt`UZPxtke7t{FLM z^IriY9V$_->*Dj)`8&t{c8w>ISqV|EF0;by3|u2j+nfuZp4h55&aJ#^o?Ll_N9>P@ zqpr2Iw#?6mS(n$)jeL4wHnrE%DhU#X+6`@)pt09se$A~R!9yXHon6&s7RP^9WKEB) zf3we}yxaP%U)lReBD3Anvy{8Hp`K}kUQR>t^OI#h%U0FhZTDVXhcIemC(=|X%!CVo zu5QOI&#lv1j?sQjIs$*P0JGkgitao;nCm|ds9kzc<+1zONYb>aP^rSQNl|{(;chf} zm}7rAWUL`9?L|r=+Xlp-f58j!S(-gm*lJ_svjcu;aOs@PD6!A2S zvYK9k-N=+M14z^de=oADfxGg}8B{eDwf?*it&T7RT>%sNeBLG0whQ&Q2HP4M3#&`C zZL6oo={ymtJnfq{L5q)?<*hS%u~v^j{i3&PvZgrhdmbduu!)y-c;dl{FTo$Fitz;jq=_F*%`7;g=gJnC|}BRnCg={+;fE4w+H5_ zvnpPikUm~=8uYN!z&N#7B)gV)U6 z0&CP%sxM8u4mq&x*$Qy>9M`1yc`qOA`s}r6v<_Lcd6zU_539BpLd|mcrJ0zxTD7S! ziIRv@HU0?_bH2Fls>;@p3Z&8I+^`>P`%BHKGRNd7u`Hf?4OCj-Y{eZ%L;Ji8O!xob%BMC*Kg|df*+xMa|uL_uJ zA@8z(drqUjVf@VUb)o0(jE-%69~ms(soGjffbz=j zmb?|a5%%zLg1}Pq)IYH$QALL}fA6Rwj)5C?!jtDuBWIVp3Uu90(vntM+&q6Ly+NfP zSDpI(H37ACJeKlDsol?8!t5S|!fru%{B+PZlgkqxK8k62VY-HPZHAu@{kA`GyKUf_ zLhM#D9*wtdpPV!eA1R)v?OHD^Pu+vI20w;zF;tc}96T&PMiCqJ+bRnUO;kSFikdsS z5QMjM!&+V4hLhf$aCSs-SACf4nhRuz+#JhWbjm5sk8mz;=1^zMQ_nOe`+)>taKGsQ9(N(<^*_jSnQ! zml}H;J`A`UjYK7dG*}o*W-^Azk2O+drM(E`v_74AH1~Kg&0VU&UNzqy7VAH8yX7IP z+N8kP%Lv4Z+5D!F+gGe`Q8KU(;9VXJap74LFa|2eI|T(@cP3F(mM zbgk?te!B)m54kJzSN1JSCI@c}*3kmQ2IFW~BY3VK8vO^p9`1wU=znYt!ogD3-!vD*`qIVxxbms$d=KVELGy4x) z$he@b@!D2(n*z;pXszZaXEFRM0ZpC+D5R0Zu>!!6URhuD{f0H+l~EdNamL&MqOXzadHX zRrcSkrzeBI5L&lvEyTatnGaBG6_oDEgeU~JoYejMtstIn&uLndC^BCa9_XM3bL&s{ z9u@oFnZR>3*h6s+(;8RHqYc};S4aBO(f@vj9Bs7!?@_7$J>{zU|C^@%O!rpu>2sNe zm(2aNyORPuyZOF0O3Xbgsk|(f2zr`}cD;uVtjR^(mXF%Hq_(G_Pfz!zrue@ATxVDQ diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 9c970b64..1310c087 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -126,9 +126,7 @@ cm = tidybulk::se_mini cm$batch = 0 cm$batch[colnames(cm) \%in\% c("SRR1740035", "SRR1740043")] = 1 -res = cm \%>\% - tidybulk(sample, transcript, count) |> identify_abundant() |> adjust_abundance( ~ condition + batch ) diff --git a/man/as_matrix.Rd b/man/as_matrix.Rd index 92cd7fed..b2b9303b 100644 --- a/man/as_matrix.Rd +++ b/man/as_matrix.Rd @@ -21,8 +21,7 @@ Get matrix from tibble } \examples{ -library(dplyr) -tidybulk::se_mini |> tidybulk() |> select(feature, count) |> head() |> as_matrix(rownames=feature) +tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature) } diff --git a/man/deconvolve_cellularity-methods.Rd b/man/deconvolve_cellularity-methods.Rd index 5c885ed1..a393e5ac 100644 --- a/man/deconvolve_cellularity-methods.Rd +++ b/man/deconvolve_cellularity-methods.Rd @@ -131,7 +131,7 @@ CIBERSORT(Y = data, X = reference, ...) library(dplyr) # Subsetting for time efficiency -tidybulk::se_mini |> tidybulk() |>filter(sample=="SRR1740034") |> deconvolve_cellularity(sample, feature, count, cores = 1) +tidybulk::se_mini |> deconvolve_cellularity(cores = 1) } diff --git a/man/dplyr-methods.Rd b/man/dplyr-methods.Rd index 20999c40..d5e32a52 100644 --- a/man/dplyr-methods.Rd +++ b/man/dplyr-methods.Rd @@ -24,7 +24,7 @@ Left join datasets } \examples{ `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% left_join(annotation) } diff --git a/man/ensembl_to_symbol-methods.Rd b/man/ensembl_to_symbol-methods.Rd index 8cc48e50..73365d83 100644 --- a/man/ensembl_to_symbol-methods.Rd +++ b/man/ensembl_to_symbol-methods.Rd @@ -44,7 +44,10 @@ This is useful since different resources use ensembl IDs while others use gene s library(dplyr) -tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(feature) +# This function was designed for data.frame +# Convert from SummarizedExperiment for this example. It is NOT reccomended. + +tidybulk::counts_SE |> tidybulk() |> as_tibble() |> ensembl_to_symbol(.feature) diff --git a/man/fill_missing_abundance-methods.Rd b/man/fill_missing_abundance-methods.Rd index eff5fa8e..429c64da 100644 --- a/man/fill_missing_abundance-methods.Rd +++ b/man/fill_missing_abundance-methods.Rd @@ -70,7 +70,7 @@ This function fills the abundance of missing sample-transcript pair using the me } \examples{ -tidybulk::se_mini |> tidybulk() |> fill_missing_abundance( fill_with = 0) +# tidybulk::se_mini |> fill_missing_abundance( fill_with = 0) } diff --git a/man/get_bibliography-methods.Rd b/man/get_bibliography-methods.Rd index 6aed72a6..759e8a0e 100644 --- a/man/get_bibliography-methods.Rd +++ b/man/get_bibliography-methods.Rd @@ -45,10 +45,8 @@ This methods returns the bibliography list of your workflow from the internals o } \examples{ -# Define tidybulk tibble -df = tidybulk(tidybulk::se_mini) -get_bibliography(df) +get_bibliography(tidybulk::se_mini) diff --git a/man/join-methods.Rd b/man/join-methods.Rd index 6febe09c..0abea038 100644 --- a/man/join-methods.Rd +++ b/man/join-methods.Rd @@ -34,15 +34,15 @@ Full join datasets } \examples{ `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% inner_join(annotation) `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% right_join(annotation) `\%>\%` = magrittr::`\%>\%` -annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(sample) \%>\% mutate(source = "AU") +annotation = tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% distinct(.sample) \%>\% mutate(source = "AU") tidybulk::counts_SE \%>\% tidybulk() \%>\% as_tibble() \%>\% full_join(annotation) } diff --git a/man/nest-methods.Rd b/man/nest-methods.Rd index b81e5897..8e0e16cb 100644 --- a/man/nest-methods.Rd +++ b/man/nest-methods.Rd @@ -54,10 +54,10 @@ nest library(dplyr) -tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -feature) \%>\% +tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -.feature) \%>\% unnest(data) -tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -feature) +tidybulk::se_mini \%>\% tidybulk() \%>\% nest( data = -.feature) } diff --git a/man/symbol_to_entrez.Rd b/man/symbol_to_entrez.Rd index 7a2304f7..f02ca98d 100644 --- a/man/symbol_to_entrez.Rd +++ b/man/symbol_to_entrez.Rd @@ -21,6 +21,9 @@ Get ENTREZ id from gene SYMBOL } \examples{ -tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = feature, .sample = sample) +# This function was designed for data.frame +# Convert from SummarizedExperiment for this example. It is NOT reccomended. + +tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez(.transcript = .feature, .sample = .sample) } diff --git a/man/test_differential_cellularity-methods.Rd b/man/test_differential_cellularity-methods.Rd index 9c55e048..8be30ec7 100644 --- a/man/test_differential_cellularity-methods.Rd +++ b/man/test_differential_cellularity-methods.Rd @@ -153,19 +153,8 @@ deconvolve_cellularity( ) # Cox regression - multiple - library(dplyr) - library(tidyr) tidybulk::se_mini |> - tidybulk() |> - - # Add survival data - nest(data = -sample) |> - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) \%>\% - unnest(data) |> # Test test_differential_cellularity( diff --git a/man/test_gene_enrichment-methods.Rd b/man/test_gene_enrichment-methods.Rd index b91308fa..d4126abd 100644 --- a/man/test_gene_enrichment-methods.Rd +++ b/man/test_gene_enrichment-methods.Rd @@ -178,8 +178,10 @@ dge %>% \examples{ \dontrun{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) +library(SummarizedExperiment) +se = tidybulk::se_mini +rowData( se)$entrez = rownames(se ) +df_entrez = aggregate_duplicates(se,.transcript = entrez ) library("EGSEA") diff --git a/man/test_gene_overrepresentation-methods.Rd b/man/test_gene_overrepresentation-methods.Rd index a946dc46..1886d484 100644 --- a/man/test_gene_overrepresentation-methods.Rd +++ b/man/test_gene_overrepresentation-methods.Rd @@ -122,9 +122,8 @@ Undelying method: } \examples{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +#se_mini = aggregate_duplicates(tidybulk::se_mini, .transcript = entrez) +#df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) \dontrun{ test_gene_overrepresentation( diff --git a/man/test_gene_rank-methods.Rd b/man/test_gene_rank-methods.Rd index 2603d497..bd76b5ce 100644 --- a/man/test_gene_rank-methods.Rd +++ b/man/test_gene_rank-methods.Rd @@ -134,15 +134,14 @@ mutate(fit = \dontrun{ -df_entrez = tidybulk::se_mini |> tidybulk() |> as_tibble() |> symbol_to_entrez( .transcript = feature, .sample = sample) -df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = sample, .transcript = entrez, .abundance = count) -df_entrez = mutate(df_entrez, do_test = feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) +df_entrez = tidybulk::se_mini +df_entrez = mutate(df_entrez, do_test = .feature \%in\% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) df_entrez = df_entrez \%>\% test_differential_abundance(~ condition) test_gene_rank( df_entrez, - .sample = sample, + .sample = .sample, .entrez = entrez, species="Homo sapiens", gene_sets =c("C2"), diff --git a/man/test_stratification_cellularity-methods.Rd b/man/test_stratification_cellularity-methods.Rd index 629b8d6e..bfad1fd1 100644 --- a/man/test_stratification_cellularity-methods.Rd +++ b/man/test_stratification_cellularity-methods.Rd @@ -130,15 +130,6 @@ library(dplyr) library(tidyr) tidybulk::se_mini |> - tidybulk() |> - -# Add survival data -nest(data = -sample) |> -mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) -) \%>\% -unnest(data) |> test_stratification_cellularity( survival::Surv(days, dead) ~ ., cores = 1 diff --git a/man/tidybulk-methods.Rd b/man/tidybulk-methods.Rd index 1ef8cbce..0ef4e838 100644 --- a/man/tidybulk-methods.Rd +++ b/man/tidybulk-methods.Rd @@ -54,7 +54,7 @@ arguments are stored as metadata. They can be extracted as attr(, "inter } \examples{ -my_tt = tidybulk(tidybulk::se_mini) +tidybulk(tidybulk::se_mini) } From d4707955cb66ee6edc3a8def8343b59255be9540 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 17:19:54 +1000 Subject: [PATCH 22/38] fix tests --- tests/testthat/test-bulk_methods.R | 54 ++++++++++--------- .../test-bulk_methods_SummarizedExperiment.R | 31 +++++------ 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index b3fc096a..0437ad8f 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -3,7 +3,7 @@ context('Bulk methods') data("se_mini") data("breast_tcga_mini_SE") -input_df = se_mini %>% tidybulk() %>% as_tibble() %>% setNames(c("b","a", "c", "Cell type", "time" , "condition")) +input_df = se_mini %>% tidybulk() %>% as_tibble() %>% setNames(c("b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) input_df_breast = breast_tcga_mini_SE %>% tidybulk() %>% as_tibble() %>% setNames(c( "b","a", "c", "c norm", "call")) @@ -128,7 +128,7 @@ test_that("Adding scaled counts - no object",{ expect_equal( ncol(res), - 10 + 13 ) }) @@ -379,7 +379,7 @@ test_that("Add differential trancript abundance - no object",{ expect_equal( ncol(res), - 12 + 15 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -824,7 +824,9 @@ test_that("test prefix",{ test_that("Get entrez from symbol - no object",{ res = - symbol_to_entrez(input_df, .transcript = b, .sample = a) + input_df %>% + select(-entrez) %>% + symbol_to_entrez(.transcript = b, .sample = a) expect_equal( res$entrez[1:4], @@ -928,7 +930,7 @@ test_that("Get adjusted counts - no object",{ expect_equal( ncol(res), - 7 + 9 ) }) @@ -957,7 +959,7 @@ test_that("Add adjusted counts - no object",{ expect_equal( ncol(res), - 9 + 12 ) }) @@ -1008,7 +1010,7 @@ test_that("Get cluster lables based on Kmeans - no object",{ expect_equal( ncol(res), - 5 + 7 ) expect_equal( nrow(res), @@ -1037,7 +1039,7 @@ test_that("Add cluster lables based on Kmeans - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1179,7 +1181,7 @@ test_that("Get reduced dimensions MDS - no object",{ expect_equal( ncol(res), - 6 + 8 ) expect_equal( nrow(res), @@ -1208,7 +1210,7 @@ test_that("Add reduced dimensions MDS - no object",{ expect_equal( ncol(res), - 9 + 12 ) expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) @@ -1271,7 +1273,7 @@ test_that("Get reduced dimensions PCA - no object",{ expect_equal( ncol(res), - 6 + 8 ) expect_equal( class(attr(res, "internals")$PCA), "prcomp" ) @@ -1298,7 +1300,7 @@ test_that("Add reduced dimensions PCA - no object",{ expect_equal( ncol(res), - 9 + 12 ) expect_equal( class(attr(res, "internals")$PCA), "prcomp" ) @@ -1447,7 +1449,7 @@ test_that("Get rotated dimensions - no object",{ expect_equal( ncol(res), - 8 + 10 ) expect_equal( nrow(res), @@ -1486,7 +1488,7 @@ test_that("Add rotated dimensions - no object",{ expect_equal( ncol(res), - 11 + 14 ) }) @@ -1508,7 +1510,7 @@ test_that("Aggregate duplicated transcript - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1531,7 +1533,7 @@ test_that("Drop redundant correlated - no object",{ expect_equal( ncol(res), - 6 + 9 ) }) @@ -1606,7 +1608,7 @@ test_that("Add description to symbol",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1816,7 +1818,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res1), - 7 + 10 ) res2 = @@ -1831,7 +1833,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res2), - 7 + 10 ) expect_gt( @@ -1849,7 +1851,7 @@ test_that("filter abundant - no object",{ expect_equal( ncol(res), - 7 + 10 ) }) @@ -1867,7 +1869,7 @@ test_that("filter abundant with design - no object",{ expect_equal( ncol(res), - 7 + 10 ) @@ -1882,13 +1884,13 @@ test_that("nest - no object",{ test_that("pivot",{ - expect_equal( ncol(pivot_sample(tidybulk(input_df, a, b, c))), 4 ) + expect_equal( ncol(pivot_sample(tidybulk(input_df, a, b, c))), 6 ) - expect_equal( ncol(pivot_sample(input_df, a)), 4 ) + expect_equal( ncol(pivot_sample(input_df, a)), 6 ) - expect_equal( ncol(pivot_transcript(tidybulk(input_df, a, b, c))), 1 ) + expect_equal( ncol(pivot_transcript(tidybulk(input_df, a, b, c))), 2 ) - expect_equal( ncol(pivot_transcript(input_df, b)), 1 ) + expect_equal( ncol(pivot_transcript(input_df, b)), 2 ) }) @@ -1913,7 +1915,7 @@ test_that("pivot",{ test_that("gene over representation",{ - df_entrez = se_mini %>% tidybulk() %>% as_tibble() %>% symbol_to_entrez(.transcript = .feature, .sample = .sample) + df_entrez = se_mini %>% tidybulk() %>% as_tibble() df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = .sample, .transcript = entrez, .abundance = count) df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index c2b5f988..66a5a52a 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -3,7 +3,7 @@ context('Bulk methods SummarizedExperiment') data("se_mini") data("breast_tcga_mini_SE") -input_df = setNames(se_mini %>% tidybulk() %>% as_tibble(), c( "b","a", "c", "Cell type", "time" , "condition")) +input_df = setNames(se_mini %>% tidybulk() %>% as_tibble(), c( "b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) input_df_breast = setNames( breast_tcga_mini_SE %>% tidybulk() %>% as_tibble(), c( "b", "a","c", "c norm", "call" )) @@ -43,14 +43,14 @@ test_that("tidybulk SummarizedExperiment normalisation manual",{ tolerance=1e-3 ) - expect_equal( nrow(res), 800 ) + expect_equal( nrow(res), 2635 ) - expect_equal( ncol(res), 17 ) + expect_equal( ncol(res), 11 ) res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) - expect_equal( res, "counts_scaled" ) + expect_equal( res, "count_scaled" ) }) @@ -60,7 +60,7 @@ test_that("tidybulk SummarizedExperiment normalisation",{ expect_equal( names(SummarizedExperiment::assays(res)), - c("counts" ,"counts_scaled") + c("count" ,"count_scaled") ) }) @@ -73,7 +73,7 @@ test_that("tidybulk SummarizedExperiment normalisation subset",{ expect_equal( unique(SummarizedExperiment::colData(res)$multiplier), - c(1.3648110, 1.5756592, 1.1651309, 2.1282288, 1.2110911, 0.9574359, 1.4434610, 1.4897840), + c(4.008727 , 3.953872 , 8.971316 , 2.245514 ,10.658455 ), tolerance=1e-6 ) @@ -139,7 +139,7 @@ test_that("Drop redundant correlated - SummarizedExperiment",{ expect_equal( nrow(res), - 100 + 527 ) }) @@ -174,7 +174,7 @@ test_that("Aggregate duplicated transcript - SummarizedExperiment",{ se %>% aggregate_duplicates( .transcript = bla ) - expect_equal( dim(res), c( 99, 8 ) ) + expect_equal( dim(res), c( 527, 5 ) ) }) @@ -185,7 +185,7 @@ test_that("Add cell type proportions - SummarizedExperiment",{ expect_equal( as.numeric(as.data.frame(res@colData[1, 4:7])), - c( 0.6196622 ,0.2525598, 0.0000000, 0.0000000), + c( 1.0000000 ,1.0000000, 0.6196622, 0.2525598), tolerance=1e-3 ) @@ -327,7 +327,7 @@ test_that("filter abundant - SummarizedExperiment",{ res = keep_abundant( se ) - expect_equal( nrow(res), 23 ) + expect_equal( nrow(res), 182 ) }) @@ -498,16 +498,15 @@ test_that("test_stratification_cellularity",{ test_that("pivot",{ - expect_equal( ncol(pivot_sample(se_mini) ), 4) + expect_equal( ncol(pivot_sample(se_mini) ), 6) - expect_equal( ncol(pivot_transcript(se_mini) ), 1) + expect_equal( ncol(pivot_transcript(se_mini) ), 2) }) test_that("gene over representation",{ - df_entrez = symbol_to_entrez(input_df, .transcript = b, .sample = a) - df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) + df_entrez = aggregate_duplicates(input_df, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) df_entrez = mutate(df_entrez, do_test = b %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) res = @@ -528,9 +527,6 @@ test_that("gene over representation",{ test_that("Only reduced dimensions MDS - no object",{ - - - res = breast_tcga_mini_SE %>% reduce_dimensions(method = "MDS") @@ -621,3 +617,4 @@ test_that("Only reduced dimensions UMAP - no object",{ expect_equal( class(attr(res, "internals")$UMAP[[1]])[1], "numeric" ) }) + From 9f08092934b1e2c7a0a05b153ee6c203b0fcfb8f Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 25 Jun 2022 23:50:18 +1000 Subject: [PATCH 23/38] fix tests --- tests/testthat/test-bulk_methods.R | 2 +- .../test-bulk_methods_SummarizedExperiment.R | 656 +++++++++--------- 2 files changed, 330 insertions(+), 328 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 0437ad8f..b0fdd2b2 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1597,7 +1597,7 @@ test_that("Add description to symbol",{ expect_equal( ncol(res), - 7 + 10 ) res = diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 66a5a52a..1cf2d13c 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -9,59 +9,59 @@ input_df_breast = setNames( breast_tcga_mini_SE %>% tidybulk() %>% as_tibble(), test_that("tidybulk SummarizedExperiment conversion",{ - res = tidybulk(tidybulk::se) + res = tidybulk(tidybulk::se) - expect_equal( class(res)[1], "tidybulk" ) + expect_equal( class(res)[1], "tidybulk" ) - expect_equal( nrow(res), 800 ) + expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 13 ) + expect_equal( ncol(res), 13 ) - res = res %>% tidybulk:::tidybulk_to_SummarizedExperiment() + res = res %>% tidybulk:::tidybulk_to_SummarizedExperiment() - expect_equal( class(res)[1], "SummarizedExperiment" ) + expect_equal( class(res)[1], "SummarizedExperiment" ) - expect_equal( nrow(res), 100 ) + expect_equal( nrow(res), 100 ) - expect_equal( ncol(res), 8 ) + expect_equal( ncol(res), 8 ) }) test_that("tidybulk SummarizedExperiment normalisation manual",{ - res = tidybulk(tidybulk:::tidybulk_to_SummarizedExperiment(scale_abundance(tidybulk(se) %>% identify_abundant()))) + res = tidybulk(tidybulk:::tidybulk_to_SummarizedExperiment(scale_abundance(tidybulk(se) %>% identify_abundant()))) - res2 = tidybulk(se) %>% identify_abundant() %>% scale_abundance() + res2 = tidybulk(se) %>% identify_abundant() %>% scale_abundance() - res %>% distinct(.sample, multiplier) %>% pull(multiplier) - res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) + res %>% distinct(.sample, multiplier) %>% pull(multiplier) + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) - expect_equal( - res %>% distinct(.sample, multiplier) %>% pull(multiplier), - res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) %>% as.numeric(), - tolerance=1e-3 - ) + expect_equal( + res %>% distinct(.sample, multiplier) %>% pull(multiplier), + res2 %>% distinct(.sample, multiplier) %>% pull(multiplier) %>% as.numeric(), + tolerance=1e-3 + ) - expect_equal( nrow(res), 2635 ) + expect_equal( nrow(res), 800 ) - expect_equal( ncol(res), 11 ) + expect_equal( ncol(res), 17 ) - res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) + res = rlang::quo_name(attr(res, "internals")$tt_columns[[4]]) - expect_equal( res, "count_scaled" ) + expect_equal( res, "counts_scaled" ) }) test_that("tidybulk SummarizedExperiment normalisation",{ - res = se %>% identify_abundant() %>% scale_abundance() + res = se %>% identify_abundant() %>% scale_abundance() - expect_equal( - names(SummarizedExperiment::assays(res)), - c("count" ,"count_scaled") - ) + expect_equal( + names(SummarizedExperiment::assays(res)), + c("counts" ,"counts_scaled") + ) }) @@ -73,7 +73,7 @@ test_that("tidybulk SummarizedExperiment normalisation subset",{ expect_equal( unique(SummarizedExperiment::colData(res)$multiplier), - c(4.008727 , 3.953872 , 8.971316 , 2.245514 ,10.658455 ), + c(1.3648110, 1.5756592, 1.1651309, 2.1282288, 1.2110911, 0.9574359, 1.4434610, 1.4897840), tolerance=1e-6 ) @@ -85,80 +85,80 @@ test_that("tidybulk SummarizedExperiment normalisation subset",{ test_that("tidybulk SummarizedExperiment clustering",{ - res = cluster_elements(se, method="kmeans", centers = 2) + res = cluster_elements(se, method="kmeans", centers = 2) - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "cluster_kmeans" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "cluster_kmeans" + ) - expect_equal( - levels(SummarizedExperiment::colData(res)$cluster_kmeans), - c("1", "2") - ) + expect_equal( + levels(SummarizedExperiment::colData(res)$cluster_kmeans), + c("1", "2") + ) }) test_that("tidybulk SummarizedExperiment clustering",{ - res = se %>% identify_abundant() %>% reduce_dimensions(method="PCA") + res = se %>% identify_abundant() %>% reduce_dimensions(method="PCA") - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "PC2" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "PC2" + ) }) test_that("Get rotated dimensions - SummarizedExperiment",{ - res.pca = - reduce_dimensions(se %>% identify_abundant(), method="PCA" ) + res.pca = + reduce_dimensions(se %>% identify_abundant(), method="PCA" ) - res = - rotate_dimensions( - res.pca, - dimension_1_column = PC1, - dimension_2_column = PC2, - rotation_degrees = 45 - ) + res = + rotate_dimensions( + res.pca, + dimension_1_column = PC1, + dimension_2_column = PC2, + rotation_degrees = 45 + ) - expect_equal( - tail(names(SummarizedExperiment::colData(res)), 1), - "PC2_rotated_45" - ) + expect_equal( + tail(names(SummarizedExperiment::colData(res)), 1), + "PC2_rotated_45" + ) }) test_that("Drop redundant correlated - SummarizedExperiment",{ - res = - remove_redundancy( - se, - method = "correlation", correlation_threshold = 0.99 ) + res = + remove_redundancy( + se, + method = "correlation", correlation_threshold = 0.99 ) - expect_equal( - nrow(res), - 527 - ) + expect_equal( + nrow(res), + 100 + ) }) test_that("Get adjusted counts - SummarizedExperiment",{ - cm = se_mini - cm$batch = 0 - cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 + cm = se_mini + cm$batch = 0 + cm$batch[colnames(cm) %in% c("SRR1740035", "SRR1740043")] = 1 - res = - adjust_abundance( - cm %>% identify_abundant(), - ~ condition + batch - ) + res = + adjust_abundance( + cm %>% identify_abundant(), + ~ condition + batch + ) - expect_equal(nrow(res), 527 ) + expect_equal(nrow(res), 527 ) - expect_equal( names(SummarizedExperiment::assays(res)), c("count" ,"count_adjusted") ) + expect_equal( names(SummarizedExperiment::assays(res)), c("count" ,"count_adjusted") ) }) @@ -172,287 +172,287 @@ test_that("Aggregate duplicated transcript - SummarizedExperiment",{ res = se %>% - aggregate_duplicates( .transcript = bla ) + aggregate_duplicates( .transcript = bla ) - expect_equal( dim(res), c( 527, 5 ) ) + expect_equal( dim(res), c( 99, 8 ) ) }) test_that("Add cell type proportions - SummarizedExperiment",{ - res = deconvolve_cellularity(se_mini, cores=1 ) + res = deconvolve_cellularity(se_mini, cores=1 ) - expect_equal( - as.numeric(as.data.frame(res@colData[1, 4:7])), - c( 1.0000000 ,1.0000000, 0.6196622, 0.2525598), - tolerance=1e-3 - ) + expect_equal( + as.numeric(as.data.frame(res@colData[1, 4:7])), + c( 1.0000000, 1.0000000, 0.6196622, 0.2525598), + tolerance=1e-3 + ) }) test_that("differential trancript abundance - SummarizedExperiment",{ - res = test_differential_abundance( - se_mini %>% - identify_abundant(factor_of_interest = condition), - ~ condition - ) - - w = match( c("CLEC7A" , "FAM198B", "FCN1" , "HK3" ), rownames(res) ) - - # Quasi likelihood - res_tibble = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), - ~ condition , - a, b, c - ) - - expect_equal( - res@elementMetadata[w,]$logFC, - c(-11.58385, -13.53406, -12.58204, -12.19271), - tolerance=1e-4 - ) - - expect_equal( - res@elementMetadata[w,]$logFC, - res_tibble %>% - pivot_transcript(b) %>% - filter(b %in% rownames(res)[w]) %>% - dplyr::arrange(b) %>% - dplyr::pull(logFC), - tolerance=1e-4 - ) - - # Likelihood ratio - res2 = test_differential_abundance( - se_mini %>% - identify_abundant(factor_of_interest = condition), - ~ condition, method = "edgeR_likelihood_ratio" ) - - res2_tibble = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), - ~ condition , - a, b, c, method = "edgeR_likelihood_ratio" - ) - - expect_equal( - res2@elementMetadata[w,]$logFC, - c(-11.57989, -13.53476, -12.57969, -12.19303), - tolerance=1e-4 - ) - - expect_equal( - res2@elementMetadata[w,]$logFC, - res2_tibble %>% - pivot_transcript(b) %>% - filter(b %in% rownames(res)[w]) %>% - dplyr::arrange(b) %>% - dplyr::pull(logFC), - tolerance=1e-4 - ) - - # Treat - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% - test_differential_abundance( - ~ condition, - .sample = a, - .transcript = b, - .abundance = c, - scaling_method = "TMM", - method = "edgeR_likelihood_ratio", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() %>% - filter(FDR<0.05) %>% - nrow %>% - expect_equal(169) + res = test_differential_abundance( + se_mini %>% + identify_abundant(factor_of_interest = condition), + ~ condition + ) + + w = match( c("CLEC7A" , "FAM198B", "FCN1" , "HK3" ), rownames(res) ) + + # Quasi likelihood + res_tibble = test_differential_abundance( + input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + ~ condition , + a, b, c + ) + + expect_equal( + res@elementMetadata[w,]$logFC, + c(-11.58385, -13.53406, -12.58204, -12.19271), + tolerance=1e-4 + ) + + expect_equal( + res@elementMetadata[w,]$logFC, + res_tibble %>% + pivot_transcript(b) %>% + filter(b %in% rownames(res)[w]) %>% + dplyr::arrange(b) %>% + dplyr::pull(logFC), + tolerance=1e-4 + ) + + # Likelihood ratio + res2 = test_differential_abundance( + se_mini %>% + identify_abundant(factor_of_interest = condition), + ~ condition, method = "edgeR_likelihood_ratio" ) + + res2_tibble = test_differential_abundance( + input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + ~ condition , + a, b, c, method = "edgeR_likelihood_ratio" + ) + + expect_equal( + res2@elementMetadata[w,]$logFC, + c(-11.57989, -13.53476, -12.57969, -12.19303), + tolerance=1e-4 + ) + + expect_equal( + res2@elementMetadata[w,]$logFC, + res2_tibble %>% + pivot_transcript(b) %>% + filter(b %in% rownames(res)[w]) %>% + dplyr::arrange(b) %>% + dplyr::pull(logFC), + tolerance=1e-4 + ) + + # Treat + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = condition) %>% + test_differential_abundance( + ~ condition, + .sample = a, + .transcript = b, + .abundance = c, + scaling_method = "TMM", + method = "edgeR_likelihood_ratio", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() %>% + filter(FDR<0.05) %>% + nrow %>% + expect_equal(169) }) test_that("Voom with treat method",{ - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% - test_differential_abundance( - ~ condition, - .sample = a, - .transcript = b, - .abundance = c, - method = "limma_voom", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() %>% - filter(adj.P.Val<0.05) %>% - nrow %>% - expect_equal(97) - - # with multiple contrasts - res <- - se_mini %>% - identify_abundant(a, b, c, factor_of_interest = Cell.type) %>% - test_differential_abundance( - ~ 0 + Cell.type, - .sample = a, - .transcript = b, - .abundance = c, - .contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), - method = "limma_voom", - test_above_log2_fold_change = 1, - action="only" - ) %>% - `@` (elementMetadata) %>% - as_tibble() - - res %>% - filter(adj.P.Val___Cell.typeb_cell.Cell.typemonocyte < 0.05) %>% - nrow %>% - expect_equal(293) - - res %>% - filter(adj.P.Val___Cell.typeb_cell.Cell.typet_cell < 0.05) %>% - nrow %>% - expect_equal(246) + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = condition) %>% + test_differential_abundance( + ~ condition, + .sample = a, + .transcript = b, + .abundance = c, + method = "limma_voom", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() %>% + filter(adj.P.Val<0.05) %>% + nrow %>% + expect_equal(97) + + # with multiple contrasts + res <- + se_mini %>% + identify_abundant(a, b, c, factor_of_interest = Cell.type) %>% + test_differential_abundance( + ~ 0 + Cell.type, + .sample = a, + .transcript = b, + .abundance = c, + .contrasts = c("Cell.typeb_cell-Cell.typemonocyte", "Cell.typeb_cell-Cell.typet_cell"), + method = "limma_voom", + test_above_log2_fold_change = 1, + action="only" + ) %>% + `@` (elementMetadata) %>% + as_tibble() + + res %>% + filter(adj.P.Val___Cell.typeb_cell.Cell.typemonocyte < 0.05) %>% + nrow %>% + expect_equal(293) + + res %>% + filter(adj.P.Val___Cell.typeb_cell.Cell.typet_cell < 0.05) %>% + nrow %>% + expect_equal(246) }) test_that("filter abundant - SummarizedExperiment",{ - res = keep_abundant( se ) + res = keep_abundant( se ) - expect_equal( nrow(res), 182 ) + expect_equal( nrow(res), 23 ) }) test_that("filter variable - no object",{ - res = keep_variable(se, top = 5 ) + res = keep_variable(se, top = 5 ) - expect_equal( nrow(res),5 ) + expect_equal( nrow(res),5 ) - res = - keep_variable( - se_mini, - top = 5 - ) + res = + keep_variable( + se_mini, + top = 5 + ) - expect_equal( nrow(res),5 ) + expect_equal( nrow(res),5 ) - expect_equivalent( - sort(rownames(res)), - c("FCN1", "IGHD", "IGHM", "IGKC", "TCL1A") - ) + expect_equivalent( + sort(rownames(res)), + c("FCN1", "IGHD", "IGHM", "IGKC", "TCL1A") + ) }) test_that("impute missing",{ - res = - input_df %>% - dplyr::slice(-1) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - impute_missing_abundance( ~ condition ) + res = + input_df %>% + dplyr::slice(-1) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + impute_missing_abundance( ~ condition ) - expect_equal( SummarizedExperiment::assays(res) %>% as.list() %>% .[[1]] %>% .["TNFRSF4", "SRR1740034"], 6 ) + expect_equal( SummarizedExperiment::assays(res) %>% as.list() %>% .[[1]] %>% .["TNFRSF4", "SRR1740034"], 6 ) - expect_equal( nrow(res)*ncol(res), nrow(input_df) ) + expect_equal( nrow(res)*ncol(res), nrow(input_df) ) }) test_that("differential composition",{ - # Cibersort - se_mini %>% - test_differential_cellularity(. ~ condition , cores = 1 ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% - expect_equal( -2, tollerance =1e-3) - - # llsr - se_mini %>% - test_differential_cellularity( - . ~ condition, - method="llsr" - ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% - expect_equal( -2, tollerance =1e-3) - - # Survival analyses - input_df %>% - dplyr::select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_differential_cellularity( - survival::Surv(days, dead) ~ ., - cores = 1 - ) %>% - pull(estimate) %>% - .[[1]] %>% - expect_equal(26.2662279, tolerance = 30) - # round() %in% c( - # 26, # 97 is the github action MacOS that has different value - # 26, # 112 is the github action UBUNTU that has different value - # 26 # 93 is the github action Windows that has different value - # ) %>% - # expect_true() + # Cibersort + se_mini %>% + test_differential_cellularity(. ~ condition , cores = 1 ) %>% + pull(`estimate_(Intercept)`) %>% + .[[1]] %>% + as.integer %>% + expect_equal( -2, tollerance =1e-3) + + # llsr + se_mini %>% + test_differential_cellularity( + . ~ condition, + method="llsr" + ) %>% + pull(`estimate_(Intercept)`) %>% + .[[1]] %>% + as.integer %>% + expect_equal( -2, tollerance =1e-3) + + # Survival analyses + input_df %>% + dplyr::select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_differential_cellularity( + survival::Surv(days, dead) ~ ., + cores = 1 + ) %>% + pull(estimate) %>% + .[[1]] %>% + expect_equal(26.2662279, tolerance = 30) + # round() %in% c( + # 26, # 97 is the github action MacOS that has different value + # 26, # 112 is the github action UBUNTU that has different value + # 26 # 93 is the github action Windows that has different value + # ) %>% + # expect_true() }) test_that("test_stratification_cellularity",{ - # Cibersort - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_stratification_cellularity( - survival::Surv(days, dead) ~ ., - cores = 1 - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% - expect_equal(3.35, tolerance =1e-1) - - # llsr - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% - mutate( - days = c(1, 10, 500, 1000, 2000), - dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% - test_stratification_cellularity( - survival::Surv(days, dead) ~ ., - .sample = a, - .transcript = b, - .abundance = c, - method = "llsr" - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% - expect_equal(3.35, tolerance =1e-1) + # Cibersort + input_df %>% + select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_stratification_cellularity( + survival::Surv(days, dead) ~ ., + cores = 1 + ) %>% + pull(.low_cellularity_expected) %>% + .[[1]] %>% + expect_equal(3.35, tolerance =1e-1) + + # llsr + input_df %>% + select(a, b, c) %>% + nest(data = -a) %>% + mutate( + days = c(1, 10, 500, 1000, 2000), + dead = c(1, 1, 1, 0, 1) + ) %>% + unnest(data) %>% + test_stratification_cellularity( + survival::Surv(days, dead) ~ ., + .sample = a, + .transcript = b, + .abundance = c, + method = "llsr" + ) %>% + pull(.low_cellularity_expected) %>% + .[[1]] %>% + expect_equal(3.35, tolerance =1e-1) }) @@ -498,27 +498,27 @@ test_that("test_stratification_cellularity",{ test_that("pivot",{ - expect_equal( ncol(pivot_sample(se_mini) ), 6) + expect_equal( ncol(pivot_sample(se_mini) ), 6) - expect_equal( ncol(pivot_transcript(se_mini) ), 2) + expect_equal( ncol(pivot_transcript(se_mini) ), 2) }) test_that("gene over representation",{ - df_entrez = aggregate_duplicates(input_df, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) - df_entrez = mutate(df_entrez, do_test = b %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) + df_entrez = aggregate_duplicates(input_df, aggregation_function = sum, .sample = a, .transcript = entrez, .abundance = c) + df_entrez = mutate(df_entrez, do_test = b %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) - res = - df_entrez %>% - tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% - test_gene_overrepresentation( - .entrez = entrez, - .do_test = do_test, - species="Homo sapiens" - ) + res = + df_entrez %>% + tidybulk:::tidybulk_to_SummarizedExperiment(a, b, c) %>% + test_gene_overrepresentation( + .entrez = entrez, + .do_test = do_test, + species="Homo sapiens" + ) - expect_equal( ncol(res), 10 ) + expect_equal( ncol(res), 10 ) @@ -527,22 +527,25 @@ test_that("gene over representation",{ test_that("Only reduced dimensions MDS - no object",{ - res = - breast_tcga_mini_SE %>% - reduce_dimensions(method = "MDS") - expect_equal( - res$`Dim1`[1:4], - c(-0.2723808836, -0.1105770207, -0.3034092668, -0.0064569358), - tolerance=10 - ) - expect_equal( - ncol(colData(res)), - 3 - ) - expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) + res = + breast_tcga_mini_SE %>% + reduce_dimensions(method = "MDS") + + expect_equal( + res$`Dim1`[1:4], + c(-0.2723808836, -0.1105770207, -0.3034092668, -0.0064569358), + tolerance=10 + ) + + expect_equal( + ncol(colData(res)), + 3 + ) + + expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) }) @@ -617,4 +620,3 @@ test_that("Only reduced dimensions UMAP - no object",{ expect_equal( class(attr(res, "internals")$UMAP[[1]])[1], "numeric" ) }) - From 6def641847a7e858258da36828c559b3e58d855e Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 00:01:55 +1000 Subject: [PATCH 24/38] fix back deconvolution --- tests/testthat/test-bulk_methods_SummarizedExperiment.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 1cf2d13c..4b4c212c 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -184,8 +184,8 @@ test_that("Add cell type proportions - SummarizedExperiment",{ res = deconvolve_cellularity(se_mini, cores=1 ) expect_equal( - as.numeric(as.data.frame(res@colData[1, 4:7])), - c( 1.0000000, 1.0000000, 0.6196622, 0.2525598), + as.numeric(as.data.frame(res@colData[1, 6:9])), + c( 0.619662 , 0.25256 , 0 , 0), tolerance=1e-3 ) From 0bc0619814d1c657c18a1b16ec891d372b88eccf Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 11:27:32 +1000 Subject: [PATCH 25/38] fix aggregation --- R/methods_SE.R | 61 +++++-------------- .../test-bulk_methods_SummarizedExperiment.R | 9 ++- tests/testthat/test-tximeta_GRnges_IRanges.R | 10 +-- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/R/methods_SE.R b/R/methods_SE.R index 832ed7c3..940c81e8 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -774,9 +774,9 @@ setMethod("adjust_abundance", colnames() %>% outersect(non_standard_columns) %>% setdiff(quo_name(.transcript)) %>% - c(feature_column_name) + c(feature__$name) # when( - # !is.null(rownames(.data)) ~ c(., feature_column_name), + # !is.null(rownames(.data)) ~ c(., feature__$name), # ~ (.) # ) @@ -830,17 +830,20 @@ setMethod("adjust_abundance", } ) - if(!is.null(rowRanges(.data))){ - new_range_data = - - rowRanges(.data) %>% - as_tibble() + new_range_data = rowRanges(.data) %>% as_tibble() # If GRangesList & and .transcript is not there add .transcript if(is(rowRanges(.data), "CompressedGRangesList") & !quo_name(.transcript) %in% colnames(new_range_data)){ - new_range_data %>% left_join(). <<<<< + + new_range_data = + new_range_data %>% left_join( + rowData(.data)[,quo_name(.transcript),drop=FALSE] %>% + as_tibble(rownames = feature__$name) , + by=c("group_name" = feature__$name) + ) %>% + select(-group_name, -group) } # Through warning if there are logicals of factor in the data frame @@ -848,52 +851,19 @@ setMethod("adjust_abundance", if (length(non_standard_columns)>0 & new_range_data %>% pull(!!.transcript) %>% duplicated() %>% which() %>% length() %>% gt(0) ) { warning(paste(capture.output({ cat(crayon::blue("tidybulk says: If duplicates exist from the following columns, only the first instance was taken (lossy behaviour), as aggregating those classes with concatenation is not possible.\n")) - print(.data %>% select(non_standard_columns)) + print(rowData(.data)[1,non_standard_columns,drop=FALSE]) }), collapse = "\n")) } - - - new_range_data = new_range_data %>% # I have to use this trick because rowRanges() and rowData() share @elementMetadata select(-one_of(colnames(new_row_data) %>% outersect(quo_name(.transcript)))) %>% suppressWarnings() - # %>% - # - # # Join rowData - I DON'T KNOW IF ALWAYS ROWRANGES INCLUDE 100% OF ROWDATA - # bind_cols(gene_id_dataset) %>% - # group_by(!!as.symbol(quo_name(.transcript))) %>% - # - # # If I have rownames add them - # when( - # !is.null(rownames(.data)) ~ rowid_to_column(.), - # ~ (.) - # ) %>% - # - # mutate( - # across(columns_to_collapse, ~ .x %>% collapse_function()), - # across(non_standard_columns, ~ .x[1]), - # merged_transcripts = n() - # ) %>% - # distinct() %>% - # #arrange(!!as.symbol(feature_column_name)) %>% - # - # select(-one_of("group_name", "group")) %>% - # suppressWarnings() %>% - # - # as.data.frame() %>% - # # Add back rownames - # { - # .x = (.) - # rownames(.x) = .x %>% pull(!!as.symbol(feature_column_name)) - # .x - # } -browser() + #if(is(rr, "CompressedGRangesList") | nrow(new_row_data)% .[match(new_row_data[,quo_name(.transcript)], names(.))] - names(new_range_data) = rownames(new_row_data) + #names(new_range_data) = rownames(new_row_data) #} # else if(is(rr, "GRanges")) new_range_data = makeGRangesFromDataFrame(new_range_data, keep.extra.columns = TRUE) # else stop("tidybulk says: riowRanges should be either GRanges or CompressedGRangesList. Or am I missing something?") - } # Build the object diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index 99a24841..7643eb5e 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -165,7 +165,7 @@ test_that("Get adjusted counts - SummarizedExperiment",{ test_that("Aggregate duplicated transcript - SummarizedExperiment",{ - + se = tidybulk::se SummarizedExperiment::rowData(se)$bla = rownames(se) %>% purrr::map_chr(~ { if(.x %in% c("LRG_239", "LRG_405")) "BLAAA" @@ -178,6 +178,13 @@ test_that("Aggregate duplicated transcript - SummarizedExperiment",{ res = aggregate_duplicates(se, .transcript = bla ) + expect_equal( + as.data.frame(rowRanges(se["ENSG00000272397","SRR1039508"])) %>% + mutate_if(is.factor, as.character), + as.data.frame(rowRanges(res["ENSG00000272397","SRR1039508"]))%>% + mutate_if(is.factor, as.character), + ) + expect_equal( dim(res), c( 99, 8 ) ) diff --git a/tests/testthat/test-tximeta_GRnges_IRanges.R b/tests/testthat/test-tximeta_GRnges_IRanges.R index 425311a1..ffdbc2ce 100644 --- a/tests/testthat/test-tximeta_GRnges_IRanges.R +++ b/tests/testthat/test-tximeta_GRnges_IRanges.R @@ -6,7 +6,7 @@ test_that("tximeta 1",{ rownames(duplicate) = "dup" - rbind(duplicate, tximeta_summarizeToGene_object) %>% + SummarizedExperiment::rbind(duplicate, tximeta_summarizeToGene_object) %>% aggregate_duplicates(.transcript = gene_id) tximeta_summarizeToGene_object %>% @@ -21,12 +21,12 @@ test_that("se no features",{ # Create dataset nrows <- 200; ncols <- 6 counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) - rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)), - IRanges(floor(runif(200, 1e5, 1e6)), width=100), + rowRanges <- GenomicRanges::GRanges(rep(c("chr1", "chr2"), c(50, 150)), + IRanges::IRanges(floor(runif(200, 1e5, 1e6)), width=100), strand=sample(c("+", "-"), 200, TRUE), feature_id=sprintf("ID%03d", 1:200)) - colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), row.names=LETTERS[1:6]) - se <- SummarizedExperiment(assays=SimpleList(counts=counts), rowRanges=rowRanges, colData=colData) + colData <- S4Vectors::DataFrame(Treatment=rep(c("ChIP", "Input"), 3), row.names=LETTERS[1:6]) + se <- SummarizedExperiment(assays=S4Vectors::SimpleList(counts=counts), rowRanges=rowRanges, colData=colData) se= rbind( se[1,], se) se %>% From b97eec08ee3e28af6130b84c5fa1499d5b2aacec Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:21:39 +1000 Subject: [PATCH 26/38] deprecation untested --- R/functions.R | 78 ++++----- R/functions_SE.R | 32 ++-- R/methods.R | 187 ++++++++++++++++----- R/methods_SE.R | 29 ++-- man/adjust_abundance-methods.Rd | 42 +++-- man/cluster_elements-methods.Rd | 34 ++-- man/get_reduced_dimensions_UMAP_bulk.Rd | 6 +- man/get_reduced_dimensions_UMAP_bulk_SE.Rd | 4 +- man/keep_variable-methods.Rd | 18 +- man/reduce_dimensions-methods.Rd | 34 ++-- man/remove_redundancy-methods.Rd | 28 +-- 11 files changed, 322 insertions(+), 170 deletions(-) diff --git a/R/functions.R b/R/functions.R index daa6988b..ee0c096e 100755 --- a/R/functions.R +++ b/R/functions.R @@ -1536,7 +1536,7 @@ get_clusters_kmeans_bulk <- .feature = NULL, .abundance = NULL, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots dots_args = rlang::dots_list(...) @@ -1553,9 +1553,9 @@ get_clusters_kmeans_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.feature,!!.abundance) %>% @@ -1603,7 +1603,7 @@ get_clusters_SNN_bulk <- .feature = NULL, .abundance, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Get column names .element = enquo(.element) @@ -1631,7 +1631,8 @@ get_clusters_SNN_bulk <- distinct(!!.element,!!.feature,!!.abundance) %>% # Check if log tranfrom is needed - #ifelse_pipe(log_transform, ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.element,!!.abundance) @@ -1689,7 +1690,7 @@ get_reduced_dimensions_MDS_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE) { + transform = log1p) { # Comply with CRAN NOTES . = NULL @@ -1714,9 +1715,9 @@ get_reduced_dimensions_MDS_bulk <- distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1808,7 +1809,7 @@ get_reduced_dimensions_PCA_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -1831,9 +1832,9 @@ get_reduced_dimensions_PCA_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1950,7 +1951,7 @@ get_reduced_dimensions_TSNE_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Comply with CRAN NOTES . = NULL @@ -2004,9 +2005,9 @@ get_reduced_dimensions_TSNE_bulk <- ~ .x %>% eliminate_sparse_transcripts(!!.feature) ) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p)) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) %>% @@ -2064,7 +2065,7 @@ get_reduced_dimensions_UMAP_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ...) { @@ -2114,8 +2115,8 @@ get_reduced_dimensions_UMAP_bulk <- ~ (.) ) %>% - # Check if log transform is needed - when(log_transform ~ dplyr::mutate(., !!.abundance := !!.abundance %>% log1p), ~ (.)) %>% + # Apply (log by default) transformation + dplyr::mutate(., !!.abundance := transform(!!.abundance)) %>% # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) @@ -2515,7 +2516,7 @@ remove_redundancy_elements_through_correlation <- function(.data, correlation_threshold = 0.9, top = Inf, of_samples = TRUE, - log_transform = FALSE) { + transform = identity) { # Comply with CRAN NOTES . = NULL @@ -2548,9 +2549,9 @@ remove_redundancy_elements_through_correlation <- function(.data, # Filter variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top = top) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + distinct() %>% # NO NEED OF RECTANGULAR @@ -3079,7 +3080,8 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = transform, + inverse_transform = inverse_transform, ...) { # Get column names .sample = enquo(.sample) @@ -3108,9 +3110,9 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, one_of(parse_formula(.formula))) %>% distinct() %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) + # Create design matrix @@ -3167,13 +3169,11 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, gather(!!.sample,!!.abundance,-!!.transcript) %>% # Reverse-Log transform if transformed in the first place - ifelse_pipe( - log_transform, - ~ .x %>% - dplyr::mutate(!!.abundance := !!.abundance %>% exp %>% `-`(1)) %>% - dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% - dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) - ) %>% + dplyr::mutate(!!.abundance := inverse_transform(!!.abundance)) %>% + + # In case the inverse tranform produces negative counts + dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% + dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) %>% # Reset column names dplyr::rename(!!value_adjusted := !!.abundance) %>% @@ -3202,7 +3202,7 @@ keep_variable_transcripts = function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) { + transform = log1p) { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -3225,8 +3225,8 @@ keep_variable_transcripts = function(.data, distinct(!!.sample,!!.transcript,!!.abundance) %>% # Check if logtansform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + spread(!!.sample,!!.abundance) %>% as_matrix(rownames = quo_name(.transcript)) diff --git a/R/functions_SE.R b/R/functions_SE.R index ec763546..194a037f 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -14,7 +14,7 @@ #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -23,7 +23,7 @@ get_clusters_kmeans_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots @@ -34,7 +34,7 @@ get_clusters_kmeans_bulk_SE <- .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) %>% + transform() %>% # Decide if of samples or transcripts when( @@ -66,7 +66,7 @@ get_clusters_kmeans_bulk_SE <- #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -74,7 +74,7 @@ get_clusters_kmeans_bulk_SE <- get_clusters_SNN_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { @@ -127,7 +127,7 @@ get_clusters_SNN_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' #' @return A tibble with additional columns @@ -138,7 +138,7 @@ get_reduced_dimensions_MDS_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL # This is only a dummy argument for making it compatibble with PCA ) { # Comply with CRAN NOTES @@ -222,7 +222,7 @@ get_reduced_dimensions_MDS_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function prcomp #' @@ -234,7 +234,7 @@ get_reduced_dimensions_PCA_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -323,7 +323,7 @@ we suggest to partition the dataset for sample clusters. #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function Rtsne #' @@ -334,7 +334,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA ...) { # Comply with CRAN NOTES @@ -406,7 +406,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param calculate_for_pca_dimensions An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered #' @param ... Further parameters passed to the function uwot #' @@ -417,7 +417,7 @@ get_reduced_dimensions_UMAP_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA calculate_for_pca_dimensions = 20, ...) { @@ -528,13 +528,13 @@ filter_if_abundant_were_identified = function(.data){ #' @param .transcript A character name of the transcript/gene column #' @param .abundance A character name of the read count column #' @param top An integer. How many top genes to select -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' #' @return A tibble filtered genes #' keep_variable_transcripts_SE = function(.data, top = 500, - log_transform = TRUE) { + transform = log1p) { # Manage Inf @@ -546,7 +546,7 @@ keep_variable_transcripts_SE = function(.data, .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() s <- rowMeans((x - rowMeans(x, na.rm=TRUE)) ^ 2, na.rm=TRUE) diff --git a/R/methods.R b/R/methods.R index 94e5ccfc..2578ca1e 100755 --- a/R/methods.R +++ b/R/methods.R @@ -515,10 +515,12 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' #' @param method A character string. The cluster algorithm to use, at the moment k-means is the only algorithm included. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function kmeans #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details identifies clusters in the data, normally of samples. #' This function returns a tibble with additional columns for the cluster annotation. #' At the moment only k-means (DOI: 10.2307/2346830) and SNN clustering (DOI:10.1016/j.cell.2019.05.031) is supported, the plan is to introduce more clustering methods. @@ -554,9 +556,14 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("cluster_elements")) # Set internal @@ -566,10 +573,26 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -609,7 +632,7 @@ setGeneric("cluster_elements", function(.data, .element = !!.element, .feature = !!.feature, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), method == "SNN" ~ stop("tidybulk says: Matrix package (v1.3-3) causes an error with Seurat::FindNeighbors used in this method. We are trying to solve this issue. At the moment this option in unaviable."), @@ -618,7 +641,7 @@ setGeneric("cluster_elements", function(.data, # .element = !!.element, # .feature = !!.feature, # of_samples = of_samples, - # log_transform = log_transform, + # transform = transform, # ... # ), TRUE ~ stop("tidybulk says: the only supported methods are \"kmeans\" or \"SNN\" ") @@ -708,11 +731,13 @@ setMethod("cluster_elements", "tidybulk", .cluster_elements) #' @param top An integer. How many top genes to select for dimensionality reduction #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column #' @param .dims An integer. The number of dimensions your are interested in (e.g., 4 for returning the first four principal components). -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE" #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function reduces the dimensions of the transcript abundances. #' It can use multi-dimensional scaling (MDS; DOI.org/10.1186/gb-2010-11-3-r25), #' principal component analysis (PCA), or tSNE (Jesse Krijthe et al. 2018) @@ -778,10 +803,15 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) standardGeneric("reduce_dimensions")) # Set internal @@ -794,11 +824,26 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -839,7 +884,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("PCA") ~ get_reduced_dimensions_PCA_bulk(., @@ -849,7 +894,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -860,7 +905,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("UMAP") ~ get_reduced_dimensions_UMAP_bulk(., @@ -870,7 +915,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -1131,12 +1176,13 @@ setMethod("rotate_dimensions", "tidybulk", .rotate_dimensions) #' #' @param method A character string. The method to use, correlation and reduced_dimensions are available. The latter eliminates one of the most proximar pairs of samples in PCA reduced dimensions. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param correlation_threshold A real number between 0 and 1. For correlation based calculation. #' @param top An integer. How many top genes to select for correlation based method #' @param Dim_a_column A character string. For reduced_dimension based calculation. The column of one principal component #' @param Dim_b_column A character string. For reduced_dimension based calculation. The column of another principal component #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details This function removes redundant elements from the original data set (e.g., samples or transcripts). #' For example, if we want to define cell-type specific signatures with low sample redundancy. @@ -1214,10 +1260,13 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, - + transform = identity, Dim_a_column, - Dim_b_column) + Dim_b_column, + + # DEPRECATED + log_transform = NULL + ) standardGeneric("remove_redundancy")) # Set internal @@ -1229,15 +1278,27 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL +) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .abundance = enquo(.abundance) .element = enquo(.element) @@ -1262,7 +1323,7 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = correlation_threshold, top = top, of_samples = of_samples, - log_transform = log_transform + transform = transform ) } else if (method == "reduced_dimensions") { @@ -1330,10 +1391,13 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' @param inverse_transform A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function sva::ComBat #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function adjusts the abundance for (known) unwanted variation. #' At the moment just an unwanted covariate is allowed at a time using Combat (DOI: 10.1093/bioinformatics/bts034) #' @@ -1368,21 +1432,44 @@ setGeneric("adjust_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("adjust_abundance")) # Set internal .adjust_abundance = function(.data, - .formula, - .sample = NULL, - .transcript = NULL, - .abundance = NULL, - log_transform = TRUE, - action = "add", - ...) + .formula, + .sample = NULL, + .transcript = NULL, + .abundance = NULL, + transform = log1p, + inverse_transform = expm1, + action = "add", + ..., + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE){ + transform = log1p + inverse_transform = expm1 + } + } + # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -1419,7 +1506,8 @@ setGeneric("adjust_abundance", function(.data, .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - log_transform = log_transform, + transform = transform, + inverse_transform = inverse_transform, ... ) @@ -2462,7 +2550,9 @@ setMethod("test_differential_abundance", #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' @param top Integer. Number of top transcript to consider -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details At the moment this function uses edgeR \url{https://doi.org/10.1093/bioinformatics/btp616} #' @@ -2495,7 +2585,11 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = TRUE + ) standardGeneric("keep_variable")) # Set internal @@ -2504,8 +2598,21 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -2523,7 +2630,7 @@ setGeneric("keep_variable", function(.data, .transcript = !!.transcript, .abundance = !!.abundance, top = top, - log_transform = log_transform + transform = transform ) } diff --git a/R/methods_SE.R b/R/methods_SE.R index a214d3e7..b2b84e1e 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -228,7 +228,7 @@ setMethod("scale_abundance", .cluster_elements_se = function(.data, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { my_assay = @@ -251,7 +251,7 @@ setMethod("scale_abundance", my_cluster_function( my_assay, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ) %>% as.character() %>% @@ -309,7 +309,7 @@ setMethod("cluster_elements", .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, ...) { @@ -324,10 +324,10 @@ setMethod("cluster_elements", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() my_reduction_function = method %>% @@ -346,7 +346,7 @@ setMethod("cluster_elements", .dims = .dims, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale=scale, ... ) @@ -512,7 +512,7 @@ setMethod("rotate_dimensions", of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL) { @@ -545,10 +545,10 @@ setMethod("rotate_dimensions", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() # Get correlated elements remove_redundancy_elements_through_correlation_SE( @@ -621,7 +621,8 @@ setMethod("remove_redundancy", .adjust_abundance_se = function(.data, .formula, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, ...) { # Check if package is installed, otherwise install @@ -666,7 +667,7 @@ setMethod("remove_redundancy", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.)) + transform() # Set column name for value scaled @@ -687,7 +688,7 @@ setMethod("remove_redundancy", ...) %>% # Check if log transform needs to be reverted - when(log_transform ~ expm1(.), ~ (.)) + inverse_transform() # Add the assay @@ -1178,7 +1179,7 @@ setMethod( .keep_variable_se = function(.data, top = 500, - log_transform = TRUE) + transform = log1p) { @@ -1193,7 +1194,7 @@ setMethod( .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Take gene names rownames() diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 1310c087..7f5be64e 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -16,9 +16,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{spec_tbl_df}( @@ -27,9 +29,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tbl_df}( @@ -38,9 +42,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tidybulk}( @@ -49,9 +55,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{SummarizedExperiment}( @@ -60,9 +68,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{RangedSummarizedExperiment}( @@ -71,9 +81,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -87,11 +99,15 @@ adjust_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{inverse_transform}{A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function sva::ComBat} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the adjusted counts as `_adjusted` diff --git a/man/cluster_elements-methods.Rd b/man/cluster_elements-methods.Rd index 518f37f9..e1302075 100644 --- a/man/cluster_elements-methods.Rd +++ b/man/cluster_elements-methods.Rd @@ -17,9 +17,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{spec_tbl_df}( @@ -29,9 +30,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tbl_df}( @@ -41,9 +43,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tidybulk}( @@ -53,9 +56,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{SummarizedExperiment}( @@ -65,9 +69,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{RangedSummarizedExperiment}( @@ -77,9 +82,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -95,11 +101,13 @@ cluster_elements( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function kmeans} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns with cluster labels diff --git a/man/get_reduced_dimensions_UMAP_bulk.Rd b/man/get_reduced_dimensions_UMAP_bulk.Rd index 2d1145cf..1a8023d5 100644 --- a/man/get_reduced_dimensions_UMAP_bulk.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk.Rd @@ -12,7 +12,7 @@ get_reduced_dimensions_UMAP_bulk( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ... @@ -33,11 +33,11 @@ get_reduced_dimensions_UMAP_bulk( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} - \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} \item{...}{Further parameters passed to the function uwot} + +\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tibble with additional columns diff --git a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd index d6162239..b204aa0f 100644 --- a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd @@ -9,7 +9,7 @@ get_reduced_dimensions_UMAP_bulk_SE( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, calculate_for_pca_dimensions = 20, ... @@ -24,7 +24,7 @@ get_reduced_dimensions_UMAP_bulk_SE( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} diff --git a/man/keep_variable-methods.Rd b/man/keep_variable-methods.Rd index 6fb3e137..4457fb7b 100644 --- a/man/keep_variable-methods.Rd +++ b/man/keep_variable-methods.Rd @@ -16,6 +16,7 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, + transform = log1p, log_transform = TRUE ) @@ -25,7 +26,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tbl_df}( @@ -34,7 +36,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tidybulk}( @@ -43,12 +46,13 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) -\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, transform = log1p) -\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, transform = log1p) } \arguments{ \item{.data}{A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))} @@ -61,7 +65,9 @@ keep_variable( \item{top}{Integer. Number of top transcript to consider} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). diff --git a/man/reduce_dimensions-methods.Rd b/man/reduce_dimensions-methods.Rd index 4c0fce04..4cf095c3 100644 --- a/man/reduce_dimensions-methods.Rd +++ b/man/reduce_dimensions-methods.Rd @@ -19,10 +19,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{spec_tbl_df}( @@ -34,10 +35,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tbl_df}( @@ -49,10 +51,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tidybulk}( @@ -64,10 +67,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{SummarizedExperiment}( @@ -79,10 +83,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{RangedSummarizedExperiment}( @@ -94,10 +99,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -117,13 +123,15 @@ reduce_dimensions( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{scale}{A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns for the reduced dimensions diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 5cb9e2fd..0a626ac1 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -19,9 +19,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column, - Dim_b_column + Dim_b_column, + log_transform = NULL ) \S4method{remove_redundancy}{spec_tbl_df}( @@ -33,9 +34,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tbl_df}( @@ -47,9 +49,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tidybulk}( @@ -61,9 +64,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{SummarizedExperiment}( @@ -75,7 +79,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -89,7 +93,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -111,11 +115,13 @@ remove_redundancy( \item{top}{An integer. How many top genes to select for correlation based method} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{Dim_a_column}{A character string. For reduced_dimension based calculation. The column of one principal component} \item{Dim_b_column}{A character string. For reduced_dimension based calculation. The column of another principal component} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with with dropped redundant elements (e.g., samples). From 219709674727c4a5803efd5a9f71e3cfd4d2fe72 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:21:39 +1000 Subject: [PATCH 27/38] deprecation untested --- R/functions.R | 78 ++++----- R/functions_SE.R | 32 ++-- R/methods.R | 187 ++++++++++++++++----- R/methods_SE.R | 29 ++-- man/adjust_abundance-methods.Rd | 42 +++-- man/cluster_elements-methods.Rd | 34 ++-- man/get_reduced_dimensions_UMAP_bulk.Rd | 6 +- man/get_reduced_dimensions_UMAP_bulk_SE.Rd | 4 +- man/keep_variable-methods.Rd | 18 +- man/reduce_dimensions-methods.Rd | 34 ++-- man/remove_redundancy-methods.Rd | 28 +-- 11 files changed, 322 insertions(+), 170 deletions(-) diff --git a/R/functions.R b/R/functions.R index 4b92a025..c1490392 100755 --- a/R/functions.R +++ b/R/functions.R @@ -1536,7 +1536,7 @@ get_clusters_kmeans_bulk <- .feature = NULL, .abundance = NULL, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots dots_args = rlang::dots_list(...) @@ -1553,9 +1553,9 @@ get_clusters_kmeans_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.feature,!!.abundance) %>% @@ -1603,7 +1603,7 @@ get_clusters_SNN_bulk <- .feature = NULL, .abundance, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Get column names .element = enquo(.element) @@ -1631,7 +1631,8 @@ get_clusters_SNN_bulk <- distinct(!!.element,!!.feature,!!.abundance) %>% # Check if log tranfrom is needed - #ifelse_pipe(log_transform, ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.element,!!.abundance) @@ -1689,7 +1690,7 @@ get_reduced_dimensions_MDS_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE) { + transform = log1p) { # Comply with CRAN NOTES . = NULL @@ -1714,9 +1715,9 @@ get_reduced_dimensions_MDS_bulk <- distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1808,7 +1809,7 @@ get_reduced_dimensions_PCA_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -1831,9 +1832,9 @@ get_reduced_dimensions_PCA_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1950,7 +1951,7 @@ get_reduced_dimensions_TSNE_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Comply with CRAN NOTES . = NULL @@ -2004,9 +2005,9 @@ get_reduced_dimensions_TSNE_bulk <- ~ .x %>% eliminate_sparse_transcripts(!!.feature) ) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p)) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) %>% @@ -2064,7 +2065,7 @@ get_reduced_dimensions_UMAP_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ...) { @@ -2114,8 +2115,8 @@ get_reduced_dimensions_UMAP_bulk <- ~ (.) ) %>% - # Check if log transform is needed - when(log_transform ~ dplyr::mutate(., !!.abundance := !!.abundance %>% log1p), ~ (.)) %>% + # Apply (log by default) transformation + dplyr::mutate(., !!.abundance := transform(!!.abundance)) %>% # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) @@ -2515,7 +2516,7 @@ remove_redundancy_elements_through_correlation <- function(.data, correlation_threshold = 0.9, top = Inf, of_samples = TRUE, - log_transform = FALSE) { + transform = identity) { # Comply with CRAN NOTES . = NULL @@ -2548,9 +2549,9 @@ remove_redundancy_elements_through_correlation <- function(.data, # Filter variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top = top) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + distinct() %>% # NO NEED OF RECTANGULAR @@ -3079,7 +3080,8 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = transform, + inverse_transform = inverse_transform, ...) { # Get column names .sample = enquo(.sample) @@ -3108,9 +3110,9 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, one_of(parse_formula(.formula))) %>% distinct() %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) + # Create design matrix @@ -3167,13 +3169,11 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, gather(!!.sample,!!.abundance,-!!.transcript) %>% # Reverse-Log transform if transformed in the first place - ifelse_pipe( - log_transform, - ~ .x %>% - dplyr::mutate(!!.abundance := !!.abundance %>% exp %>% `-`(1)) %>% - dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% - dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) - ) %>% + dplyr::mutate(!!.abundance := inverse_transform(!!.abundance)) %>% + + # In case the inverse tranform produces negative counts + dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% + dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) %>% # Reset column names dplyr::rename(!!value_adjusted := !!.abundance) %>% @@ -3202,7 +3202,7 @@ keep_variable_transcripts = function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) { + transform = log1p) { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -3225,8 +3225,8 @@ keep_variable_transcripts = function(.data, distinct(!!.sample,!!.transcript,!!.abundance) %>% # Check if logtansform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + spread(!!.sample,!!.abundance) %>% as_matrix(rownames = quo_name(.transcript)) diff --git a/R/functions_SE.R b/R/functions_SE.R index ec763546..194a037f 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -14,7 +14,7 @@ #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -23,7 +23,7 @@ get_clusters_kmeans_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots @@ -34,7 +34,7 @@ get_clusters_kmeans_bulk_SE <- .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) %>% + transform() %>% # Decide if of samples or transcripts when( @@ -66,7 +66,7 @@ get_clusters_kmeans_bulk_SE <- #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -74,7 +74,7 @@ get_clusters_kmeans_bulk_SE <- get_clusters_SNN_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { @@ -127,7 +127,7 @@ get_clusters_SNN_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' #' @return A tibble with additional columns @@ -138,7 +138,7 @@ get_reduced_dimensions_MDS_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL # This is only a dummy argument for making it compatibble with PCA ) { # Comply with CRAN NOTES @@ -222,7 +222,7 @@ get_reduced_dimensions_MDS_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function prcomp #' @@ -234,7 +234,7 @@ get_reduced_dimensions_PCA_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -323,7 +323,7 @@ we suggest to partition the dataset for sample clusters. #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function Rtsne #' @@ -334,7 +334,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA ...) { # Comply with CRAN NOTES @@ -406,7 +406,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param calculate_for_pca_dimensions An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered #' @param ... Further parameters passed to the function uwot #' @@ -417,7 +417,7 @@ get_reduced_dimensions_UMAP_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA calculate_for_pca_dimensions = 20, ...) { @@ -528,13 +528,13 @@ filter_if_abundant_were_identified = function(.data){ #' @param .transcript A character name of the transcript/gene column #' @param .abundance A character name of the read count column #' @param top An integer. How many top genes to select -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' #' @return A tibble filtered genes #' keep_variable_transcripts_SE = function(.data, top = 500, - log_transform = TRUE) { + transform = log1p) { # Manage Inf @@ -546,7 +546,7 @@ keep_variable_transcripts_SE = function(.data, .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() s <- rowMeans((x - rowMeans(x, na.rm=TRUE)) ^ 2, na.rm=TRUE) diff --git a/R/methods.R b/R/methods.R index a6102697..b188fec6 100755 --- a/R/methods.R +++ b/R/methods.R @@ -515,10 +515,12 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' #' @param method A character string. The cluster algorithm to use, at the moment k-means is the only algorithm included. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function kmeans #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details identifies clusters in the data, normally of samples. #' This function returns a tibble with additional columns for the cluster annotation. #' At the moment only k-means (DOI: 10.2307/2346830) and SNN clustering (DOI:10.1016/j.cell.2019.05.031) is supported, the plan is to introduce more clustering methods. @@ -554,9 +556,14 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("cluster_elements")) # Set internal @@ -566,10 +573,26 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -609,7 +632,7 @@ setGeneric("cluster_elements", function(.data, .element = !!.element, .feature = !!.feature, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), method == "SNN" ~ stop("tidybulk says: Matrix package (v1.3-3) causes an error with Seurat::FindNeighbors used in this method. We are trying to solve this issue. At the moment this option in unaviable."), @@ -618,7 +641,7 @@ setGeneric("cluster_elements", function(.data, # .element = !!.element, # .feature = !!.feature, # of_samples = of_samples, - # log_transform = log_transform, + # transform = transform, # ... # ), TRUE ~ stop("tidybulk says: the only supported methods are \"kmeans\" or \"SNN\" ") @@ -708,11 +731,13 @@ setMethod("cluster_elements", "tidybulk", .cluster_elements) #' @param top An integer. How many top genes to select for dimensionality reduction #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column #' @param .dims An integer. The number of dimensions your are interested in (e.g., 4 for returning the first four principal components). -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE" #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function reduces the dimensions of the transcript abundances. #' It can use multi-dimensional scaling (MDS; DOI.org/10.1186/gb-2010-11-3-r25), #' principal component analysis (PCA), or tSNE (Jesse Krijthe et al. 2018) @@ -778,10 +803,15 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) standardGeneric("reduce_dimensions")) # Set internal @@ -794,11 +824,26 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -839,7 +884,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("PCA") ~ get_reduced_dimensions_PCA_bulk(., @@ -849,7 +894,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -860,7 +905,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("UMAP") ~ get_reduced_dimensions_UMAP_bulk(., @@ -870,7 +915,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -1131,12 +1176,13 @@ setMethod("rotate_dimensions", "tidybulk", .rotate_dimensions) #' #' @param method A character string. The method to use, correlation and reduced_dimensions are available. The latter eliminates one of the most proximar pairs of samples in PCA reduced dimensions. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param correlation_threshold A real number between 0 and 1. For correlation based calculation. #' @param top An integer. How many top genes to select for correlation based method #' @param Dim_a_column A character string. For reduced_dimension based calculation. The column of one principal component #' @param Dim_b_column A character string. For reduced_dimension based calculation. The column of another principal component #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details This function removes redundant elements from the original data set (e.g., samples or transcripts). #' For example, if we want to define cell-type specific signatures with low sample redundancy. @@ -1214,10 +1260,13 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, - + transform = identity, Dim_a_column, - Dim_b_column) + Dim_b_column, + + # DEPRECATED + log_transform = NULL + ) standardGeneric("remove_redundancy")) # Set internal @@ -1229,15 +1278,27 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL +) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .abundance = enquo(.abundance) .element = enquo(.element) @@ -1262,7 +1323,7 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = correlation_threshold, top = top, of_samples = of_samples, - log_transform = log_transform + transform = transform ) } else if (method == "reduced_dimensions") { @@ -1330,10 +1391,13 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' @param inverse_transform A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function sva::ComBat #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function adjusts the abundance for (known) unwanted variation. #' At the moment just an unwanted covariate is allowed at a time using Combat (DOI: 10.1093/bioinformatics/bts034) #' @@ -1368,21 +1432,44 @@ setGeneric("adjust_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("adjust_abundance")) # Set internal .adjust_abundance = function(.data, - .formula, - .sample = NULL, - .transcript = NULL, - .abundance = NULL, - log_transform = TRUE, - action = "add", - ...) + .formula, + .sample = NULL, + .transcript = NULL, + .abundance = NULL, + transform = log1p, + inverse_transform = expm1, + action = "add", + ..., + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE){ + transform = log1p + inverse_transform = expm1 + } + } + # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -1419,7 +1506,8 @@ setGeneric("adjust_abundance", function(.data, .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - log_transform = log_transform, + transform = transform, + inverse_transform = inverse_transform, ... ) @@ -2461,7 +2549,9 @@ setMethod("test_differential_abundance", #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' @param top Integer. Number of top transcript to consider -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details At the moment this function uses edgeR \url{https://doi.org/10.1093/bioinformatics/btp616} #' @@ -2494,7 +2584,11 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = TRUE + ) standardGeneric("keep_variable")) # Set internal @@ -2503,8 +2597,21 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -2522,7 +2629,7 @@ setGeneric("keep_variable", function(.data, .transcript = !!.transcript, .abundance = !!.abundance, top = top, - log_transform = log_transform + transform = transform ) } diff --git a/R/methods_SE.R b/R/methods_SE.R index d8a99ef1..b1b1e5a0 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -228,7 +228,7 @@ setMethod("scale_abundance", .cluster_elements_se = function(.data, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { my_assay = @@ -251,7 +251,7 @@ setMethod("scale_abundance", my_cluster_function( my_assay, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ) %>% as.character() %>% @@ -309,7 +309,7 @@ setMethod("cluster_elements", .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, ...) { @@ -324,10 +324,10 @@ setMethod("cluster_elements", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() my_reduction_function = method %>% @@ -346,7 +346,7 @@ setMethod("cluster_elements", .dims = .dims, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale=scale, ... ) @@ -512,7 +512,7 @@ setMethod("rotate_dimensions", of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL) { @@ -545,10 +545,10 @@ setMethod("rotate_dimensions", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() # Get correlated elements remove_redundancy_elements_through_correlation_SE( @@ -621,7 +621,8 @@ setMethod("remove_redundancy", .adjust_abundance_se = function(.data, .formula, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, ...) { # Check if package is installed, otherwise install @@ -666,7 +667,7 @@ setMethod("remove_redundancy", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.)) + transform() # Set column name for value scaled @@ -687,7 +688,7 @@ setMethod("remove_redundancy", ...) %>% # Check if log transform needs to be reverted - when(log_transform ~ expm1(.), ~ (.)) + inverse_transform() # Add the assay @@ -1198,7 +1199,7 @@ setMethod( .keep_variable_se = function(.data, top = 500, - log_transform = TRUE) + transform = log1p) { @@ -1213,7 +1214,7 @@ setMethod( .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Take gene names rownames() diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 1310c087..7f5be64e 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -16,9 +16,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{spec_tbl_df}( @@ -27,9 +29,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tbl_df}( @@ -38,9 +42,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tidybulk}( @@ -49,9 +55,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{SummarizedExperiment}( @@ -60,9 +68,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{RangedSummarizedExperiment}( @@ -71,9 +81,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -87,11 +99,15 @@ adjust_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{inverse_transform}{A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function sva::ComBat} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the adjusted counts as `_adjusted` diff --git a/man/cluster_elements-methods.Rd b/man/cluster_elements-methods.Rd index 518f37f9..e1302075 100644 --- a/man/cluster_elements-methods.Rd +++ b/man/cluster_elements-methods.Rd @@ -17,9 +17,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{spec_tbl_df}( @@ -29,9 +30,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tbl_df}( @@ -41,9 +43,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tidybulk}( @@ -53,9 +56,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{SummarizedExperiment}( @@ -65,9 +69,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{RangedSummarizedExperiment}( @@ -77,9 +82,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -95,11 +101,13 @@ cluster_elements( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function kmeans} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns with cluster labels diff --git a/man/get_reduced_dimensions_UMAP_bulk.Rd b/man/get_reduced_dimensions_UMAP_bulk.Rd index 2d1145cf..1a8023d5 100644 --- a/man/get_reduced_dimensions_UMAP_bulk.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk.Rd @@ -12,7 +12,7 @@ get_reduced_dimensions_UMAP_bulk( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ... @@ -33,11 +33,11 @@ get_reduced_dimensions_UMAP_bulk( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} - \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} \item{...}{Further parameters passed to the function uwot} + +\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tibble with additional columns diff --git a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd index d6162239..b204aa0f 100644 --- a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd @@ -9,7 +9,7 @@ get_reduced_dimensions_UMAP_bulk_SE( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, calculate_for_pca_dimensions = 20, ... @@ -24,7 +24,7 @@ get_reduced_dimensions_UMAP_bulk_SE( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} diff --git a/man/keep_variable-methods.Rd b/man/keep_variable-methods.Rd index 6fb3e137..4457fb7b 100644 --- a/man/keep_variable-methods.Rd +++ b/man/keep_variable-methods.Rd @@ -16,6 +16,7 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, + transform = log1p, log_transform = TRUE ) @@ -25,7 +26,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tbl_df}( @@ -34,7 +36,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tidybulk}( @@ -43,12 +46,13 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) -\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, transform = log1p) -\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, transform = log1p) } \arguments{ \item{.data}{A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))} @@ -61,7 +65,9 @@ keep_variable( \item{top}{Integer. Number of top transcript to consider} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). diff --git a/man/reduce_dimensions-methods.Rd b/man/reduce_dimensions-methods.Rd index 4c0fce04..4cf095c3 100644 --- a/man/reduce_dimensions-methods.Rd +++ b/man/reduce_dimensions-methods.Rd @@ -19,10 +19,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{spec_tbl_df}( @@ -34,10 +35,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tbl_df}( @@ -49,10 +51,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tidybulk}( @@ -64,10 +67,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{SummarizedExperiment}( @@ -79,10 +83,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{RangedSummarizedExperiment}( @@ -94,10 +99,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -117,13 +123,15 @@ reduce_dimensions( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{scale}{A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns for the reduced dimensions diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 5cb9e2fd..0a626ac1 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -19,9 +19,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column, - Dim_b_column + Dim_b_column, + log_transform = NULL ) \S4method{remove_redundancy}{spec_tbl_df}( @@ -33,9 +34,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tbl_df}( @@ -47,9 +49,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tidybulk}( @@ -61,9 +64,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{SummarizedExperiment}( @@ -75,7 +79,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -89,7 +93,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -111,11 +115,13 @@ remove_redundancy( \item{top}{An integer. How many top genes to select for correlation based method} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{Dim_a_column}{A character string. For reduced_dimension based calculation. The column of one principal component} \item{Dim_b_column}{A character string. For reduced_dimension based calculation. The column of another principal component} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with with dropped redundant elements (e.g., samples). From 4e3b77c2759e94216225b21b0ec45a526fa9bb1d Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:30:36 +1000 Subject: [PATCH 28/38] fix CHECK --- DESCRIPTION | 3 ++- R/data.R | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f1042cb8..49c1cbc6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,8 @@ Imports: SummarizedExperiment, GenomicRanges, methods, - S4Vectors + S4Vectors, + crayon Suggests: BiocStyle, testthat, diff --git a/R/data.R b/R/data.R index aeb79e8d..b3e69e1c 100755 --- a/R/data.R +++ b/R/data.R @@ -57,3 +57,8 @@ #' #' "counts_SE" + +#' Needed for tests tximeta_summarizeToGene_object, It is SummarizedExperiment from tximeta +#' +#' +"tximeta_summarizeToGene_object" From a439f770ae982636f4269d880fcaefc361614543 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:39:36 +1000 Subject: [PATCH 29/38] add docs --- man/tximeta_summarizeToGene_object.Rd | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 man/tximeta_summarizeToGene_object.Rd diff --git a/man/tximeta_summarizeToGene_object.Rd b/man/tximeta_summarizeToGene_object.Rd new file mode 100644 index 00000000..fa1520d8 --- /dev/null +++ b/man/tximeta_summarizeToGene_object.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{tximeta_summarizeToGene_object} +\alias{tximeta_summarizeToGene_object} +\title{Needed for tests tximeta_summarizeToGene_object, It is SummarizedExperiment from tximeta} +\format{ +An object of class \code{RangedSummarizedExperiment} with 10 rows and 1 columns. +} +\usage{ +tximeta_summarizeToGene_object +} +\description{ +Needed for tests tximeta_summarizeToGene_object, It is SummarizedExperiment from tximeta +} +\keyword{datasets} From a1719eed0c17b243c99f701b1b24a81f4f97abd8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:52:58 +1000 Subject: [PATCH 30/38] fix CHECKs --- R/methods.R | 2 -- R/methods_SE.R | 5 ++++- man/remove_redundancy-methods.Rd | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/methods.R b/R/methods.R index b188fec6..cec35919 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1256,8 +1256,6 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, transform = identity, diff --git a/R/methods_SE.R b/R/methods_SE.R index b1b1e5a0..f7d6c2b8 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -515,7 +515,10 @@ setMethod("rotate_dimensions", transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) { + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL) { Dim_a_column = enquo(Dim_a_column) diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 0a626ac1..b83bc56d 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -81,7 +81,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{RangedSummarizedExperiment}( @@ -95,7 +96,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) } \arguments{ From ffc068075fa24d1073fbb09e79e80b889c30b06d Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 28 Jun 2022 10:58:22 +1000 Subject: [PATCH 31/38] add news and version UP --- DESCRIPTION | 2 +- inst/NEWS.rd | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 49c1cbc6..e9efa94d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.7.3 +Version: 1.7.4 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", diff --git a/inst/NEWS.rd b/inst/NEWS.rd index f0b5573f..c2476a43 100644 --- a/inst/NEWS.rd +++ b/inst/NEWS.rd @@ -47,3 +47,13 @@ \item Improve missing abundance with force scaling \item Other small fixes and messaging }} + +\section{Changes in version 1.7.4, Bioconductor 3.16 Dev}{ +\itemize{ + \item Improved deconvolution robustness for SummarizedExperiment, edge cases + \item Allow mapping of tidybulk_SAM_BAM to non-human genomes + \item Adopt the vocabulary .feature, .sample, for conversion between SummarizedExperiment and tibble, similarly to tidySummarizedExperiment + \item Deprecate .contrasts argument if favour of contrasts (with no dot) + \item Make aggregate_duplicates more robust for tibble and SummarizedExperiment inputs + \item Deprecate log_tranform argument for all methods for a more generic tranform argument that accepts arbitrary functions +}} From c3e317cbbaf8c89838c670814077eb9b0cc86631 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 28 Jun 2022 10:58:37 +1000 Subject: [PATCH 32/38] use |> for tests --- tests/testthat/test-bulk_methods.R | 287 +++++++++++++++-------------- 1 file changed, 147 insertions(+), 140 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 6da46d11..8dda6d8f 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1,11 +1,11 @@ context('Bulk methods') +library(dplyr) + data("se_mini") data("breast_tcga_mini_SE") - -input_df = se_mini %>% tidybulk() %>% as_tibble() %>% setNames(c("b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) - -input_df_breast = breast_tcga_mini_SE %>% tidybulk() %>% as_tibble() %>% setNames(c( "b","a", "c", "c norm", "call")) +input_df = se_mini |> tidybulk() |> as_tibble() |> setNames(c("b","a", "c", "Cell type", "time" , "condition", "days", "dead", "entrez")) +input_df_breast = breast_tcga_mini_SE |> tidybulk() |> as_tibble() |> setNames(c( "b","a", "c", "c norm", "call")) test_that("Creating tt object from tibble, number of parameters, methods",{ @@ -47,7 +47,7 @@ test_that("Only scaled counts - no object",{ res = scale_abundance( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), .sample = a, .transcript = b, .abundance = c, @@ -69,12 +69,12 @@ test_that("Only scaled counts - no object",{ expect_equal(length(internals$tt_columns), 4 ) - expect_equal(quo_name(internals$tt_columns[[4]]), "c_scaled" ) + expect_equal(rlang::quo_name(internals$tt_columns[[4]]), "c_scaled" ) # With factor of interest res = scale_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), .sample = a, .transcript = b, .abundance = c, @@ -98,7 +98,7 @@ test_that("Only scaled counts - no object",{ expect_message( scale_abundance( - left_join(input_df, sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df, sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), .sample = a, .transcript = b, .abundance = c @@ -113,7 +113,7 @@ test_that("Adding scaled counts - no object",{ res = scale_abundance( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), .sample = a, .transcript = b, .abundance = c, @@ -136,8 +136,8 @@ test_that("Adding scaled counts - no object",{ test_that("Scaled counts - subset",{ res = - input_df %>% - identify_abundant(a, b, c) %>% + input_df |> + identify_abundant(a, b, c) |> scale_abundance( .sample = a, .transcript = b, @@ -182,7 +182,7 @@ test_that("Only differential trancript abundance - no object",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -207,7 +207,7 @@ test_that("Only differential trancript abundance - no object",{ # Robust version res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -236,7 +236,7 @@ test_that("Only differential trancript abundance - no object",{ res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont, .sample = a, .transcript = b, @@ -261,7 +261,7 @@ test_that("Only differential trancript abundance - no object",{ # Continuous and discrete res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont + condition, .sample = a, .transcript = b, @@ -286,7 +286,7 @@ test_that("Only differential trancript abundance - no object",{ # Just one covariate error expect_error( test_differential_abundance( - filter(input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), condition), + filter(input_df |> identify_abundant(a, b, c, factor_of_interest = condition), condition), ~ condition, .sample = a, .transcript = b, @@ -300,7 +300,7 @@ test_that("Only differential trancript abundance - no object",{ # Change scaling method res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -311,8 +311,8 @@ test_that("Only differential trancript abundance - no object",{ ) # Treat - input_df %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% + input_df |> + identify_abundant(a, b, c, factor_of_interest = condition) |> test_differential_abundance( ~ condition, .sample = a, @@ -322,9 +322,9 @@ test_that("Only differential trancript abundance - no object",{ method = "edgeR_likelihood_ratio", test_above_log2_fold_change = 1, action="get" - ) %>% - filter(FDR<0.05) %>% - nrow %>% + ) |> + filter(FDR<0.05) |> + nrow() |> expect_equal(169) }) @@ -333,7 +333,7 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ 0 + condition, .sample = a, .transcript = b, @@ -362,7 +362,7 @@ test_that("Add differential trancript abundance - no object",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -390,7 +390,7 @@ test_that("Only differential trancript abundance voom - no object",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -418,7 +418,7 @@ test_that("Only differential trancript abundance voom - no object",{ res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont, .sample = a, .transcript = b, @@ -443,7 +443,7 @@ test_that("Only differential trancript abundance voom - no object",{ # Continuous and discrete res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont + condition, .sample = a, .transcript = b, @@ -468,7 +468,7 @@ test_that("Only differential trancript abundance voom - no object",{ # Just one covariate error expect_error( test_differential_abundance( - filter(input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), condition), + filter(input_df |> identify_abundant(a, b, c, factor_of_interest = condition), condition), ~ condition, .sample = a, .transcript = b, @@ -482,7 +482,7 @@ test_that("Only differential trancript abundance voom - no object",{ # Change scaling method res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -497,7 +497,7 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ 0 + condition, .sample = a, .transcript = b, @@ -526,7 +526,7 @@ test_that("Voom with sample weights method",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -549,8 +549,8 @@ test_that("Voom with sample weights method",{ test_that("Voom with treat method",{ - input_df %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% + input_df |> + identify_abundant(a, b, c, factor_of_interest = condition) |> test_differential_abundance( ~ condition, .sample = a, @@ -559,16 +559,16 @@ test_that("Voom with treat method",{ method = "limma_voom", test_above_log2_fold_change = 1, action="only" - ) %>% - filter(adj.P.Val<0.05) %>% - nrow %>% + ) |> + filter(adj.P.Val<0.05) |> + nrow() |> expect_equal(97) # with multiple contrasts res <- - input_df %>% - rename(cell_type = `Cell type`) %>% - identify_abundant(a, b, c, factor_of_interest = cell_type) %>% + input_df |> + rename(cell_type = `Cell type`) |> + identify_abundant(a, b, c, factor_of_interest = cell_type) |> test_differential_abundance( ~ 0 + cell_type, .sample = a, @@ -580,14 +580,14 @@ test_that("Voom with treat method",{ action="only" ) - res %>% - filter(`adj.P.Val___cell_typeb_cell-cell_typemonocyte` < 0.05) %>% - nrow %>% + res |> + filter(`adj.P.Val___cell_typeb_cell-cell_typemonocyte` < 0.05) |> + nrow() |> expect_equal(293) - res %>% - filter(`adj.P.Val___cell_typeb_cell-cell_typet_cell`<0.05) %>% - nrow %>% + res |> + filter(`adj.P.Val___cell_typeb_cell-cell_typet_cell`<0.05) |> + nrow() |> expect_equal(246) }) @@ -596,7 +596,7 @@ test_that("New method choice",{ res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -621,7 +621,7 @@ test_that("New method choice",{ # Wrong method expect_error( test_differential_abundance( - filter(input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), condition), + filter(input_df |> identify_abundant(a, b, c, factor_of_interest = condition), condition), ~ condition, .sample = a, .transcript = b, @@ -636,26 +636,26 @@ test_that("New method choice",{ test_that("DESeq2 differential trancript abundance - no object",{ res_deseq2 = - test_deseq2_df %>% - DESeq2::DESeq() %>% + test_deseq2_df |> + DESeq2::DESeq() |> DESeq2::results() res_tidybulk = - test_deseq2_df %>% - tidybulk %>% - identify_abundant(factor_of_interest = condition) %>% + test_deseq2_df |> + tidybulk() |> + identify_abundant(factor_of_interest = condition) |> test_differential_abundance(~condition, method="DeSEQ2", action="get") expect_equal( - res_tidybulk %>% dplyr::slice(c(1, 3,4, 6)) %>% dplyr::pull(log2FoldChange), + res_tidybulk |> dplyr::slice(c(1, 3,4, 6)) |> dplyr::pull(log2FoldChange), res_deseq2[c(1, 3,4, 6),2], tolerance =0.005 ) res = test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), + input_df |> identify_abundant(a, b, c, factor_of_interest = condition), ~ condition, .sample = a, .transcript = b, @@ -683,7 +683,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont, .sample = a, .transcript = b, @@ -708,7 +708,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ # Continuous and discrete res = test_differential_abundance( - left_join(input_df , sam) %>% identify_abundant(a, b, c, factor_of_interest = condition_cont), + left_join(input_df , sam) |> identify_abundant(a, b, c, factor_of_interest = condition_cont), ~ condition_cont + condition, .sample = a, .transcript = b, @@ -733,7 +733,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ # Just one covariate error expect_error( test_differential_abundance( - filter(input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), condition), + filter(input_df |> identify_abundant(a, b, c, factor_of_interest = condition), condition), ~ condition, .sample = a, .transcript = b, @@ -745,8 +745,8 @@ test_that("DESeq2 differential trancript abundance - no object",{ ) # # Contrasts - # input_df %>% - # identify_abundant(a, b, c, factor_of_interest = condition) %>% + # input_df |> + # identify_abundant(a, b, c, factor_of_interest = condition) |> # test_differential_abundance( # ~ 0 + condition, # .sample = a, @@ -755,12 +755,12 @@ test_that("DESeq2 differential trancript abundance - no object",{ # method = "deseq2", # contrasts = "this_is - wrong", # action="only" - # ) %>% + # ) |> # expect_error("for the moment, the contrasts argument") deseq2_contrasts = - input_df %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% + input_df |> + identify_abundant(a, b, c, factor_of_interest = condition) |> test_differential_abundance( ~ 0 + condition, .sample = a, @@ -772,8 +772,8 @@ test_that("DESeq2 differential trancript abundance - no object",{ ) edger_contrasts = - input_df %>% - identify_abundant(a, b, c, factor_of_interest = condition) %>% + input_df |> + identify_abundant(a, b, c, factor_of_interest = condition) |> test_differential_abundance( ~ 0 + condition, .sample = a, @@ -783,10 +783,10 @@ test_that("DESeq2 differential trancript abundance - no object",{ action="only" ) - library(dplyr) + expect_gt( - (deseq2_contrasts %>% filter(b=="ABCB4") %>% pull(3)) * - (edger_contrasts %>% filter(b=="ABCB4") %>% pull(2)), + (deseq2_contrasts |> filter(b=="ABCB4") |> pull(3)) * + (edger_contrasts |> filter(b=="ABCB4") |> pull(2)), 0 ) @@ -795,37 +795,38 @@ test_that("DESeq2 differential trancript abundance - no object",{ test_that("test prefix",{ library(DESeq2) + library(stringr) - df = input_df %>% tidybulk(a, b, c, ) %>% identify_abundant(factor_of_interest = condition) + df = input_df |> tidybulk(a, b, c, ) |> identify_abundant(factor_of_interest = condition) res_DeSEQ2 = - df %>% + df |> test_differential_abundance(~condition, method="DeSEQ2", action="only", prefix = "prefix_") res_voom = - df %>% + df |> test_differential_abundance(~condition, method="limma_voom", action="only", prefix = "prefix_") res_voom_sample_weights = - df %>% + df |> test_differential_abundance(~condition, method="limma_voom_sample_weights", action="only", prefix = "prefix_") res_edger = - df %>% + df |> test_differential_abundance(~condition, method="edgeR_likelihood_ratio", action="only", prefix = "prefix_") - expect_gt(colnames(res_DeSEQ2) %>% grep("prefix_", .) %>% length, 0) - expect_gt(colnames(res_voom) %>% grep("prefix_", .) %>% length, 0) - expect_gt(colnames(res_voom_sample_weights) %>% grep("prefix_", .) %>% length, 0) - expect_gt(colnames(res_edger) %>% grep("prefix_", .) %>% length, 0) + expect_gt(colnames(res_DeSEQ2) |> str_which("prefix_") |> length(), 0) + expect_gt(colnames(res_voom) |> str_which("prefix_") |> length(), 0) + expect_gt(colnames(res_voom_sample_weights) |> str_which("prefix_") |> length(), 0) + expect_gt(colnames(res_edger) |> str_which("prefix_") |> length(), 0) }) test_that("Get entrez from symbol - no object",{ res = - input_df %>% - select(-entrez) %>% + input_df |> + select(-entrez) |> symbol_to_entrez(.transcript = b, .sample = a) expect_equal( @@ -837,7 +838,7 @@ test_that("Get entrez from symbol - no object",{ # test_that("Get gene enrichment - no object",{ # -# if (find.package("EGSEA", quiet = TRUE) %>% length %>% equals(0)) { +# if (find.package("EGSEA", quiet = TRUE) |> length() |> equals(0)) { # message("Installing EGSEA needed for differential transcript abundance analyses") # if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager", repos = "https://cloud.r-project.org") # BiocManager::install("EGSEA") @@ -856,7 +857,7 @@ test_that("Get entrez from symbol - no object",{ # .transcript = d, # .sample = a, # .abundance = c -# ) %>% identify_abundant(a, b, c, factor_of_interest = condition), +# ) |> identify_abundant(a, b, c, factor_of_interest = condition), # ~ condition, # .sample = a, # .entrez = d, @@ -885,7 +886,7 @@ test_that("Only adjusted counts - no object",{ res = adjust_abundance( - cm %>% identify_abundant(a, b, c), + cm |> identify_abundant(a, b, c), ~ condition + batch, .sample = a, .transcript = b, @@ -914,7 +915,7 @@ test_that("Get adjusted counts - no object",{ res = adjust_abundance( - cm %>% identify_abundant(a, b, c), + cm |> identify_abundant(a, b, c), ~ condition + batch, .sample = a, .transcript = b, @@ -943,7 +944,7 @@ test_that("Add adjusted counts - no object",{ res = adjust_abundance( - cm %>% identify_abundant(a, b, c), + cm |> identify_abundant(a, b, c), ~ condition + batch, .sample = a, .transcript = b, @@ -1127,7 +1128,7 @@ test_that("Only reduced dimensions MDS - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method = "MDS", .abundance = c, .element = a, @@ -1148,10 +1149,12 @@ test_that("Only reduced dimensions MDS - no object",{ expect_equal( class(attr(res, "internals")$MDS[[1]])[1], "MDS" ) + inp = input_df |> identify_abundant(a, b, c) + # Duplicate genes/samples expect_error( reduce_dimensions( - input_df %>% identify_abundant(a, b, c) %>% bind_rows( (.) %>% dplyr::slice(1) %>% mutate(c = c+1) ), + inp |> bind_rows( inp|> dplyr::slice(1) |> mutate(c = c+1) ), method = "MDS", .abundance = c, .element = a, @@ -1165,7 +1168,7 @@ test_that("Get reduced dimensions MDS - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method = "MDS", .abundance = c, .element = a, @@ -1194,7 +1197,7 @@ test_that("Add reduced dimensions MDS - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method = "MDS", .abundance = c, .element = a, @@ -1220,7 +1223,7 @@ test_that("Only reduced dimensions PCA - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1242,9 +1245,11 @@ test_that("Only reduced dimensions PCA - no object",{ expect_equal( class(attr(res, "internals")$PCA), "prcomp" ) # Duplicate genes/samples + inp = input_df |> identify_abundant(a, b, c) + expect_error( reduce_dimensions( - input_df %>% identify_abundant(a, b, c) %>% bind_rows( (.) %>% dplyr::slice(1) %>% mutate(c = c+1) ), + inp |> bind_rows( inp |> dplyr::slice(1) |> mutate(c = c+1) ), method = "PCA", .abundance = c, .element = a, @@ -1258,7 +1263,7 @@ test_that("Get reduced dimensions PCA - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1285,7 +1290,7 @@ test_that("Add reduced dimensions PCA - no object",{ res = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1312,8 +1317,8 @@ test_that("Get reduced dimensions tSNE - no object",{ set.seed(132) res = - input_df_breast %>% - identify_abundant(a, b, c) %>% + input_df_breast |> + identify_abundant(a, b, c) |> reduce_dimensions( method="tSNE", @@ -1322,7 +1327,7 @@ test_that("Get reduced dimensions tSNE - no object",{ .feature = b, action="get", verbose=FALSE - ) %>% + ) |> suppressMessages() expect_equal( @@ -1341,9 +1346,11 @@ test_that("Get reduced dimensions tSNE - no object",{ ) # Duplicate genes/samples + inp = input_df |> identify_abundant(a, b, c) + expect_error( reduce_dimensions( - input_df %>% identify_abundant(a, b, c) %>% bind_rows( (.) %>% dplyr::slice(1) %>% mutate(c = c+1) ), + inp |> bind_rows( inp |> dplyr::slice(1) |> mutate(c = c+1) ), method = "tSNE", .abundance = c, .element = a, @@ -1360,8 +1367,8 @@ test_that("Add reduced dimensions UMAP - no object",{ set.seed(132) res = - input_df_breast %>% - identify_abundant(a, b, c) %>% + input_df_breast |> + identify_abundant(a, b, c) |> reduce_dimensions( method="UMAP", @@ -1388,7 +1395,7 @@ test_that("Only rotated dimensions - no object",{ res.pca = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1423,7 +1430,7 @@ test_that("Get rotated dimensions - no object",{ res.pca = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1462,7 +1469,7 @@ test_that("Add rotated dimensions - no object",{ res.pca = reduce_dimensions( - input_df %>% identify_abundant(a, b, c), + input_df |> identify_abundant(a, b, c), method="PCA", .abundance = c, .element = a, @@ -1602,7 +1609,7 @@ test_that("Add description to symbol",{ res = describe_transcript( - input_df %>% tidybulk(a, b, c) + input_df |> tidybulk(a, b, c) ) @@ -1715,10 +1722,10 @@ test_that("differential composition",{ .transcript = b, .abundance = c, cores = 1 - ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% + ) |> + pull(`estimate_(Intercept)`) |> + magrittr::extract2(1) |> + as.integer() |> expect_equal( -2, tollerance =1e-3) # llsr @@ -1729,36 +1736,36 @@ test_that("differential composition",{ .transcript = b, .abundance = c, method="llsr" - ) %>% - pull(`estimate_(Intercept)`) %>% - .[[1]] %>% - as.integer %>% + ) |> + pull(`estimate_(Intercept)`) |> + magrittr::extract2(1) |> + as.integer() |> expect_equal( -2, tollerance =1e-3) # Survival analyses - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% + input_df |> + select(a, b, c) |> + nest(data = -a) |> mutate( days = c(1, 10, 500, 1000, 2000), dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% + ) |> + unnest(data) |> test_differential_cellularity( survival::Surv(days, dead) ~ ., .sample = a, .transcript = b, .abundance = c, cores = 1 - ) %>% - pull(estimate) %>% - .[[1]] %>% + ) |> + pull(estimate) |> + magrittr::extract2(1) |> expect_equal(26.2662279, tolerance = 30) # round() %in% c( # 26, # 97 is the github action MacOS that has different value # 26, # 112 is the github action UBUNTU that has different value # 26 # 93 is the github action Windows that has different value - # ) %>% + # ) |> # expect_true() }) @@ -1766,43 +1773,43 @@ test_that("differential composition",{ test_that("test_stratification_cellularity",{ # Cibersort - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% + input_df |> + select(a, b, c) |> + nest(data = -a) |> mutate( days = c(1, 10, 500, 1000, 2000), dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% + ) |> + unnest(data) |> test_stratification_cellularity( survival::Surv(days, dead) ~ ., .sample = a, .transcript = b, .abundance = c, cores = 1 - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% + ) |> + pull(.low_cellularity_expected) |> + magrittr::extract2(1) |> expect_equal(3.35, tolerance =1e-1) # llsr - input_df %>% - select(a, b, c) %>% - nest(data = -a) %>% + input_df |> + select(a, b, c) |> + nest(data = -a) |> mutate( days = c(1, 10, 500, 1000, 2000), dead = c(1, 1, 1, 0, 1) - ) %>% - unnest(data) %>% + ) |> + unnest(data) |> test_stratification_cellularity( survival::Surv(days, dead) ~ ., .sample = a, .transcript = b, .abundance = c, method = "llsr" - ) %>% - pull(.low_cellularity_expected) %>% - .[[1]] %>% + ) |> + pull(.low_cellularity_expected) |> + magrittr::extract2(1) |> expect_equal(3.35, tolerance =1e-1) }) @@ -1837,8 +1844,8 @@ test_that("filter abundant - no object",{ ) expect_gt( - res1 %>% filter(.abundant) %>% nrow(), - res2 %>% filter(.abundant) %>% nrow() + res1 |> filter(.abundant) |> nrow(), + res2 |> filter(.abundant) |> nrow() ) res = @@ -1915,7 +1922,7 @@ test_that("pivot",{ test_that("gene over representation",{ - df_entrez = se_mini %>% tidybulk() %>% as_tibble() + df_entrez = se_mini |> tidybulk() |> as_tibble() df_entrez = aggregate_duplicates(df_entrez, aggregation_function = sum, .sample = .sample, .transcript = entrez, .abundance = count) df_entrez = mutate(df_entrez, do_test = .feature %in% c("TNFRSF4", "PLCH2", "PADI4", "PAX7")) @@ -1942,14 +1949,14 @@ test_that("gene over representation",{ # .sample = a, # .transcript = b, # .abundance = c -# ) %>% -# scale_abundance() %>% +# ) |> +# scale_abundance() |> # get_bibliography() # # }) # # test_that("as_SummarizedExperiment",{ -# input_df %>% +# input_df |> # as_SummarizedExperiment( # .sample = a, # .transcript = b, From 527ca24dcfc5a90717fb8dae42d2b7f4978d389c Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 28 Jun 2022 11:22:57 +1000 Subject: [PATCH 33/38] make tests more detailed --- tests/testthat/test-bulk_methods.R | 74 +++++++++++++++--------------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 8dda6d8f..846e17d4 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1298,6 +1298,11 @@ test_that("Add reduced dimensions PCA - no object",{ action="add" ) + res |> + pull(PC1) |> + magrittr::extract2(1) |> + expect_equal(-7.214337, tolerance = 0.01) + expect_equal( typeof(res$`PC1`), "double" @@ -1330,6 +1335,11 @@ test_that("Get reduced dimensions tSNE - no object",{ ) |> suppressMessages() + res |> + pull(tSNE1) |> + magrittr::extract2(1) |> + expect_equal(2.432608, tolerance = 0.01) + expect_equal( typeof(res$`tSNE1`), "double", @@ -1378,16 +1388,12 @@ test_that("Add reduced dimensions UMAP - no object",{ action="add" ) - expect_equal( - typeof(res$`UMAP1`), - "double", - tolerance=1e-1 - ) + res |> + pull(UMAP1) |> + magrittr::extract2(1) |> + expect_equal(-2.12, tolerance = 0.01) - expect_equal( - ncol(res), - 8 - ) + expect_equal(ncol(res), 8) }) @@ -1821,12 +1827,9 @@ test_that("filter abundant - no object",{ .sample = a, .transcript = b, .abundance = c - ) - - expect_equal( - ncol(res1), - 10 - ) + ) |> + filter(.abundant) |> + nrow() res2 = identify_abundant( @@ -1836,48 +1839,40 @@ test_that("filter abundant - no object",{ .abundance = c, minimum_proportion = 0.5, minimum_counts = 30 - ) + ) |> + filter(.abundant) |> + nrow() - expect_equal( - ncol(res2), - 10 - ) + expect_equal(res1, 910) + expect_equal(res2, 625) + + expect_gt(res1 ,res2 ) - expect_gt( - res1 |> filter(.abundant) |> nrow(), - res2 |> filter(.abundant) |> nrow() - ) - res = keep_abundant( input_df, .sample = a, .transcript = b, .abundance = c - ) + ) |> + nrow() |> - expect_equal( - ncol(res), - 10 - ) + expect_equal(910 ) }) test_that("filter abundant with design - no object",{ - res = identify_abundant( input_df, .sample = a, .transcript = b, .abundance = c, factor_of_interest = condition - ) - - expect_equal( - ncol(res), - 10 - ) + ) |> + filter(.abundant) |> + nrow() |> + expect_equal(1965) @@ -1935,7 +1930,10 @@ test_that("gene over representation",{ species="Homo sapiens" ) - expect_equal( ncol(res), 10 ) + res |> + pull(pvalue) |> + magrittr::extract2(1) |> + expect_equal(0.0004572092, tolerance = 0.0001 ) From b399314080ee8cb7c423af3618560fef601ae2ff Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:21:39 +1000 Subject: [PATCH 34/38] deprecation untested --- R/functions.R | 78 ++++----- R/functions_SE.R | 32 ++-- R/methods.R | 187 ++++++++++++++++----- R/methods_SE.R | 29 ++-- man/adjust_abundance-methods.Rd | 42 +++-- man/cluster_elements-methods.Rd | 34 ++-- man/get_reduced_dimensions_UMAP_bulk.Rd | 6 +- man/get_reduced_dimensions_UMAP_bulk_SE.Rd | 4 +- man/keep_variable-methods.Rd | 18 +- man/reduce_dimensions-methods.Rd | 34 ++-- man/remove_redundancy-methods.Rd | 28 +-- 11 files changed, 322 insertions(+), 170 deletions(-) diff --git a/R/functions.R b/R/functions.R index 05485430..e9460eca 100755 --- a/R/functions.R +++ b/R/functions.R @@ -1536,7 +1536,7 @@ get_clusters_kmeans_bulk <- .feature = NULL, .abundance = NULL, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots dots_args = rlang::dots_list(...) @@ -1553,9 +1553,9 @@ get_clusters_kmeans_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.feature,!!.abundance) %>% @@ -1603,7 +1603,7 @@ get_clusters_SNN_bulk <- .feature = NULL, .abundance, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Get column names .element = enquo(.element) @@ -1631,7 +1631,8 @@ get_clusters_SNN_bulk <- distinct(!!.element,!!.feature,!!.abundance) %>% # Check if log tranfrom is needed - #ifelse_pipe(log_transform, ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.element,!!.abundance) @@ -1689,7 +1690,7 @@ get_reduced_dimensions_MDS_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE) { + transform = log1p) { # Comply with CRAN NOTES . = NULL @@ -1714,9 +1715,9 @@ get_reduced_dimensions_MDS_bulk <- distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1808,7 +1809,7 @@ get_reduced_dimensions_PCA_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -1831,9 +1832,9 @@ get_reduced_dimensions_PCA_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1950,7 +1951,7 @@ get_reduced_dimensions_TSNE_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Comply with CRAN NOTES . = NULL @@ -2004,9 +2005,9 @@ get_reduced_dimensions_TSNE_bulk <- ~ .x %>% eliminate_sparse_transcripts(!!.feature) ) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p)) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) %>% @@ -2064,7 +2065,7 @@ get_reduced_dimensions_UMAP_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ...) { @@ -2114,8 +2115,8 @@ get_reduced_dimensions_UMAP_bulk <- ~ (.) ) %>% - # Check if log transform is needed - when(log_transform ~ dplyr::mutate(., !!.abundance := !!.abundance %>% log1p), ~ (.)) %>% + # Apply (log by default) transformation + dplyr::mutate(., !!.abundance := transform(!!.abundance)) %>% # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) @@ -2545,7 +2546,7 @@ remove_redundancy_elements_through_correlation <- function(.data, correlation_threshold = 0.9, top = Inf, of_samples = TRUE, - log_transform = FALSE) { + transform = identity) { # Comply with CRAN NOTES . = NULL @@ -2578,9 +2579,9 @@ remove_redundancy_elements_through_correlation <- function(.data, # Filter variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top = top) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + distinct() %>% # NO NEED OF RECTANGULAR @@ -3109,7 +3110,8 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = transform, + inverse_transform = inverse_transform, ...) { # Get column names .sample = enquo(.sample) @@ -3138,9 +3140,9 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, one_of(parse_formula(.formula))) %>% distinct() %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) + # Create design matrix @@ -3197,13 +3199,11 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, gather(!!.sample,!!.abundance,-!!.transcript) %>% # Reverse-Log transform if transformed in the first place - ifelse_pipe( - log_transform, - ~ .x %>% - dplyr::mutate(!!.abundance := !!.abundance %>% exp %>% `-`(1)) %>% - dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% - dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) - ) %>% + dplyr::mutate(!!.abundance := inverse_transform(!!.abundance)) %>% + + # In case the inverse tranform produces negative counts + dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% + dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) %>% # Reset column names dplyr::rename(!!value_adjusted := !!.abundance) %>% @@ -3232,7 +3232,7 @@ keep_variable_transcripts = function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) { + transform = log1p) { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -3255,8 +3255,8 @@ keep_variable_transcripts = function(.data, distinct(!!.sample,!!.transcript,!!.abundance) %>% # Check if logtansform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + spread(!!.sample,!!.abundance) %>% as_matrix(rownames = quo_name(.transcript)) diff --git a/R/functions_SE.R b/R/functions_SE.R index ec763546..194a037f 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -14,7 +14,7 @@ #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -23,7 +23,7 @@ get_clusters_kmeans_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots @@ -34,7 +34,7 @@ get_clusters_kmeans_bulk_SE <- .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) %>% + transform() %>% # Decide if of samples or transcripts when( @@ -66,7 +66,7 @@ get_clusters_kmeans_bulk_SE <- #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -74,7 +74,7 @@ get_clusters_kmeans_bulk_SE <- get_clusters_SNN_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { @@ -127,7 +127,7 @@ get_clusters_SNN_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' #' @return A tibble with additional columns @@ -138,7 +138,7 @@ get_reduced_dimensions_MDS_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL # This is only a dummy argument for making it compatibble with PCA ) { # Comply with CRAN NOTES @@ -222,7 +222,7 @@ get_reduced_dimensions_MDS_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function prcomp #' @@ -234,7 +234,7 @@ get_reduced_dimensions_PCA_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -323,7 +323,7 @@ we suggest to partition the dataset for sample clusters. #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function Rtsne #' @@ -334,7 +334,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA ...) { # Comply with CRAN NOTES @@ -406,7 +406,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param calculate_for_pca_dimensions An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered #' @param ... Further parameters passed to the function uwot #' @@ -417,7 +417,7 @@ get_reduced_dimensions_UMAP_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA calculate_for_pca_dimensions = 20, ...) { @@ -528,13 +528,13 @@ filter_if_abundant_were_identified = function(.data){ #' @param .transcript A character name of the transcript/gene column #' @param .abundance A character name of the read count column #' @param top An integer. How many top genes to select -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' #' @return A tibble filtered genes #' keep_variable_transcripts_SE = function(.data, top = 500, - log_transform = TRUE) { + transform = log1p) { # Manage Inf @@ -546,7 +546,7 @@ keep_variable_transcripts_SE = function(.data, .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() s <- rowMeans((x - rowMeans(x, na.rm=TRUE)) ^ 2, na.rm=TRUE) diff --git a/R/methods.R b/R/methods.R index a6102697..b188fec6 100755 --- a/R/methods.R +++ b/R/methods.R @@ -515,10 +515,12 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' #' @param method A character string. The cluster algorithm to use, at the moment k-means is the only algorithm included. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function kmeans #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details identifies clusters in the data, normally of samples. #' This function returns a tibble with additional columns for the cluster annotation. #' At the moment only k-means (DOI: 10.2307/2346830) and SNN clustering (DOI:10.1016/j.cell.2019.05.031) is supported, the plan is to introduce more clustering methods. @@ -554,9 +556,14 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("cluster_elements")) # Set internal @@ -566,10 +573,26 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -609,7 +632,7 @@ setGeneric("cluster_elements", function(.data, .element = !!.element, .feature = !!.feature, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), method == "SNN" ~ stop("tidybulk says: Matrix package (v1.3-3) causes an error with Seurat::FindNeighbors used in this method. We are trying to solve this issue. At the moment this option in unaviable."), @@ -618,7 +641,7 @@ setGeneric("cluster_elements", function(.data, # .element = !!.element, # .feature = !!.feature, # of_samples = of_samples, - # log_transform = log_transform, + # transform = transform, # ... # ), TRUE ~ stop("tidybulk says: the only supported methods are \"kmeans\" or \"SNN\" ") @@ -708,11 +731,13 @@ setMethod("cluster_elements", "tidybulk", .cluster_elements) #' @param top An integer. How many top genes to select for dimensionality reduction #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column #' @param .dims An integer. The number of dimensions your are interested in (e.g., 4 for returning the first four principal components). -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE" #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function reduces the dimensions of the transcript abundances. #' It can use multi-dimensional scaling (MDS; DOI.org/10.1186/gb-2010-11-3-r25), #' principal component analysis (PCA), or tSNE (Jesse Krijthe et al. 2018) @@ -778,10 +803,15 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) standardGeneric("reduce_dimensions")) # Set internal @@ -794,11 +824,26 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -839,7 +884,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("PCA") ~ get_reduced_dimensions_PCA_bulk(., @@ -849,7 +894,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -860,7 +905,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("UMAP") ~ get_reduced_dimensions_UMAP_bulk(., @@ -870,7 +915,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -1131,12 +1176,13 @@ setMethod("rotate_dimensions", "tidybulk", .rotate_dimensions) #' #' @param method A character string. The method to use, correlation and reduced_dimensions are available. The latter eliminates one of the most proximar pairs of samples in PCA reduced dimensions. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param correlation_threshold A real number between 0 and 1. For correlation based calculation. #' @param top An integer. How many top genes to select for correlation based method #' @param Dim_a_column A character string. For reduced_dimension based calculation. The column of one principal component #' @param Dim_b_column A character string. For reduced_dimension based calculation. The column of another principal component #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details This function removes redundant elements from the original data set (e.g., samples or transcripts). #' For example, if we want to define cell-type specific signatures with low sample redundancy. @@ -1214,10 +1260,13 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, - + transform = identity, Dim_a_column, - Dim_b_column) + Dim_b_column, + + # DEPRECATED + log_transform = NULL + ) standardGeneric("remove_redundancy")) # Set internal @@ -1229,15 +1278,27 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL +) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .abundance = enquo(.abundance) .element = enquo(.element) @@ -1262,7 +1323,7 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = correlation_threshold, top = top, of_samples = of_samples, - log_transform = log_transform + transform = transform ) } else if (method == "reduced_dimensions") { @@ -1330,10 +1391,13 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' @param inverse_transform A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function sva::ComBat #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function adjusts the abundance for (known) unwanted variation. #' At the moment just an unwanted covariate is allowed at a time using Combat (DOI: 10.1093/bioinformatics/bts034) #' @@ -1368,21 +1432,44 @@ setGeneric("adjust_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("adjust_abundance")) # Set internal .adjust_abundance = function(.data, - .formula, - .sample = NULL, - .transcript = NULL, - .abundance = NULL, - log_transform = TRUE, - action = "add", - ...) + .formula, + .sample = NULL, + .transcript = NULL, + .abundance = NULL, + transform = log1p, + inverse_transform = expm1, + action = "add", + ..., + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE){ + transform = log1p + inverse_transform = expm1 + } + } + # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -1419,7 +1506,8 @@ setGeneric("adjust_abundance", function(.data, .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - log_transform = log_transform, + transform = transform, + inverse_transform = inverse_transform, ... ) @@ -2461,7 +2549,9 @@ setMethod("test_differential_abundance", #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' @param top Integer. Number of top transcript to consider -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details At the moment this function uses edgeR \url{https://doi.org/10.1093/bioinformatics/btp616} #' @@ -2494,7 +2584,11 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = TRUE + ) standardGeneric("keep_variable")) # Set internal @@ -2503,8 +2597,21 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -2522,7 +2629,7 @@ setGeneric("keep_variable", function(.data, .transcript = !!.transcript, .abundance = !!.abundance, top = top, - log_transform = log_transform + transform = transform ) } diff --git a/R/methods_SE.R b/R/methods_SE.R index ecd444d6..25afa37a 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -228,7 +228,7 @@ setMethod("scale_abundance", .cluster_elements_se = function(.data, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { my_assay = @@ -251,7 +251,7 @@ setMethod("scale_abundance", my_cluster_function( my_assay, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ) %>% as.character() %>% @@ -309,7 +309,7 @@ setMethod("cluster_elements", .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, ...) { @@ -324,10 +324,10 @@ setMethod("cluster_elements", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() my_reduction_function = method %>% @@ -346,7 +346,7 @@ setMethod("cluster_elements", .dims = .dims, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale=scale, ... ) @@ -512,7 +512,7 @@ setMethod("rotate_dimensions", of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL) { @@ -545,10 +545,10 @@ setMethod("rotate_dimensions", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() # Get correlated elements remove_redundancy_elements_through_correlation_SE( @@ -621,7 +621,8 @@ setMethod("remove_redundancy", .adjust_abundance_se = function(.data, .formula, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, ...) { # Check if package is installed, otherwise install @@ -666,7 +667,7 @@ setMethod("remove_redundancy", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.)) + transform() # Set column name for value scaled @@ -687,7 +688,7 @@ setMethod("remove_redundancy", ...) %>% # Check if log transform needs to be reverted - when(log_transform ~ expm1(.), ~ (.)) + inverse_transform() # Add the assay @@ -1246,7 +1247,7 @@ setMethod( .keep_variable_se = function(.data, top = 500, - log_transform = TRUE) + transform = log1p) { @@ -1261,7 +1262,7 @@ setMethod( .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Take gene names rownames() diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 1310c087..7f5be64e 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -16,9 +16,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{spec_tbl_df}( @@ -27,9 +29,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tbl_df}( @@ -38,9 +42,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tidybulk}( @@ -49,9 +55,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{SummarizedExperiment}( @@ -60,9 +68,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{RangedSummarizedExperiment}( @@ -71,9 +81,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -87,11 +99,15 @@ adjust_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{inverse_transform}{A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function sva::ComBat} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the adjusted counts as `_adjusted` diff --git a/man/cluster_elements-methods.Rd b/man/cluster_elements-methods.Rd index 518f37f9..e1302075 100644 --- a/man/cluster_elements-methods.Rd +++ b/man/cluster_elements-methods.Rd @@ -17,9 +17,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{spec_tbl_df}( @@ -29,9 +30,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tbl_df}( @@ -41,9 +43,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tidybulk}( @@ -53,9 +56,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{SummarizedExperiment}( @@ -65,9 +69,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{RangedSummarizedExperiment}( @@ -77,9 +82,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -95,11 +101,13 @@ cluster_elements( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function kmeans} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns with cluster labels diff --git a/man/get_reduced_dimensions_UMAP_bulk.Rd b/man/get_reduced_dimensions_UMAP_bulk.Rd index 2d1145cf..1a8023d5 100644 --- a/man/get_reduced_dimensions_UMAP_bulk.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk.Rd @@ -12,7 +12,7 @@ get_reduced_dimensions_UMAP_bulk( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ... @@ -33,11 +33,11 @@ get_reduced_dimensions_UMAP_bulk( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} - \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} \item{...}{Further parameters passed to the function uwot} + +\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tibble with additional columns diff --git a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd index d6162239..b204aa0f 100644 --- a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd @@ -9,7 +9,7 @@ get_reduced_dimensions_UMAP_bulk_SE( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, calculate_for_pca_dimensions = 20, ... @@ -24,7 +24,7 @@ get_reduced_dimensions_UMAP_bulk_SE( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} diff --git a/man/keep_variable-methods.Rd b/man/keep_variable-methods.Rd index 6fb3e137..4457fb7b 100644 --- a/man/keep_variable-methods.Rd +++ b/man/keep_variable-methods.Rd @@ -16,6 +16,7 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, + transform = log1p, log_transform = TRUE ) @@ -25,7 +26,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tbl_df}( @@ -34,7 +36,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tidybulk}( @@ -43,12 +46,13 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) -\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, transform = log1p) -\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, transform = log1p) } \arguments{ \item{.data}{A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))} @@ -61,7 +65,9 @@ keep_variable( \item{top}{Integer. Number of top transcript to consider} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). diff --git a/man/reduce_dimensions-methods.Rd b/man/reduce_dimensions-methods.Rd index 4c0fce04..4cf095c3 100644 --- a/man/reduce_dimensions-methods.Rd +++ b/man/reduce_dimensions-methods.Rd @@ -19,10 +19,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{spec_tbl_df}( @@ -34,10 +35,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tbl_df}( @@ -49,10 +51,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tidybulk}( @@ -64,10 +67,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{SummarizedExperiment}( @@ -79,10 +83,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{RangedSummarizedExperiment}( @@ -94,10 +99,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -117,13 +123,15 @@ reduce_dimensions( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{scale}{A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns for the reduced dimensions diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 5cb9e2fd..0a626ac1 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -19,9 +19,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column, - Dim_b_column + Dim_b_column, + log_transform = NULL ) \S4method{remove_redundancy}{spec_tbl_df}( @@ -33,9 +34,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tbl_df}( @@ -47,9 +49,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tidybulk}( @@ -61,9 +64,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{SummarizedExperiment}( @@ -75,7 +79,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -89,7 +93,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -111,11 +115,13 @@ remove_redundancy( \item{top}{An integer. How many top genes to select for correlation based method} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{Dim_a_column}{A character string. For reduced_dimension based calculation. The column of one principal component} \item{Dim_b_column}{A character string. For reduced_dimension based calculation. The column of another principal component} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with with dropped redundant elements (e.g., samples). From 0690a9f14df08a8d756e52055b222d09f21e0702 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:52:58 +1000 Subject: [PATCH 35/38] fix CHECKs --- R/methods.R | 2 -- R/methods_SE.R | 5 ++++- man/remove_redundancy-methods.Rd | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/methods.R b/R/methods.R index b188fec6..cec35919 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1256,8 +1256,6 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, transform = identity, diff --git a/R/methods_SE.R b/R/methods_SE.R index 25afa37a..747507db 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -515,7 +515,10 @@ setMethod("rotate_dimensions", transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) { + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL) { Dim_a_column = enquo(Dim_a_column) diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 0a626ac1..b83bc56d 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -81,7 +81,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{RangedSummarizedExperiment}( @@ -95,7 +96,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) } \arguments{ From 38d06885cd65fca63e9c6738a8ceb9f6d446d3fa Mon Sep 17 00:00:00 2001 From: stemangiola Date: Tue, 28 Jun 2022 11:48:27 +1000 Subject: [PATCH 36/38] drop tsne test --- tests/testthat/test-bulk_methods.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 846e17d4..f6d962c6 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -1335,10 +1335,11 @@ test_that("Get reduced dimensions tSNE - no object",{ ) |> suppressMessages() - res |> - pull(tSNE1) |> - magrittr::extract2(1) |> - expect_equal(2.432608, tolerance = 0.01) + # DOES NOT REPRODUCE MAYBE BECAUSE OF DIFFERENT TSNE VERSIONS + # res |> + # pull(tSNE1) |> + # magrittr::extract2(1) |> + # expect_equal(2.432608, tolerance = 0.01) expect_equal( typeof(res$`tSNE1`), From c00ae278af8b01b24b14fbe975cdacd80f94ff6d Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:21:39 +1000 Subject: [PATCH 37/38] deprecation untested --- R/functions.R | 78 ++++----- R/functions_SE.R | 32 ++-- R/methods.R | 187 ++++++++++++++++----- R/methods_SE.R | 29 ++-- man/adjust_abundance-methods.Rd | 42 +++-- man/cluster_elements-methods.Rd | 34 ++-- man/get_reduced_dimensions_UMAP_bulk.Rd | 6 +- man/get_reduced_dimensions_UMAP_bulk_SE.Rd | 4 +- man/keep_variable-methods.Rd | 18 +- man/reduce_dimensions-methods.Rd | 34 ++-- man/remove_redundancy-methods.Rd | 28 +-- 11 files changed, 322 insertions(+), 170 deletions(-) diff --git a/R/functions.R b/R/functions.R index 05485430..e9460eca 100755 --- a/R/functions.R +++ b/R/functions.R @@ -1536,7 +1536,7 @@ get_clusters_kmeans_bulk <- .feature = NULL, .abundance = NULL, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots dots_args = rlang::dots_list(...) @@ -1553,9 +1553,9 @@ get_clusters_kmeans_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.feature,!!.abundance) %>% @@ -1603,7 +1603,7 @@ get_clusters_SNN_bulk <- .feature = NULL, .abundance, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Get column names .element = enquo(.element) @@ -1631,7 +1631,8 @@ get_clusters_SNN_bulk <- distinct(!!.element,!!.feature,!!.abundance) %>% # Check if log tranfrom is needed - #ifelse_pipe(log_transform, ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Prepare data frame for return spread(!!.element,!!.abundance) @@ -1689,7 +1690,7 @@ get_reduced_dimensions_MDS_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE) { + transform = log1p) { # Comply with CRAN NOTES . = NULL @@ -1714,9 +1715,9 @@ get_reduced_dimensions_MDS_bulk <- distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1808,7 +1809,7 @@ get_reduced_dimensions_PCA_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -1831,9 +1832,9 @@ get_reduced_dimensions_PCA_bulk <- # Prepare data frame distinct(!!.feature,!!.element,!!.abundance) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Stop any column is not if not numeric or integer ifelse_pipe( @@ -1950,7 +1951,7 @@ get_reduced_dimensions_TSNE_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Comply with CRAN NOTES . = NULL @@ -2004,9 +2005,9 @@ get_reduced_dimensions_TSNE_bulk <- ~ .x %>% eliminate_sparse_transcripts(!!.feature) ) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p)) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) %>% @@ -2064,7 +2065,7 @@ get_reduced_dimensions_UMAP_bulk <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ...) { @@ -2114,8 +2115,8 @@ get_reduced_dimensions_UMAP_bulk <- ~ (.) ) %>% - # Check if log transform is needed - when(log_transform ~ dplyr::mutate(., !!.abundance := !!.abundance %>% log1p), ~ (.)) %>% + # Apply (log by default) transformation + dplyr::mutate(., !!.abundance := transform(!!.abundance)) %>% # Filter most variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top) @@ -2545,7 +2546,7 @@ remove_redundancy_elements_through_correlation <- function(.data, correlation_threshold = 0.9, top = Inf, of_samples = TRUE, - log_transform = FALSE) { + transform = identity) { # Comply with CRAN NOTES . = NULL @@ -2578,9 +2579,9 @@ remove_redundancy_elements_through_correlation <- function(.data, # Filter variable genes keep_variable_transcripts(!!.element,!!.feature,!!.abundance, top = top) %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + distinct() %>% # NO NEED OF RECTANGULAR @@ -3109,7 +3110,8 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = transform, + inverse_transform = inverse_transform, ...) { # Get column names .sample = enquo(.sample) @@ -3138,9 +3140,9 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, one_of(parse_formula(.formula))) %>% distinct() %>% - # Check if log transform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% log1p())) + # Apply (log by default) transformation + dplyr::mutate(!!.abundance := transform(!!.abundance)) + # Create design matrix @@ -3197,13 +3199,11 @@ get_adjusted_counts_for_unwanted_variation_bulk <- function(.data, gather(!!.sample,!!.abundance,-!!.transcript) %>% # Reverse-Log transform if transformed in the first place - ifelse_pipe( - log_transform, - ~ .x %>% - dplyr::mutate(!!.abundance := !!.abundance %>% exp %>% `-`(1)) %>% - dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% - dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) - ) %>% + dplyr::mutate(!!.abundance := inverse_transform(!!.abundance)) %>% + + # In case the inverse tranform produces negative counts + dplyr::mutate(!!.abundance := ifelse(!!.abundance < 0, 0,!!.abundance)) %>% + dplyr::mutate(!!.abundance := !!.abundance %>% as.integer) %>% # Reset column names dplyr::rename(!!value_adjusted := !!.abundance) %>% @@ -3232,7 +3232,7 @@ keep_variable_transcripts = function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) { + transform = log1p) { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -3255,8 +3255,8 @@ keep_variable_transcripts = function(.data, distinct(!!.sample,!!.transcript,!!.abundance) %>% # Check if logtansform is needed - ifelse_pipe(log_transform, - ~ .x %>% dplyr::mutate(!!.abundance := !!.abundance %>% `+`(1) %>% log())) %>% + dplyr::mutate(!!.abundance := transform(!!.abundance)) %>% + spread(!!.sample,!!.abundance) %>% as_matrix(rownames = quo_name(.transcript)) diff --git a/R/functions_SE.R b/R/functions_SE.R index ec763546..194a037f 100755 --- a/R/functions_SE.R +++ b/R/functions_SE.R @@ -14,7 +14,7 @@ #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -23,7 +23,7 @@ get_clusters_kmeans_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { # Check if centers is in dots @@ -34,7 +34,7 @@ get_clusters_kmeans_bulk_SE <- .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) %>% + transform() %>% # Decide if of samples or transcripts when( @@ -66,7 +66,7 @@ get_clusters_kmeans_bulk_SE <- #' @param .feature A column symbol. The column that is represents entities to cluster (i.e., normally samples) #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally genes) #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param ... Further parameters passed to the function kmeans #' #' @return A tibble with additional columns @@ -74,7 +74,7 @@ get_clusters_kmeans_bulk_SE <- get_clusters_SNN_bulk_SE <- function(.data, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { @@ -127,7 +127,7 @@ get_clusters_SNN_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' #' @return A tibble with additional columns @@ -138,7 +138,7 @@ get_reduced_dimensions_MDS_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL # This is only a dummy argument for making it compatibble with PCA ) { # Comply with CRAN NOTES @@ -222,7 +222,7 @@ get_reduced_dimensions_MDS_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function prcomp #' @@ -234,7 +234,7 @@ get_reduced_dimensions_PCA_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = FALSE, ...) { # Comply with CRAN NOTES @@ -323,7 +323,7 @@ we suggest to partition the dataset for sample clusters. #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean #' @param ... Further parameters passed to the function Rtsne #' @@ -334,7 +334,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA ...) { # Comply with CRAN NOTES @@ -406,7 +406,7 @@ get_reduced_dimensions_TSNE_bulk_SE <- #' @param .element A column symbol. The column that is used to calculate distance (i.e., normally samples) #' @param top An integer. How many top genes to select #' @param of_samples A boolean -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param calculate_for_pca_dimensions An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered #' @param ... Further parameters passed to the function uwot #' @@ -417,7 +417,7 @@ get_reduced_dimensions_UMAP_bulk_SE <- .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, # This is only a dummy argument for making it compatibble with PCA calculate_for_pca_dimensions = 20, ...) { @@ -528,13 +528,13 @@ filter_if_abundant_were_identified = function(.data){ #' @param .transcript A character name of the transcript/gene column #' @param .abundance A character name of the read count column #' @param top An integer. How many top genes to select -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' #' @return A tibble filtered genes #' keep_variable_transcripts_SE = function(.data, top = 500, - log_transform = TRUE) { + transform = log1p) { # Manage Inf @@ -546,7 +546,7 @@ keep_variable_transcripts_SE = function(.data, .data %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() s <- rowMeans((x - rowMeans(x, na.rm=TRUE)) ^ 2, na.rm=TRUE) diff --git a/R/methods.R b/R/methods.R index a6102697..b188fec6 100755 --- a/R/methods.R +++ b/R/methods.R @@ -515,10 +515,12 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' #' @param method A character string. The cluster algorithm to use, at the moment k-means is the only algorithm included. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function kmeans #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details identifies clusters in the data, normally of samples. #' This function returns a tibble with additional columns for the cluster annotation. #' At the moment only k-means (DOI: 10.2307/2346830) and SNN clustering (DOI:10.1016/j.cell.2019.05.031) is supported, the plan is to introduce more clustering methods. @@ -554,9 +556,14 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("cluster_elements")) # Set internal @@ -566,10 +573,26 @@ setGeneric("cluster_elements", function(.data, .abundance = NULL, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -609,7 +632,7 @@ setGeneric("cluster_elements", function(.data, .element = !!.element, .feature = !!.feature, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), method == "SNN" ~ stop("tidybulk says: Matrix package (v1.3-3) causes an error with Seurat::FindNeighbors used in this method. We are trying to solve this issue. At the moment this option in unaviable."), @@ -618,7 +641,7 @@ setGeneric("cluster_elements", function(.data, # .element = !!.element, # .feature = !!.feature, # of_samples = of_samples, - # log_transform = log_transform, + # transform = transform, # ... # ), TRUE ~ stop("tidybulk says: the only supported methods are \"kmeans\" or \"SNN\" ") @@ -708,11 +731,13 @@ setMethod("cluster_elements", "tidybulk", .cluster_elements) #' @param top An integer. How many top genes to select for dimensionality reduction #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column #' @param .dims An integer. The number of dimensions your are interested in (e.g., 4 for returning the first four principal components). -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param scale A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE" #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function reduces the dimensions of the transcript abundances. #' It can use multi-dimensional scaling (MDS; DOI.org/10.1186/gb-2010-11-3-r25), #' principal component analysis (PCA), or tSNE (Jesse Krijthe et al. 2018) @@ -778,10 +803,15 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) standardGeneric("reduce_dimensions")) # Set internal @@ -794,11 +824,26 @@ setGeneric("reduce_dimensions", function(.data, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + + ) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Get column names .element = enquo(.element) .feature = enquo(.feature) @@ -839,7 +884,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("PCA") ~ get_reduced_dimensions_PCA_bulk(., @@ -849,7 +894,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -860,7 +905,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ), tolower(method) == tolower("UMAP") ~ get_reduced_dimensions_UMAP_bulk(., @@ -870,7 +915,7 @@ setGeneric("reduce_dimensions", function(.data, .feature = !!.feature, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale = scale, ... ), @@ -1131,12 +1176,13 @@ setMethod("rotate_dimensions", "tidybulk", .rotate_dimensions) #' #' @param method A character string. The method to use, correlation and reduced_dimensions are available. The latter eliminates one of the most proximar pairs of samples in PCA reduced dimensions. #' @param of_samples A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity #' @param correlation_threshold A real number between 0 and 1. For correlation based calculation. #' @param top An integer. How many top genes to select for correlation based method #' @param Dim_a_column A character string. For reduced_dimension based calculation. The column of one principal component #' @param Dim_b_column A character string. For reduced_dimension based calculation. The column of another principal component #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details This function removes redundant elements from the original data set (e.g., samples or transcripts). #' For example, if we want to define cell-type specific signatures with low sample redundancy. @@ -1214,10 +1260,13 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, - + transform = identity, Dim_a_column, - Dim_b_column) + Dim_b_column, + + # DEPRECATED + log_transform = NULL + ) standardGeneric("remove_redundancy")) # Set internal @@ -1229,15 +1278,27 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL +) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .abundance = enquo(.abundance) .element = enquo(.element) @@ -1262,7 +1323,7 @@ setGeneric("remove_redundancy", function(.data, correlation_threshold = correlation_threshold, top = top, of_samples = of_samples, - log_transform = log_transform + transform = transform ) } else if (method == "reduced_dimensions") { @@ -1330,10 +1391,13 @@ setMethod("remove_redundancy", "tidybulk", .remove_redundancy) #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' @param inverse_transform A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' @param ... Further parameters passed to the function sva::ComBat #' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' #' @details This function adjusts the abundance for (known) unwanted variation. #' At the moment just an unwanted covariate is allowed at a time using Combat (DOI: 10.1093/bioinformatics/bts034) #' @@ -1368,21 +1432,44 @@ setGeneric("adjust_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, + action = "add", - ...) + ..., + + # DEPRECATED + log_transform = NULL + ) standardGeneric("adjust_abundance")) # Set internal .adjust_abundance = function(.data, - .formula, - .sample = NULL, - .transcript = NULL, - .abundance = NULL, - log_transform = TRUE, - action = "add", - ...) + .formula, + .sample = NULL, + .transcript = NULL, + .abundance = NULL, + transform = log1p, + inverse_transform = expm1, + action = "add", + ..., + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE){ + transform = log1p + inverse_transform = expm1 + } + } + # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -1419,7 +1506,8 @@ setGeneric("adjust_abundance", function(.data, .sample = !!.sample, .transcript = !!.transcript, .abundance = !!.abundance, - log_transform = log_transform, + transform = transform, + inverse_transform = inverse_transform, ... ) @@ -2461,7 +2549,9 @@ setMethod("test_differential_abundance", #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' @param top Integer. Number of top transcript to consider -#' @param log_transform A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) +#' @param transform A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity +#' +#' @param log_transform DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data) #' #' @details At the moment this function uses edgeR \url{https://doi.org/10.1093/bioinformatics/btp616} #' @@ -2494,7 +2584,11 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = TRUE + ) standardGeneric("keep_variable")) # Set internal @@ -2503,8 +2597,21 @@ setGeneric("keep_variable", function(.data, .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE) + transform = log1p, + + # DEPRECATED + log_transform = NULL) { + + # DEPRECATION OF log_transform + if (is_present(log_transform) & !is.null(log_transform)) { + + # Signal the deprecation to the user + deprecate_warn("1.7.4", "tidybulk::test_differential_abundance(log_transform = )", details = "The argument log_transform is now deprecated, please use transform.") + + if(log_transform == TRUE) transform = log1p + } + # Make col names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -2522,7 +2629,7 @@ setGeneric("keep_variable", function(.data, .transcript = !!.transcript, .abundance = !!.abundance, top = top, - log_transform = log_transform + transform = transform ) } diff --git a/R/methods_SE.R b/R/methods_SE.R index ecd444d6..25afa37a 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -228,7 +228,7 @@ setMethod("scale_abundance", .cluster_elements_se = function(.data, method , of_samples = TRUE, - log_transform = TRUE, + transform = log1p, ...) { my_assay = @@ -251,7 +251,7 @@ setMethod("scale_abundance", my_cluster_function( my_assay, of_samples = of_samples, - log_transform = log_transform, + transform = transform, ... ) %>% as.character() %>% @@ -309,7 +309,7 @@ setMethod("cluster_elements", .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, ...) { @@ -324,10 +324,10 @@ setMethod("cluster_elements", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() my_reduction_function = method %>% @@ -346,7 +346,7 @@ setMethod("cluster_elements", .dims = .dims, top = top, of_samples = of_samples, - log_transform = log_transform, + transform = transform, scale=scale, ... ) @@ -512,7 +512,7 @@ setMethod("rotate_dimensions", of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL) { @@ -545,10 +545,10 @@ setMethod("rotate_dimensions", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.) ) + transform() # Get correlated elements remove_redundancy_elements_through_correlation_SE( @@ -621,7 +621,8 @@ setMethod("remove_redundancy", .adjust_abundance_se = function(.data, .formula, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, ...) { # Check if package is installed, otherwise install @@ -666,7 +667,7 @@ setMethod("remove_redundancy", .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Check if log transform is needed - when(log_transform ~ log1p(.), ~ (.)) + transform() # Set column name for value scaled @@ -687,7 +688,7 @@ setMethod("remove_redundancy", ...) %>% # Check if log transform needs to be reverted - when(log_transform ~ expm1(.), ~ (.)) + inverse_transform() # Add the assay @@ -1246,7 +1247,7 @@ setMethod( .keep_variable_se = function(.data, top = 500, - log_transform = TRUE) + transform = log1p) { @@ -1261,7 +1262,7 @@ setMethod( .[[get_assay_scaled_if_exists_SE(.data)]] %>% # Filter most variable genes - keep_variable_transcripts_SE(top = top, log_transform = log_transform) %>% + keep_variable_transcripts_SE(top = top, transform = transform) %>% # Take gene names rownames() diff --git a/man/adjust_abundance-methods.Rd b/man/adjust_abundance-methods.Rd index 1310c087..7f5be64e 100644 --- a/man/adjust_abundance-methods.Rd +++ b/man/adjust_abundance-methods.Rd @@ -16,9 +16,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{spec_tbl_df}( @@ -27,9 +29,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tbl_df}( @@ -38,9 +42,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{tidybulk}( @@ -49,9 +55,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{SummarizedExperiment}( @@ -60,9 +68,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) \S4method{adjust_abundance}{RangedSummarizedExperiment}( @@ -71,9 +81,11 @@ adjust_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - log_transform = TRUE, + transform = log1p, + inverse_transform = expm1, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -87,11 +99,15 @@ adjust_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{inverse_transform}{A function that is the inverse of transform (e.g. expm1 is inverse of log1p). This is needed to tranform back the counts after analysis.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function sva::ComBat} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the adjusted counts as `_adjusted` diff --git a/man/cluster_elements-methods.Rd b/man/cluster_elements-methods.Rd index 518f37f9..e1302075 100644 --- a/man/cluster_elements-methods.Rd +++ b/man/cluster_elements-methods.Rd @@ -17,9 +17,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{spec_tbl_df}( @@ -29,9 +30,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tbl_df}( @@ -41,9 +43,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{tidybulk}( @@ -53,9 +56,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{SummarizedExperiment}( @@ -65,9 +69,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) \S4method{cluster_elements}{RangedSummarizedExperiment}( @@ -77,9 +82,10 @@ cluster_elements( .abundance = NULL, method, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -95,11 +101,13 @@ cluster_elements( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function kmeans} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns with cluster labels diff --git a/man/get_reduced_dimensions_UMAP_bulk.Rd b/man/get_reduced_dimensions_UMAP_bulk.Rd index 2d1145cf..1a8023d5 100644 --- a/man/get_reduced_dimensions_UMAP_bulk.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk.Rd @@ -12,7 +12,7 @@ get_reduced_dimensions_UMAP_bulk( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, calculate_for_pca_dimensions = 20, ... @@ -33,11 +33,11 @@ get_reduced_dimensions_UMAP_bulk( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} - \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} \item{...}{Further parameters passed to the function uwot} + +\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tibble with additional columns diff --git a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd index d6162239..b204aa0f 100644 --- a/man/get_reduced_dimensions_UMAP_bulk_SE.Rd +++ b/man/get_reduced_dimensions_UMAP_bulk_SE.Rd @@ -9,7 +9,7 @@ get_reduced_dimensions_UMAP_bulk_SE( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = NULL, calculate_for_pca_dimensions = 20, ... @@ -24,7 +24,7 @@ get_reduced_dimensions_UMAP_bulk_SE( \item{of_samples}{A boolean} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{calculate_for_pca_dimensions}{An integer of length one. The number of PCA dimensions to based the UMAP calculatio on. If NULL all variable features are considered} diff --git a/man/keep_variable-methods.Rd b/man/keep_variable-methods.Rd index 6fb3e137..4457fb7b 100644 --- a/man/keep_variable-methods.Rd +++ b/man/keep_variable-methods.Rd @@ -16,6 +16,7 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, + transform = log1p, log_transform = TRUE ) @@ -25,7 +26,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tbl_df}( @@ -34,7 +36,8 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) \S4method{keep_variable}{tidybulk}( @@ -43,12 +46,13 @@ keep_variable( .transcript = NULL, .abundance = NULL, top = 500, - log_transform = TRUE + transform = log1p, + log_transform = NULL ) -\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{SummarizedExperiment}(.data, top = 500, transform = log1p) -\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, log_transform = TRUE) +\S4method{keep_variable}{RangedSummarizedExperiment}(.data, top = 500, transform = log1p) } \arguments{ \item{.data}{A `tbl` (with at least three columns for sample, feature and transcript abundance) or `SummarizedExperiment` (more convenient if abstracted to tibble with library(tidySummarizedExperiment))} @@ -61,7 +65,9 @@ keep_variable( \item{top}{Integer. Number of top transcript to consider} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A consistent object (to the input) with additional columns for the statistics from the hypothesis test (e.g., log fold change, p-value and false discovery rate). diff --git a/man/reduce_dimensions-methods.Rd b/man/reduce_dimensions-methods.Rd index 4c0fce04..4cf095c3 100644 --- a/man/reduce_dimensions-methods.Rd +++ b/man/reduce_dimensions-methods.Rd @@ -19,10 +19,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{spec_tbl_df}( @@ -34,10 +35,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tbl_df}( @@ -49,10 +51,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{tidybulk}( @@ -64,10 +67,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{SummarizedExperiment}( @@ -79,10 +83,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) \S4method{reduce_dimensions}{RangedSummarizedExperiment}( @@ -94,10 +99,11 @@ reduce_dimensions( .dims = 2, top = 500, of_samples = TRUE, - log_transform = TRUE, + transform = log1p, scale = TRUE, action = "add", - ... + ..., + log_transform = NULL ) } \arguments{ @@ -117,13 +123,15 @@ reduce_dimensions( \item{of_samples}{A boolean. In case the input is a tidybulk object, it indicates Whether the element column will be sample or transcript column} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{scale}{A boolean for method="PCA", this will be passed to the `prcomp` function. It is not included in the ... argument because although the default for `prcomp` if FALSE, it is advisable to set it as TRUE.} \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} \item{...}{Further parameters passed to the function prcomp if you choose method="PCA" or Rtsne if you choose method="tSNE"} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with additional columns for the reduced dimensions diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 5cb9e2fd..0a626ac1 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -19,9 +19,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column, - Dim_b_column + Dim_b_column, + log_transform = NULL ) \S4method{remove_redundancy}{spec_tbl_df}( @@ -33,9 +34,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tbl_df}( @@ -47,9 +49,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{tidybulk}( @@ -61,9 +64,10 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{SummarizedExperiment}( @@ -75,7 +79,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -89,7 +93,7 @@ remove_redundancy( of_samples = TRUE, correlation_threshold = 0.9, top = Inf, - log_transform = FALSE, + transform = identity, Dim_a_column = NULL, Dim_b_column = NULL ) @@ -111,11 +115,13 @@ remove_redundancy( \item{top}{An integer. How many top genes to select for correlation based method} -\item{log_transform}{A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} +\item{transform}{A function that will tranform the counts, by default it is log1p for RNA sequencing data, but for avoinding tranformation you can use identity} \item{Dim_a_column}{A character string. For reduced_dimension based calculation. The column of one principal component} \item{Dim_b_column}{A character string. For reduced_dimension based calculation. The column of another principal component} + +\item{log_transform}{DEPRECATED - A boolean, whether the value should be log-transformed (e.g., TRUE for RNA sequencing data)} } \value{ A tbl object with with dropped redundant elements (e.g., samples). From f47d148a0468022255c99474d63eb2e558478112 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sun, 26 Jun 2022 12:52:58 +1000 Subject: [PATCH 38/38] fix CHECKs --- R/methods.R | 2 -- R/methods_SE.R | 5 ++++- man/remove_redundancy-methods.Rd | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/methods.R b/R/methods.R index b188fec6..cec35919 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1256,8 +1256,6 @@ setGeneric("remove_redundancy", function(.data, of_samples = TRUE, - - correlation_threshold = 0.9, top = Inf, transform = identity, diff --git a/R/methods_SE.R b/R/methods_SE.R index 25afa37a..747507db 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -515,7 +515,10 @@ setMethod("rotate_dimensions", transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL) { + Dim_b_column = NULL, + + # DEPRECATED + log_transform = NULL) { Dim_a_column = enquo(Dim_a_column) diff --git a/man/remove_redundancy-methods.Rd b/man/remove_redundancy-methods.Rd index 0a626ac1..b83bc56d 100644 --- a/man/remove_redundancy-methods.Rd +++ b/man/remove_redundancy-methods.Rd @@ -81,7 +81,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) \S4method{remove_redundancy}{RangedSummarizedExperiment}( @@ -95,7 +96,8 @@ remove_redundancy( top = Inf, transform = identity, Dim_a_column = NULL, - Dim_b_column = NULL + Dim_b_column = NULL, + log_transform = NULL ) } \arguments{