diff --git a/R/methods.R b/R/methods.R index 0a6e6599..e0086b77 100755 --- a/R/methods.R +++ b/R/methods.R @@ -586,7 +586,7 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance) #' @param .sample The name of the sample column #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column -#' @param .subset_for_scaling A gene-wise quosure condition. This will be used to filter rows (features/genes) of the dataset. For example +#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible. #' @param action A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information. #' #' @@ -620,7 +620,7 @@ setGeneric("quantile_normalise_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add") standardGeneric("quantile_normalise_abundance")) @@ -629,7 +629,7 @@ setGeneric("quantile_normalise_abundance", function(.data, .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add") { @@ -645,20 +645,9 @@ setGeneric("quantile_normalise_abundance", function(.data, .transcript = col_names$.transcript .abundance = col_names$.abundance - .subset_for_scaling = enquo(.subset_for_scaling) - # Set column name for value scaled value_scaled = as.symbol(sprintf("%s%s", quo_name(.abundance), scaled_string)) - - # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing limma needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) - } - # Reformat input data set .data_norm <- .data %>% @@ -669,8 +658,49 @@ setGeneric("quantile_normalise_abundance", function(.data, # Set samples and genes as factors dplyr::mutate(!!.sample := factor(!!.sample),!!.transcript := factor(!!.transcript)) |> pivot_wider(names_from = !!.sample, values_from = !!.abundance) |> - as_matrix(rownames=!!.transcript) |> - limma::normalizeQuantiles() |> + as_matrix(rownames=!!.transcript) + + + if(tolower(method) == "limma_normalize_quantiles"){ + + # Check if package is installed, otherwise install + if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { + message("tidybulk says: Installing limma needed for analyses") + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager", repos = "https://cloud.r-project.org") + BiocManager::install("limma", ask = FALSE) + } + + .data_norm = + .data_norm |> + limma::normalizeQuantiles() + } + else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){ + + # Check if package is installed, otherwise install + if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { + message("tidybulk says: Installing preprocessCore needed for analyses") + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager", repos = "https://cloud.r-project.org") + BiocManager::install("preprocessCore", ask = FALSE) + } + + .data_norm_quant = + .data_norm |> + preprocessCore::normalize.quantiles.use.target( + target = preprocessCore::normalize.quantiles.determine.target(.data_norm) + ) + + colnames(.data_norm_quant) = .data_norm |> colnames() + rownames(.data_norm_quant) = .data_norm |> rownames() + + .data_norm = .data_norm_quant + rm(.data_norm_quant) + + } else stop("tidybulk says: the methods must be limma_normalize_quantiles or preprocesscore") + + .data_norm = + .data_norm |> as_tibble(rownames = quo_name(.transcript)) |> pivot_longer(-!!.transcript, names_to = quo_name(.sample), values_to = quo_names(value_scaled)) |> diff --git a/R/methods_SE.R b/R/methods_SE.R index 85a9970f..6ade282c 100755 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -247,7 +247,7 @@ setMethod("scale_abundance", .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = NULL) { @@ -255,7 +255,6 @@ setMethod("scale_abundance", . = NULL .abundance = enquo(.abundance) - .subset_for_scaling = enquo(.subset_for_scaling) # Set column name for value scaled @@ -271,21 +270,55 @@ setMethod("scale_abundance", # Set column name for value scaled value_scaled = my_assay %>% paste0(scaled_string) - # Check if package is installed, otherwise install - if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { - message("tidybulk says: Installing limma needed for analyses") - if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager", repos = "https://cloud.r-project.org") - BiocManager::install("limma", ask = FALSE) + + if(tolower(method) == "limma_normalize_quantiles"){ + + # Check if package is installed, otherwise install + if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) { + message("tidybulk says: Installing limma needed for analyses") + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager", repos = "https://cloud.r-project.org") + BiocManager::install("limma", ask = FALSE) + } + + .data_norm <- + .data %>% + assay(my_assay) |> + limma::normalizeQuantiles() |> + list() |> + setNames(value_scaled) + } + else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){ + + # Check if package is installed, otherwise install + if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) { + message("tidybulk says: Installing preprocessCore needed for analyses") + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager", repos = "https://cloud.r-project.org") + BiocManager::install("preprocessCore", ask = FALSE) + } - # Reformat input data set - .data_norm <- - .data %>% - assay(my_assay) |> - limma::normalizeQuantiles() |> - list() |> - setNames(value_scaled) + .data_norm = + .data |> + assay(my_assay) |> + as.matrix() + + .data_norm = + .data_norm |> + preprocessCore::normalize.quantiles.use.target( + target = preprocessCore::normalize.quantiles.determine.target(.data_norm) + ) + + colnames(.data_norm) = .data |> assay(my_assay) |> colnames() + rownames(.data_norm) = .data |> assay(my_assay) |> rownames() + + .data_norm = + .data_norm |> + list() |> + setNames(value_scaled) + + } else stop("tidybulk says: the methods must be limma_normalize_quantiles or preprocesscore") # Add the assay assays(.data) = assays(.data) %>% c(.data_norm) diff --git a/man/quantile_normalise_abundance-methods.Rd b/man/quantile_normalise_abundance-methods.Rd index 2df917fb..fdaf6e46 100644 --- a/man/quantile_normalise_abundance-methods.Rd +++ b/man/quantile_normalise_abundance-methods.Rd @@ -15,7 +15,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add" ) @@ -24,7 +24,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add" ) @@ -33,7 +33,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add" ) @@ -42,7 +42,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = "add" ) @@ -51,7 +51,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = NULL ) @@ -60,7 +60,7 @@ quantile_normalise_abundance( .sample = NULL, .transcript = NULL, .abundance = NULL, - .subset_for_scaling = NULL, + method = "limma_normalize_quantiles", action = NULL ) } @@ -73,7 +73,7 @@ quantile_normalise_abundance( \item{.abundance}{The name of the transcript/gene abundance column} -\item{.subset_for_scaling}{A gene-wise quosure condition. This will be used to filter rows (features/genes) of the dataset. For example} +\item{method}{A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible.} \item{action}{A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information.} } diff --git a/tests/testthat/test-bulk_methods_SummarizedExperiment.R b/tests/testthat/test-bulk_methods_SummarizedExperiment.R index aa67e9d5..067bd6bc 100755 --- a/tests/testthat/test-bulk_methods_SummarizedExperiment.R +++ b/tests/testthat/test-bulk_methods_SummarizedExperiment.R @@ -83,9 +83,30 @@ test_that("quantile normalisation",{ expect_equal( res_tibble |> filter(a=="SRR1740035" & b=="ABCB9") |> - pull(c_scaled) + dplyr::pull(c_scaled) ) + # preprocessCore + res = se_mini |> quantile_normalise_abundance(method = "preprocesscore_normalize_quantiles_use_target") + + res_tibble = + input_df |> + quantile_normalise_abundance( + .sample = a, + .transcript = b, + .abundance = c, + method = "preprocesscore_normalize_quantiles_use_target", + action = "get" + ) + + + SummarizedExperiment::assay(res, "count_scaled")["ABCB9","SRR1740035"] |> + expect_equal( + res_tibble |> + filter(a=="SRR1740035" & b=="ABCB9") |> + dplyr::pull(c_scaled) + ) + }) @@ -465,7 +486,7 @@ test_that("differential trancript abundance - random effects SE",{ #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> test_differential_abundance( ~ condition + (1 + condition | time), - method = "glmmseq_lme4", + method = "glmmseq_lme4", cores = 1 ) @@ -475,31 +496,31 @@ test_that("differential trancript abundance - random effects SE",{ c(0.1065866, 0.1109067, 0.1116562 , NA), tolerance=1e-2 ) - + # Custom dispersion - se_mini = + se_mini = se_mini |> identify_abundant(factor_of_interest = condition) - + rowData(se_mini)$disp_ = rep(2,nrow(se_mini)) - - res = + + res = se_mini[1:10,] |> - #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> + #mutate(time = time |> stringr::str_replace_all(" ", "_")) |> test_differential_abundance( ~ condition + (1 + condition | time), - method = "glmmseq_lme4", - .dispersion = disp_, + method = "glmmseq_lme4", + .dispersion = disp_, cores = 1 - ) - - rowData(res)[,"P_condition_adjusted"] |> - head(4) |> + ) + + rowData(res)[,"P_condition_adjusted"] |> + head(4) |> expect_equal( c(0.1153254, 0.1668555, 0.1668555 , NA), tolerance=1e-3 ) - + })