Skip to content

Commit

Permalink
Merge pull request #290 from stemangiola/quantile-normalisation
Browse files Browse the repository at this point in the history
add preprocess core method
  • Loading branch information
stemangiola authored Aug 31, 2023
2 parents fd8b3aa + b88b6d0 commit f4d9b4d
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 53 deletions.
62 changes: 46 additions & 16 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ setMethod("scale_abundance", "tidybulk", .scale_abundance)
#' @param .sample The name of the sample column
#' @param .transcript The name of the transcript/gene column
#' @param .abundance The name of the transcript/gene abundance column
#' @param .subset_for_scaling A gene-wise quosure condition. This will be used to filter rows (features/genes) of the dataset. For example
#' @param method A character string. Either "limma_normalize_quantiles" for limma::normalizeQuantiles or "preprocesscore_normalize_quantiles_use_target" for preprocessCore::normalize.quantiles.use.target for large-scale dataset, where limmma could not be compatible.
#' @param action A character string between "add" (default) and "only". "add" joins the new information to the input tbl (default), "only" return a non-redundant tbl with the just new information.
#'
#'
Expand Down Expand Up @@ -620,7 +620,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
.sample = NULL,
.transcript = NULL,
.abundance = NULL,
.subset_for_scaling = NULL,
method = "limma_normalize_quantiles",
action = "add")
standardGeneric("quantile_normalise_abundance"))

Expand All @@ -629,7 +629,7 @@ setGeneric("quantile_normalise_abundance", function(.data,
.sample = NULL,
.transcript = NULL,
.abundance = NULL,
.subset_for_scaling = NULL,
method = "limma_normalize_quantiles",
action = "add")
{

Expand All @@ -645,20 +645,9 @@ setGeneric("quantile_normalise_abundance", function(.data,
.transcript = col_names$.transcript
.abundance = col_names$.abundance

.subset_for_scaling = enquo(.subset_for_scaling)

# Set column name for value scaled
value_scaled = as.symbol(sprintf("%s%s", quo_name(.abundance), scaled_string))


# Check if package is installed, otherwise install
if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing limma needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("limma", ask = FALSE)
}

# Reformat input data set
.data_norm <-
.data %>%
Expand All @@ -669,8 +658,49 @@ setGeneric("quantile_normalise_abundance", function(.data,
# Set samples and genes as factors
dplyr::mutate(!!.sample := factor(!!.sample),!!.transcript := factor(!!.transcript)) |>
pivot_wider(names_from = !!.sample, values_from = !!.abundance) |>
as_matrix(rownames=!!.transcript) |>
limma::normalizeQuantiles() |>
as_matrix(rownames=!!.transcript)


if(tolower(method) == "limma_normalize_quantiles"){

# Check if package is installed, otherwise install
if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing limma needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("limma", ask = FALSE)
}

.data_norm =
.data_norm |>
limma::normalizeQuantiles()
}
else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){

# Check if package is installed, otherwise install
if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing preprocessCore needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("preprocessCore", ask = FALSE)
}

.data_norm_quant =
.data_norm |>
preprocessCore::normalize.quantiles.use.target(
target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
)

colnames(.data_norm_quant) = .data_norm |> colnames()
rownames(.data_norm_quant) = .data_norm |> rownames()

.data_norm = .data_norm_quant
rm(.data_norm_quant)

} else stop("tidybulk says: the methods must be limma_normalize_quantiles or preprocesscore")

.data_norm =
.data_norm |>
as_tibble(rownames = quo_name(.transcript)) |>
pivot_longer(-!!.transcript, names_to = quo_name(.sample), values_to = quo_names(value_scaled)) |>

Expand Down
63 changes: 48 additions & 15 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,15 +247,14 @@ setMethod("scale_abundance",
.sample = NULL,
.transcript = NULL,
.abundance = NULL,
.subset_for_scaling = NULL,
method = "limma_normalize_quantiles",
action = NULL) {


# Fix NOTEs
. = NULL

.abundance = enquo(.abundance)
.subset_for_scaling = enquo(.subset_for_scaling)

# Set column name for value scaled

Expand All @@ -271,21 +270,55 @@ setMethod("scale_abundance",
# Set column name for value scaled
value_scaled = my_assay %>% paste0(scaled_string)

# Check if package is installed, otherwise install
if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing limma needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("limma", ask = FALSE)

if(tolower(method) == "limma_normalize_quantiles"){

# Check if package is installed, otherwise install
if (find.package("limma", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing limma needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("limma", ask = FALSE)
}

.data_norm <-
.data %>%
assay(my_assay) |>
limma::normalizeQuantiles() |>
list() |>
setNames(value_scaled)

}
else if(tolower(method) == "preprocesscore_normalize_quantiles_use_target"){

# Check if package is installed, otherwise install
if (find.package("preprocessCore", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing preprocessCore needed for analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("preprocessCore", ask = FALSE)
}

# Reformat input data set
.data_norm <-
.data %>%
assay(my_assay) |>
limma::normalizeQuantiles() |>
list() |>
setNames(value_scaled)
.data_norm =
.data |>
assay(my_assay) |>
as.matrix()

.data_norm =
.data_norm |>
preprocessCore::normalize.quantiles.use.target(
target = preprocessCore::normalize.quantiles.determine.target(.data_norm)
)

colnames(.data_norm) = .data |> assay(my_assay) |> colnames()
rownames(.data_norm) = .data |> assay(my_assay) |> rownames()

.data_norm =
.data_norm |>
list() |>
setNames(value_scaled)

} else stop("tidybulk says: the methods must be limma_normalize_quantiles or preprocesscore")

# Add the assay
assays(.data) = assays(.data) %>% c(.data_norm)
Expand Down
14 changes: 7 additions & 7 deletions man/quantile_normalise_abundance-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 36 additions & 15 deletions tests/testthat/test-bulk_methods_SummarizedExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,30 @@ test_that("quantile normalisation",{
expect_equal(
res_tibble |>
filter(a=="SRR1740035" & b=="ABCB9") |>
pull(c_scaled)
dplyr::pull(c_scaled)
)

# preprocessCore
res = se_mini |> quantile_normalise_abundance(method = "preprocesscore_normalize_quantiles_use_target")

res_tibble =
input_df |>
quantile_normalise_abundance(
.sample = a,
.transcript = b,
.abundance = c,
method = "preprocesscore_normalize_quantiles_use_target",
action = "get"
)


SummarizedExperiment::assay(res, "count_scaled")["ABCB9","SRR1740035"] |>
expect_equal(
res_tibble |>
filter(a=="SRR1740035" & b=="ABCB9") |>
dplyr::pull(c_scaled)
)

})


Expand Down Expand Up @@ -465,7 +486,7 @@ test_that("differential trancript abundance - random effects SE",{
#mutate(time = time |> stringr::str_replace_all(" ", "_")) |>
test_differential_abundance(
~ condition + (1 + condition | time),
method = "glmmseq_lme4",
method = "glmmseq_lme4",
cores = 1
)

Expand All @@ -475,31 +496,31 @@ test_that("differential trancript abundance - random effects SE",{
c(0.1065866, 0.1109067, 0.1116562 , NA),
tolerance=1e-2
)

# Custom dispersion
se_mini =
se_mini =
se_mini |>
identify_abundant(factor_of_interest = condition)

rowData(se_mini)$disp_ = rep(2,nrow(se_mini))
res =

res =
se_mini[1:10,] |>
#mutate(time = time |> stringr::str_replace_all(" ", "_")) |>
#mutate(time = time |> stringr::str_replace_all(" ", "_")) |>
test_differential_abundance(
~ condition + (1 + condition | time),
method = "glmmseq_lme4",
.dispersion = disp_,
method = "glmmseq_lme4",
.dispersion = disp_,
cores = 1
)
rowData(res)[,"P_condition_adjusted"] |>
head(4) |>
)

rowData(res)[,"P_condition_adjusted"] |>
head(4) |>
expect_equal(
c(0.1153254, 0.1668555, 0.1668555 , NA),
tolerance=1e-3
)

})


Expand Down

0 comments on commit f4d9b4d

Please sign in to comment.