Skip to content

Commit

Permalink
Merge pull request #276 from stemangiola/random-effects
Browse files Browse the repository at this point in the history
add tibble based method
  • Loading branch information
stemangiola authored Jul 6, 2023
2 parents 136461b + 895aabc commit e97612a
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ Suggests:
igraph,
EGSEA,
IRanges,
here
here,
glmmSeq
VignetteBuilder:
knitr
RdMacros:
Expand Down
146 changes: 146 additions & 0 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,152 @@ get_differential_transcript_abundance_bulk <- function(.data,
}
}

#' Get differential transcription information to a tibble using glmmSeq
#'
#' @keywords internal
#' @noRd
#'
#'
#'
#' @import tibble
#' @importFrom magrittr set_colnames
#' @importFrom stats model.matrix
#' @importFrom utils install.packages
#' @importFrom purrr when
#' @importFrom rlang inform
#' @importFrom tidyr spread
#' @importFrom tidyr pivot_wider
#' @importFrom dplyr slice
#'
#'
#' @param .data A tibble
#' @param .formula a formula with no response variable, referring only to numeric variables
#' @param .sample The name of the sample column
#' @param .transcript The name of the transcript/gene column
#' @param .abundance The name of the transcript/gene abundance column
#' @param .contrasts A character vector. Not used for this method
#' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)
#' @param test_above_log2_fold_change A positive real value. This works for edgeR and limma_voom methods. It uses the `treat` function, which tests that the difference in abundance is bigger than this threshold rather than zero \url{https://pubmed.ncbi.nlm.nih.gov/19176553}.
#' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")
#' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames.
#' @param .sample_total_read_count
#'
#' @return A tibble with glmmSeq results
#'
get_differential_transcript_abundance_glmmSeq <- function(.data,
.formula,
.sample = NULL,
.transcript = NULL,
.abundance = NULL,
.contrasts = NULL,
method ,
test_above_log2_fold_change = NULL,
scaling_method = NULL,
omit_contrast_in_colnames = FALSE,
prefix = "",
.sample_total_read_count = NULL,
...) {
# Get column names
.sample = enquo(.sample)
.transcript = enquo(.transcript)
.abundance = enquo(.abundance)
.sample_total_read_count = enquo(.sample_total_read_count)

# Check if omit_contrast_in_colnames is correctly setup
if(omit_contrast_in_colnames & length(.contrasts) > 1){
warning("tidybulk says: you can omit contrasts in column names only when maximum one contrast is present")
omit_contrast_in_colnames = FALSE
}

# # Specify the design column tested
# if(is.null(.contrasts))
# message(
# sprintf(
# "tidybulk says: The design column being tested is %s",
# design %>% colnames %>% .[1]
# )
# )

# # If I don't have intercept or I have categorical factor of interest BUT I don't have contrasts
# if(
# is.null(.contrasts) &
# (
# (
# ! .data |> pull(parse_formula(.formula)[1]) |> is("numeric") &
# .data |> pull(parse_formula(.formula)[1]) |> unique() |> length() |> gt(2)
# ) |
# colnames(design)[1] != "(Intercept)"
# )
# )
# warning("tidybulk says: If you have (i) an intercept-free design (i.e. ~ 0 + factor) or you have a categorical factor of interest with more than 2 values you should use the `contrasts` argument.")
#
# my_contrasts =
# .contrasts %>%
# ifelse_pipe(length(.) > 0,
# ~ limma::makeContrasts(contrasts = .x, levels = design),
# ~ NULL)

# Check if package is installed, otherwise install
if (find.package("edgeR", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing edgeR needed for differential transcript abundance analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("edgeR", ask = FALSE)
}

# Check if package is installed, otherwise install
if (find.package("glmmSeq", quiet = TRUE) %>% length %>% equals(0)) {
message("tidybulk says: Installing glmmSeq needed for differential transcript abundance analyses")
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager", repos = "https://cloud.r-project.org")
BiocManager::install("glmmSeq", ask = FALSE)
}

metadata =
.data |>
pivot_sample(!!.sample) |>
as_data_frame(rownames = !!.sample)

counts =
.data %>%
select(!!.transcript,!!.sample,!!.abundance) %>%
spread(!!.sample,!!.abundance) %>%
as_matrix(rownames = !!.transcript)

# Reorder counts
counts = counts[,rownames(metadata),drop=FALSE]

glmmSeq_object =
glmmSeq::glmmSeq( .formula,
countdata = counts ,
metadata = metadata,
dispersion = setNames(edgeR::estimateDisp(counts)$tagwise.dispersion, rownames(counts)),
progress = TRUE,
method = method |> str_remove("^glmmSeq_"),
...
)

glmmSeq_object |>
summary() |>
as_tibble(rownames = "gene") |>
mutate(across(starts_with("P_"), list(adjusted = function(x) p.adjust(x, method="BH")), .names = "{.col}_{.fn}")) |>

# Attach attributes
reattach_internals(.data) %>%

# select method
memorise_methods_used("glmmSeq") |>

# Add raw object
attach_to_internals(glmmSeq_object, "glmmSeq") %>%
# Communicate the attribute added
{

rlang::inform("tidybulk says: to access the raw results (fitted GLM) do `attr(..., \"internals\")$glmmSeq`", .frequency_id = "Access DE results glmmSeq", .frequency = "once")

(.)
}
}

#' Get differential transcription information to a tibble using voom.
#'
Expand Down
18 changes: 18 additions & 0 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -2586,6 +2586,22 @@ such as batch effects (if applicable) in the formula.
prefix = prefix,
...
),

# glmmseq
tolower(method) %in% c("glmmseq_lme4", "glmmseq_glmmTMB") ~ get_differential_transcript_abundance_glmmSeq(
.,
.formula,
.sample = !!.sample,
.transcript = !!.transcript,
.abundance = !!.abundance,
.contrasts = contrasts,
method = method,
test_above_log2_fold_change = test_above_log2_fold_change,
scaling_method = scaling_method,
omit_contrast_in_colnames = omit_contrast_in_colnames,
prefix = prefix,
...
),

# Else error
TRUE ~ stop("tidybulk says: the only methods supported at the moment are \"edgeR_quasi_likelihood\" (i.e., QLF), \"edgeR_likelihood_ratio\" (i.e., LRT), \"limma_voom\", \"limma_voom_sample_weights\", \"DESeq2\"")
Expand Down Expand Up @@ -4539,3 +4555,5 @@ as_matrix <- function(tbl,
# Convert to matrix
as.matrix()
}


43 changes: 43 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,44 @@
#' Get matrix from tibble
#'
#' @keywords internal
#' @noRd
#'
#'
#' @importFrom magrittr set_rownames
#' @importFrom rlang quo_is_null
#'
#' @param tbl A tibble
#' @param rownames The column name of the input tibble that will become the rownames of the output matrix
#' @param do_check A boolean
#'
#' @return A matrix
#'
#' @examples
#'
#'
#' tibble(.feature = "CD3G", count=1) |> as_matrix(rownames=.feature)
#'
as_data_frame <- function(tbl,
rownames = NULL,
do_check = TRUE) {

# Fix NOTEs
. = NULL

rownames = enquo(rownames)
tbl %>%

as.data.frame() |>

# Deal with rownames column if present
ifelse_pipe(
!quo_is_null(rownames),
~ .x |>
magrittr::set_rownames(tbl |> pull(!!rownames)) |>
select(-1)
)
}

my_stop = function() {
stop("
You should call tidybulk library *after* tidyverse libraries.
Expand Down Expand Up @@ -1581,3 +1622,5 @@ get_special_column_name_symbol = function(name){
feature__ = get_special_column_name_symbol(".feature")
sample__ = get_special_column_name_symbol(".sample")



0 comments on commit e97612a

Please sign in to comment.