Merge 4702189 into d913f44

insightsengineering · Aug 29, 2022 · 108256e · 108256e
2 parents d913f44 + 4702189
commit 108256e
Show file tree

Hide file tree

Showing 6 changed files with 233 additions and 30 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,8 @@
 * Added `stat_mean_pval` function to calculate the p-value of the mean as a new summary statistic.
 * New statistic `mean_se` (mean with standard error) for `summarize_variables()` 
   and related functions.
+* Added parameters `interaction_y' and `interaction_item` in ANCOVA related functions to make the 
+  calculation about interaction available.
 
 ### Migration from `assertthat` to `checkmate`
 * Substituted all `assertthat` calls with `checkmate`. 
@@ -48,7 +50,7 @@
   custom strings across data.frame (this can be merged with `df_explicit_na`).
 * Fixed warnings occurring in example tests.
 * Fixed internal function examples errors by removing `tern:::` prefix and added `dontrun` to internal function examples.
-* Fixed bug in `s_ancova` causing an error when the first level of the arm factor is not the control arm.
+* Fixed bug in `s_ancova` causing an error when the first level of the arm factor is not the control arm. 
 
 ### Documentation and NAMESPACE Polishing
 * Added stable badges for:

diff --git a/R/summarize_ancova.R b/R/summarize_ancova.R
@@ -20,7 +20,8 @@ NULL
 #'   reference group.
 #'   - `covariates`: (`character`)\cr a vector that can contain single variable names (such as
 #'   `"X1"`), and/or interaction terms indicated by `"X1 * X2"`.
-#'
+#' @param interaction_item (`character`)\cr name of the variable that should have interactions
+#'   with arm. if the interaction is not needed, the default option is NULL
 #' @examples
 #' h_ancova(
 #'   .var = "Sepal.Length",
@@ -31,7 +32,8 @@ NULL
 #' @export
 h_ancova <- function(.var,
                      .df_row,
-                     variables) {
+                     variables,
+                     interaction_item = NULL) {
   checkmate::assert_string(.var)
   checkmate::assert_list(variables)
   checkmate::assert_subset(names(variables), c("arm", "covariates"))
@@ -52,14 +54,20 @@ h_ancova <- function(.var,
     formula <- stats::as.formula(paste0(.var, " ~ ", arm))
   }
 
+  if (is.null(interaction_item)) {
+    specs <- arm
+  } else {
+    specs <- c(arm, interaction_item)
+  }
+
   lm_fit <- stats::lm(
     formula = formula,
     data = .df_row
   )
   emmeans_fit <- emmeans::emmeans(
     lm_fit,
     # Specify here the group variable over which EMM are desired.
-    specs = arm,
+    specs = specs,
     # Pass the data again so that the factor levels of the arm variable can be inferred.
     data = .df_row
   )
@@ -71,6 +79,10 @@ h_ancova <- function(.var,
 #'   of the investigated linear model.
 #' @inheritParams argument_convention
 #' @inheritParams h_ancova
+#' @param interaction_y (`character`)\cr a selected item inside of the interaction_item column
+#'   which will be used to select the specific ANCOVA results. if the interaction is not
+#'   needed, the default option is FALSE
+#'
 #' @return A named list of 5 statistics:
 #'   - `n`: count of complete sample size for the group.
 #'   - `lsmean`: estimated marginal means in the group.
@@ -85,8 +97,8 @@ h_ancova <- function(.var,
 #' library(scda)
 #' library(dplyr)
 #'
-#' adsl <- synthetic_cdisc_data("latest")$adsl
-#' adqs <- synthetic_cdisc_data("latest")$adqs
+#' adsl <- synthetic_cdisc_data("rcd_2022_02_28")$adsl
+#' adqs <- synthetic_cdisc_data("rcd_2022_02_28")$adqs
 #'
 #' adqs_single <- adqs %>%
 #'   filter(
@@ -106,7 +118,7 @@ h_ancova <- function(.var,
 #'
 #' # Internal function - s_ancova
 #' \dontrun{
-#' s_ancova(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level)
+#' s_ancova(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level, interaction_y = FALSE, interaction_item = NULL)
 #' }
 #'
 #' @keywords internal
@@ -116,8 +128,10 @@ s_ancova <- function(df,
                      variables,
                      .ref_group,
                      .in_ref_col,
-                     conf_level) {
-  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row)
+                     conf_level,
+                     interaction_y = FALSE,
+                     interaction_item = NULL) {
+  emmeans_fit <- h_ancova(.var = .var, variables = variables, .df_row = .df_row, interaction_item = interaction_item)
 
   sum_fit <- summary(
     emmeans_fit,
@@ -126,17 +140,29 @@ s_ancova <- function(df,
 
   arm <- variables$arm
 
-  y <- df[[.var]]
   sum_level <- as.character(unique(df[[arm]]))
 
   # Ensure that there is only one element in sum_level.
   checkmate::assert_scalar(sum_level)
 
   sum_fit_level <- sum_fit[sum_fit[[arm]] == sum_level, ]
 
-  # Get the index of ref arm
-  ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
-  ref_key <- tail(ref_key, n = 1)
+  # Get the index of the ref arm
+  if (interaction_y != FALSE) {
+    y <- unlist(df[(df[[interaction_item]] == interaction_y), .var])
+    # convert characters selected in interaction_y into the numeric order
+    interaction_y <- which(sum_fit_level[[interaction_item]] == interaction_y)
+    sum_fit_level <- sum_fit_level[interaction_y, ]
+    # if interaction is called, reset the index
+    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
+    ref_key <- tail(ref_key, n = 1)
+    ref_key <- (interaction_y - 1) * length(unique(.df_row[[arm]])) + ref_key
+  } else {
+    y <- df[[.var]]
+    # Get the index of the ref arm when interaction is not called
+    ref_key <- seq(sum_fit[[arm]][unique(.ref_group[[arm]])])
+    ref_key <- tail(ref_key, n = 1)
+  }
 
   if (.in_ref_col) {
     list(
@@ -164,6 +190,9 @@ s_ancova <- function(df,
     )
 
     sum_contrasts_level <- sum_contrasts[grepl(sum_level, sum_contrasts$contrast), ]
+    if (interaction_y != FALSE) {
+      sum_contrasts_level <- sum_contrasts_level[interaction_y, ]
+    }
 
     list(
       n = length(y[!is.na(y)]),
@@ -184,7 +213,7 @@ s_ancova <- function(df,
 #' @examples
 #' # Internal function - a_ancova
 #' \dontrun{
-#' a_ancova(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, conf_level)
+#' a_ancova(df, .var, .df_row, variables, .ref_group, .in_ref_col = FALSE, interaction_y = FALSE, interaction_item = NULL, conf_level)
 #' }
 #'
 #' @keywords internal
@@ -211,8 +240,8 @@ a_ancova <- make_afun(
 #' library(rtables)
 #' library(dplyr)
 #'
-#' adsl <- synthetic_cdisc_data("latest")$adsl
-#' adqs <- synthetic_cdisc_data("latest")$adqs
+#' adsl <- synthetic_cdisc_data("rcd_2022_02_28")$adsl
+#' adqs <- synthetic_cdisc_data("rcd_2022_02_28")$adqs
 #' adqs_single <- adqs %>%
 #'   filter(
 #'     AVISIT == "WEEK 1 DAY 8", # single time point
@@ -239,6 +268,49 @@ a_ancova <- make_afun(
 #'     conf_level = 0.95, var_labels = "Adjusted comparison (covariates BASE and STRATA1)"
 #'   ) %>%
 #'   build_table(adqs_single, alt_counts_df = adsl)
+#'
+#' # Another example: count the interaction between rows and columns into consideration
+#' adsl <- synthetic_cdisc_data("rcd_2022_02_28")$adsl
+#' adqs <- synthetic_cdisc_data("rcd_2022_02_28")$adqs
+#' adqs_single <- adqs %>%
+#'   filter(AVISIT %in% c("WEEK 1 DAY 8", "WEEK 2 DAY 15", "WEEK 5 DAY 36")) %>%
+#'   droplevels() %>%
+#'   filter(PARAM == "BFI All Questions") %>%
+#'   mutate(CHG = ifelse(BMEASIFL == "Y", CHG, NA)) # only analyze evaluable population
+#'
+#' basic_table() %>%
+#'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
+#'   add_colcounts() %>%
+#'   split_rows_by("STRATA1", split_fun = drop_split_levels) %>%
+#'   summarize_ancova(
+#'     vars = "CHG",
+#'     variables = list(arm = "ARMCD", covariates = c("BASE", "AVISIT", "AVISIT*ARMCD")),
+#'     conf_level = 0.95,
+#'     var_labels = "WEEK 1 DAY 8",
+#'     table_names = "WEEK 1 DAY 8",
+#'     interaction_y = "WEEK 1 DAY 8",
+#'     interaction_item = "AVISIT"
+#'   ) %>%
+#'   summarize_ancova(
+#'     vars = "CHG",
+#'     variables = list(arm = "ARMCD", covariates = c("BASE", "AVISIT", "AVISIT*ARMCD")),
+#'     conf_level = 0.95,
+#'     var_labels = "WEEK 2 DAY 15",
+#'     table_names = "WEEK 2 DAY 15",
+#'     interaction_y = "WEEK 2 DAY 15",
+#'     interaction_item = "AVISIT"
+#'   ) %>%
+#'   summarize_ancova(
+#'     vars = "CHG",
+#'     variables = list(arm = "ARMCD", covariates = c("BASE", "AVISIT", "AVISIT*ARMCD")),
+#'     conf_level = 0.95,
+#'     var_labels = "WEEK 5 DAY 36",
+#'     table_names = "WEEK 5 DAY 36",
+#'     interaction_y = "WEEK 5 DAY 36",
+#'     interaction_item = "AVISIT"
+#'   ) %>%
+#'   build_table(adqs_single, alt_counts_df = adsl)
+#'
 #' \dontrun{
 #' basic_table() %>%
 #'   split_cols_by("ARMCD", ref_group = "ARM A") %>%
@@ -259,9 +331,13 @@ summarize_ancova <- function(lyt,
                              .stats = NULL,
                              .formats = NULL,
                              .labels = NULL,
-                             .indent_mods = NULL) {
+                             .indent_mods = NULL,
+                             interaction_y = FALSE,
+                             interaction_item = NULL) {
   afun <- make_afun(
     a_ancova,
+    interaction_y = interaction_y,
+    interaction_item = interaction_item,
     .stats = .stats,
     .formats = .formats,
     .labels = .labels,

diff --git a/man/compare_variables.Rd b/man/compare_variables.Rd
diff --git a/man/summarize_ancova.Rd b/man/summarize_ancova.Rd