Add analyze_patients_exposure_in_cols (#916)

Closes #915 --------- Signed-off-by: Davide Garolini <[email protected]> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Davide Garolini <[email protected]> Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com>
insightsengineering · May 15, 2023 · c64ee78 · c64ee78
1 parent 3a56bf1
commit c64ee78
Show file tree

Hide file tree

Showing 8 changed files with 348 additions and 80 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -42,6 +42,7 @@ export(a_summary)
 export(add_rowcounts)
 export(aesi_label)
 export(analyze_num_patients)
+export(analyze_patients_exposure_in_cols)
 export(analyze_vars_in_cols)
 export(append_varlabels)
 export(as.rtable)

diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,7 @@
 * Corrected ordering of factor levels by `d_onco_rsp_label`.
 * Added new "Analyze Functions" and "Summarize Functions" overview pages.
 * Moved all KM plot documentation to the `g_km` function.
+* Added `a_count_patients_sum_exposure` for new analyze function `analyze_patients_exposure_in_cols` and `summarize_patients_exposure_in_cols`.
 
 ### Miscellaneous
 * Implemented the `lubridate` package for date variables in `tern` datasets.

diff --git a/R/analyze_colvars_functions.R b/R/analyze_colvars_functions.R
@@ -5,6 +5,7 @@
 #' These functions are wrappers of [rtables::analyze_colvars()] which apply corresponding `tern` statistics functions
 #' to add an analysis to a given table layout:
 #'
+#' * [analyze_patients_exposure_in_cols()]
 #' * [analyze_vars_in_cols()] (extended wrapper, never used in other `tern` functions)
 #' * [summarize_colvars()] (low level wrapper, never used in other `tern` functions)
 #' * [summarize_coxreg()] (contains also [rtables::summarize_row_groups()])

diff --git a/R/summarize_patients_exposure_in_cols.R b/R/summarize_patients_exposure_in_cols.R
@@ -5,20 +5,22 @@
 #' Counting the number of patients and summing analysis value (i.e exposure values) across all patients
 #' when a column table layout is required.
 #'
+#' @inheritParams argument_convention
+#'
 #' @name summarize_patients_exposure_in_cols
 NULL
 
 #' @describeIn summarize_patients_exposure_in_cols Statistics function which counts numbers
 #'   of patients and the sum of exposure across all patients.
 #'
-#' @inheritParams argument_convention
+#' @param ex_var (`character`)\cr name of the variable within `df` containing exposure values.
 #' @param custom_label (`string` or `NULL`)\cr if provided and `labelstr` is empty then this will
 #'   be used as label.
 #'
 #' @return
 #' * `s_count_patients_sum_exposure()` returns a named `list` with the statistics:
 #'   * `n_patients`: Number of unique patients in `df`.
-#'   * `sum_exposure`: Sum of `.var` across all patients in `df`.
+#'   * `sum_exposure`: Sum of `ex_var` across all patients in `df`.
 #'
 #' @examples
 #' set.seed(1)
@@ -39,6 +41,7 @@ NULL
 #' # Internal function - s_count_patients_sum_exposure
 #' \dontrun{
 #' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl))
+#' s_count_patients_sum_exposure(df = df, .N_col = nrow(adsl), .stats = "n_patients")
 #' s_count_patients_sum_exposure(
 #'   df = df,
 #'   .N_col = nrow(adsl),
@@ -47,17 +50,19 @@ NULL
 #' }
 #'
 #' @keywords internal
-s_count_patients_sum_exposure <- function(df, # nolintr
-                                          .var = "AVAL",
+s_count_patients_sum_exposure <- function(df,
+                                          ex_var = "AVAL",
                                           id = "USUBJID",
                                           labelstr = "",
-                                          .N_col, # nolintr
+                                          .stats = c("n_patients", "sum_exposure"),
+                                          .N_col, # nolint
                                           custom_label = NULL) {
-  assert_df_with_variables(df, list(.var = .var, id = id))
+  assert_df_with_variables(df, list(ex_var = ex_var, id = id))
   checkmate::assert_string(id)
   checkmate::assert_string(labelstr)
   checkmate::assert_string(custom_label, null.ok = TRUE)
-  checkmate::assert_numeric(df[[.var]])
+  checkmate::assert_numeric(df[[ex_var]])
+  checkmate::assert_true(all(.stats %in% c("n_patients", "sum_exposure")))
 
   row_label <- if (labelstr != "") {
     labelstr
@@ -69,65 +74,103 @@ s_count_patients_sum_exposure <- function(df, # nolintr
 
   y <- list()
 
-  y$n_patients <-
-    formatters::with_label(
-      s_num_patients_content(
-        df = df,
-        .N_col = .N_col, # nolintr
-        .var = id,
-        labelstr = ""
-      )$unique,
-      row_label
-    )
-
-  y$sum_exposure <- formatters::with_label(sum(df[[.var]]), row_label)
+  if ("n_patients" %in% .stats) {
+    y$n_patients <-
+      formatters::with_label(
+        s_num_patients_content(
+          df = df,
+          .N_col = .N_col, # nolint
+          .var = id,
+          labelstr = ""
+        )$unique,
+        row_label
+      )
+  }
+  if ("sum_exposure" %in% .stats) {
+    y$sum_exposure <- formatters::with_label(sum(df[[ex_var]]), row_label)
+  }
   y
 }
 
+#' @describeIn summarize_patients_exposure_in_cols Analysis function which is used as `afun` in
+#'   [rtables::analyze_colvars()] within `analyze_patients_exposure_in_cols()` and as `cfun` in
+#'   [rtables::summarize_row_groups()] within `summarize_patients_exposure_in_cols()`.
+#'
+#' @return
+#' * `a_count_patients_sum_exposure()` returns formatted [rtables::CellValue()].
+#'
+#' @examples
+#' tern:::a_count_patients_sum_exposure(
+#'   df = df,
+#'   var = "SEX",
+#'   .N_col = nrow(df),
+#'   .stats = "n_patients"
+#' )
+#'
+#' @keywords internal
+a_count_patients_sum_exposure <- function(df,
+                                          var = NULL,
+                                          ex_var = "AVAL",
+                                          id = "USUBJID",
+                                          labelstr = "",
+                                          .N_col, # nolint
+                                          .stats,
+                                          .formats = list(n_patients = "xx (xx.x%)", sum_exposure = "xx"),
+                                          custom_label = NULL) {
+  if (!is.null(var)) {
+    assert_df_with_variables(df, list(var = var))
+    df[[var]] <- as.factor(df[[var]])
+  }
+
+  y <- list()
+  if (is.null(var)) {
+    y[[.stats]] <- list(Total = s_count_patients_sum_exposure(
+      df = df,
+      ex_var = ex_var,
+      id = id,
+      labelstr = labelstr,
+      .N_col = .N_col,
+      .stats = .stats,
+      custom_label = custom_label
+    )[[.stats]])
+  } else {
+    for (lvl in levels(df[[var]])) {
+      y[[.stats]][[lvl]] <- s_count_patients_sum_exposure(
+        df = subset(df, get(var) == lvl),
+        ex_var = ex_var,
+        id = id,
+        labelstr = labelstr,
+        .N_col = .N_col,
+        .stats = .stats,
+        custom_label = lvl
+      )[[.stats]]
+    }
+  }
+
+  in_rows(.list = y[[.stats]], .formats = .formats[[.stats]])
+}
+
 #' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
 #'   function arguments and additional format arguments. This function is a wrapper for
 #'   [rtables::split_cols_by_multivar()] and [rtables::summarize_row_groups()].
 #'
-#' @inheritParams argument_convention
-#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
-#'   column split has been done already earlier in the layout pipe.
-#'
 #' @return
 #' * `summarize_patients_exposure_in_cols()` returns a layout object suitable for passing to further
 #'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
-#'   add formatted rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
+#'   add formatted content rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
 #'   columns, to the table layout.
 #'
 #' @examples
 #' lyt <- basic_table() %>%
-#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
-#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
-#'   split_rows_by("SEX") %>%
-#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = FALSE)
+#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
 #' result <- build_table(lyt, df = df, alt_counts_df = adsl)
 #' result
 #'
 #' lyt2 <- basic_table() %>%
-#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
-#'   summarize_patients_exposure_in_cols(
-#'     var = "AVAL", col_split = TRUE,
-#'     .stats = "n_patients", custom_label = "some custom label"
-#'   ) %>%
-#'   split_rows_by("SEX") %>%
-#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = FALSE)
+#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
 #' result2 <- build_table(lyt2, df = df, alt_counts_df = adsl)
 #' result2
 #'
-#' lyt3 <- basic_table() %>%
-#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE)
-#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
-#' result3
-#'
-#' lyt4 <- basic_table() %>%
-#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE, .stats = "sum_exposure")
-#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
-#' result4
-#'
 #' @export
 summarize_patients_exposure_in_cols <- function(lyt, # nolintr
                                                 var,
@@ -136,28 +179,82 @@ summarize_patients_exposure_in_cols <- function(lyt, # nolintr
                                                 .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
                                                 .indent_mods = NULL,
                                                 col_split = TRUE) {
-  afun_list <- Map(
-    function(stat) {
-      make_afun(
-        s_count_patients_sum_exposure,
-        .stats = stat,
-        .formats = ifelse(stat == "n_patients", "xx (xx.x%)", "xx")
-      )
-    },
-    stat = .stats
-  )
-
   if (col_split) {
     lyt <- split_cols_by_multivar(
       lyt = lyt,
       vars = rep(var, length(.stats)),
-      varlabels = .labels[.stats]
+      varlabels = .labels[.stats],
+      extra_args = list(.stats = .stats)
     )
   }
   summarize_row_groups(
     lyt = lyt,
     var = var,
-    cfun = afun_list,
+    cfun = a_count_patients_sum_exposure,
     extra_args = list(...)
   )
 }
+
+#' @describeIn summarize_patients_exposure_in_cols Layout-creating function which can take statistics
+#'   function arguments and additional format arguments. This function is a wrapper for
+#'   [rtables::split_cols_by_multivar()] and [rtables::analyze_colvars()].
+#'
+#' @inheritParams argument_convention
+#' @param col_split (`flag`)\cr whether the columns should be split. Set to `FALSE` when the required
+#'   column split has been done already earlier in the layout pipe.
+#'
+#' @return
+#' * `analyze_patients_exposure_in_cols()` returns a layout object suitable for passing to further
+#'   layouting functions, or to [rtables::build_table()]. Adding this function to an `rtable` layout will
+#'   add formatted data rows, with the statistics from `s_count_patients_sum_exposure()` arranged in
+#'   columns, to the table layout.
+#'
+#' @note As opposed to [summarize_patients_exposure_in_cols()] which generates content rows, this function
+#'   generates data rows which will _not_ be repeated on multiple pages when pagination is used.
+#'
+#' @examples
+#' lyt3 <- basic_table() %>%
+#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
+#'   summarize_patients_exposure_in_cols(var = "AVAL", col_split = TRUE) %>%
+#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE)
+#' result3 <- build_table(lyt3, df = df, alt_counts_df = adsl)
+#' result3
+#'
+#' lyt4 <- basic_table() %>%
+#'   split_cols_by("ARMCD", split_fun = add_overall_level("Total", first = FALSE)) %>%
+#'   summarize_patients_exposure_in_cols(
+#'     var = "AVAL", col_split = TRUE,
+#'     .stats = "n_patients", custom_label = "some custom label"
+#'   ) %>%
+#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = FALSE, ex_var = "AVAL")
+#' result4 <- build_table(lyt4, df = df, alt_counts_df = adsl)
+#' result4
+#'
+#' lyt5 <- basic_table() %>%
+#'   analyze_patients_exposure_in_cols(var = "SEX", col_split = TRUE, ex_var = "AVAL")
+#' result5 <- build_table(lyt5, df = df, alt_counts_df = adsl)
+#' result5
+#'
+#' @export
+analyze_patients_exposure_in_cols <- function(lyt, # nolint
+                                              var = NULL,
+                                              ex_var = "AVAL",
+                                              col_split = TRUE,
+                                              .stats = c("n_patients", "sum_exposure"),
+                                              .labels = c(n_patients = "Patients", sum_exposure = "Person time"),
+                                              .indent_mods = 0L) {
+  if (col_split) {
+    lyt <- split_cols_by_multivar(
+      lyt = lyt,
+      vars = rep(ex_var, length(.stats)),
+      varlabels = .labels[.stats],
+      extra_args = list(.stats = .stats)
+    )
+  }
+  lyt <- lyt %>% analyze_colvars(
+    afun = a_count_patients_sum_exposure,
+    indent_mod = .indent_mods,
+    extra_args = list(var = var, ex_var = ex_var)
+  )
+  lyt
+}
diff --git a/man/analyze_colvars_functions.Rd b/man/analyze_colvars_functions.Rd