insightsengineering · anajens · Oct 14, 2023 · Oct 15, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -159,3 +159,4 @@ Collate:
     'utils_factor.R'
     'utils_grid.R'
     'utils_rtables.R'
+    'utils_split_funs.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -217,7 +217,9 @@ export(has_fraction_in_cols)
 export(has_fractions_difference)
 export(imputation_rule)
 export(keep_content_rows)
+export(keep_level_order)
 export(keep_rows)
+export(level_order)
 export(logistic_regression_cols)
 export(logistic_summary_by_flag)
 export(month2day)
@@ -235,6 +237,7 @@ export(prop_strat_wilson)
 export(prop_wald)
 export(prop_wilson)
 export(reapply_varlabels)
+export(ref_group_last)
 export(s_compare)
 export(s_count_occurrences)
 export(s_count_occurrences_by_grade)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,7 @@
 
 ### New Features
 * Added the `na_str` argument to `analyze` & `summarize_row_groups` wrapper functions `count_abnormal`, `count_abnormal_by_baseline`, `count_abnormal_by_marked`, `count_abnormal_by_worst_grade`, `count_abnormal_lab_worsen_by_baseline`, `count_cumulative`, `count_missed_doses`, `count_occurrences`, `count_occurrences_by_grade`, `summarize_occurrences_by_grade`, `summarize_patients_events_in_cols`, `count_patients_with_event`, `count_patients_with_flags`, `count_values`, `estimate_multinomial_response`, `estimate_proportion`, `h_tab_one_biomarker`, `estimate_incidence_rate`, `logistic_summary_by_flag`, `estimate_odds_ratio`, `estimate_proportion_diff`, `test_proportion_diff`, `summarize_ancova`, `summarize_change`, `summarize_glm_count`, `summarize_num_patients`, `analyze_num_patients`, `summarize_patients_exposure_in_cols`, `coxph_pairwise`, `tabulate_survival_subgroups`, `surv_time`, and `surv_timepoint`.
+* Added `ref_group_last` function to be used in creating custom split functions to place the reference group facet last.
 
 ### Enhancements
 * Added formatting function `format_count_fraction_lt10` for formatting `count_fraction` with special consideration when count is less than 10.

diff --git a/R/utils_split_funs.R b/R/utils_split_funs.R
@@ -0,0 +1,156 @@
+#' Custom Split Functions
+#'
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Collection of useful functions that expand on the core list of functions
+#' provided by `rtables`. See `?rtables::custom_split_funs` and [rtables::make_split_fun()]
+#' for more information on how to make a custom split function. All these functions
+#' work with [split_rows_by()] argument `split_fun` to modify the way the split
+#' happens.
+#'
+#' @seealso [rtables::make_split_fun()]
+#'
+#' @name utils_split_funs
+NULL
+
+#' @describeIn utils_split_funs split function to place reference group facet last
+#'  during post-processing stage.
+#'
+#' @return
+#' * `ref_group_last` returns a utility function that places the reference group
+#'  facet last. It needs to be assigned to `split_fun`.
+#'
+#' @examples
+#' library(dplyr)
+#'
+#' dat <- data.frame(
+#'   x = factor(letters[1:5], levels = letters[5:1]),
+#'   y = 1:5
+#' )
+#' # ref_group_last
+#'
+#' # With rtables layout functions
+#' basic_table() %>%
+#'   split_cols_by("x", ref_group = "c", split_fun = ref_group_last) %>%
+#'   analyze("y") %>%
+#'   build_table(dat)
+#'
+#' # With tern layout functions
+#' adtte_f <- tern_ex_adtte %>%
+#'   filter(PARAMCD == "OS") %>%
+#'   mutate(
+#'     AVAL = day2month(AVAL),
+#'     is_event = CNSR == 0
+#'   )
+#'
+#' basic_table() %>%
+#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_last) %>%
+#'   add_colcounts() %>%
+#'   surv_time(
+#'     vars = "AVAL",
+#'     var_labels = "Survival Time (Months)",
+#'     is_event = "is_event",
+#'   ) %>%
+#'   build_table(df = adtte_f)
+#'
+#' @export
+ref_group_last <- make_split_fun(
+  post = list(
+    # Post-processing step: order facets by factor level, reference group last.
+    function(splret, spl, fulldf) {
+      if (!"ref_group_value" %in% methods::slotNames(spl)) {
+        stop("Reference group is undefined.")
+      }
+      facet_nms <- names(splret$values)
+      init_lvls <- levels(fulldf[[spl@payload]])
+      if (!all(facet_nms %in% init_lvls)) {
+        stop("This split function does not work with combination facets.")
+      }
+      # A logical mask is used on purpose: `x[-which(...)]` returns an empty vector
+      # when nothing matches, which would silently drop every facet.
+      is_ref <- init_lvls %in% spl@ref_group_value
+      ord <- match(c(init_lvls[!is_ref], init_lvls[is_ref]), facet_nms)
+      # Levels that produced no facet come back as NA from `match()`; skip them.
+      ord <- ord[!is.na(ord)]
+      make_split_result(
+        splret$values[ord],
+        splret$datasplit[ord],
+        splret$labels[ord]
+      )
+    }
+  )
+)
+#' @describeIn utils_split_funs split function to keep original order of factor
+#'  levels in the split.
+#'
+#' @return
+#' * `keep_level_order` returns a utility function that keeps the original order of the
+#'  levels. It needs to be assigned to `split_fun`.
+#'
+#' @examples
+#' # keep_level_order --------
+#' # Even if default would bring ref_group first, the original order puts it last
+#' basic_table() %>%
+#'   split_cols_by("Species", ref_group = "virginica", split_fun = keep_level_order) %>%
+#'   analyze("Sepal.Length") %>%
+#'   build_table(iris)
+#'
+#' @export
+keep_level_order <- make_split_fun(
+  post = list(
+    function(splret, spl, fulldf, ...) {
+      # Rank facets by factor-level position; sorting their names is only alphabetical.
+      lvls <- if (checkmate::test_string(spl@payload)) levels(fulldf[[spl@payload]])
+      lvl_rank <- match(names(splret$values), lvls)
+      # No usable levels (non-factor, unknown facet): fall back to ordering by name.
+      ord <- if (anyNA(lvl_rank)) order(names(splret$values)) else order(lvl_rank)
+      make_split_result(splret$values[ord], splret$datasplit[ord], splret$labels[ord])
+    }
+  )
+)
+#' @describeIn utils_split_funs split function to change level order based on an integerish
+#'   vector or a character vector that represents the split variable's factor levels.
+#'
+#' @param order (`character` or `integer`)\cr vector of ordering indexes for the split facets.
+#'
+#' @return
+#' * `level_order` returns a utility function that changes the original levels' order,
+#'   depending on input `order` and split levels.
+#'
+#' @examples
+#' # level_order --------
+#' # integerish vector: positions of the split facets in the desired order
+#' basic_table() %>%
+#'   split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>%
+#'   analyze("Sepal.Length") %>%
+#'   build_table(iris)
+#'
+#' # character vector
+#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)])
+#' basic_table() %>%
+#'   split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>%
+#'   analyze("Sepal.Length") %>%
+#'   build_table(iris)
+#'
+#' @export
+level_order <- function(order) {
+  force(order) # evaluate eagerly: the closure must not pick up later changes
+  make_split_fun(
+    post = list(
+      function(splret, spl, fulldf, ...) {
+        n_facets <- length(splret$values)
+        if (checkmate::test_integerish(order)) {
+          # Positional indices; NA cannot index a facet, so it is rejected here.
+          checkmate::assert_integerish(order, lower = 1, upper = n_facets, any.missing = FALSE)
+          ord <- order
+        } else {
+          # Facet names; must be a permutation of the names produced by the split.
+          checkmate::assert_character(order, len = n_facets)
+          checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE)
+          ord <- match(order, names(splret$values))
+        }
+        make_split_result(splret$values[ord], splret$datasplit[ord], splret$labels[ord])
+      }
+    )
+  )
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -114,6 +114,7 @@ reference:
       - split_cols_by_groups
       - to_string_matrix
       - groups_list_to_df
+      - utils_split_funs
 
   - title: rtables Formatting Functions
     desc: These functions provide customized formatting rules to work with the

diff --git a/man/utils_split_funs.Rd b/man/utils_split_funs.Rd
diff --git a/tests/testthat/_snaps/utils_split_fun.md b/tests/testthat/_snaps/utils_split_fun.md
@@ -0,0 +1,77 @@
+# analyze_vars works as expected with ref_group_last split fun
+
+    Code
+      res[3:4, ]
+    Output
+                    ARM A        ARM B        ARM C   
+                    (N=69)       (N=73)       (N=58)  
+      ————————————————————————————————————————————————
+      Mean (SD)   34.1 (6.8)   35.8 (7.1)   36.1 (7.4)
+      Median         32.8         35.4         36.2   
+
+# compare_vars works as expected with ref_group_last split fun
+
+    Code
+      res[1:2, ]
+    Output
+                    ARM A        ARM C        ARM B   
+                    (N=69)       (N=58)       (N=73)  
+      ————————————————————————————————————————————————
+      n               69           58           73    
+      Mean (SD)   34.1 (6.8)   36.1 (7.4)   35.8 (7.1)
+
+# summarize_ancova works as expected with ref_group_last split fun
+
+    Code
+      res[1:2, ]
+    Output
+                              ARM A    ARM C    ARM B 
+                              (N=69)   (N=58)   (N=73)
+      ————————————————————————————————————————————————
+      Unadjusted comparison                           
+        n                       69       58       73  
+
+---
+
+    Code
+      res
+    Output
+                                   ARM A    ARM C    ARM B 
+                                   (N=69)   (N=58)   (N=73)
+      —————————————————————————————————————————————————————
+      Unadjusted rate (per year)                           
+        Rate                       8.2061   7.8551   9.1554
+
+# binary endpoint layouts work as expected with ref_group_last split fun
+
+    Code
+      res
+    Output
+                                             A: Drug X          C: Combination     B: Placebo
+                                               (N=69)               (N=58)           (N=73)  
+      ———————————————————————————————————————————————————————————————————————————————————————
+      Odds Ratio (95% CI)                2.47 (1.22 - 5.01)   2.29 (1.10 - 4.78)             
+      Difference in Response rate (%)           20.5                 19.0                    
+        95% CI (Wald, with correction)      (3.6, 37.3)          (1.2, 36.8)                 
+        p-value (Chi-Squared Test)             0.0113               0.0263                   
+
+# time to event layouts works as expected with ref_group_last split fun
+
+    Code
+      res
+    Output
+                                            ARM A             ARM C            ARM B     
+                                            (N=69)           (N=58)            (N=73)    
+      ———————————————————————————————————————————————————————————————————————————————————
+      CoxPH                                                                              
+        p-value (log-rank)                  0.0159           0.1820                      
+        Hazard Ratio                         0.58             1.31                       
+        95% CI                           (0.37, 0.91)     (0.88, 1.95)                   
+      6 Months                                                                           
+        Patients remaining at risk            49               39                46      
+        Event Free Rate (%)                 85.29             71.87            71.55     
+        95% CI                          (76.38, 94.19)   (60.15, 83.58)    (60.96, 82.14)
+        Difference in Event Free Rate       13.74             0.31                       
+          95% CI                        (-0.10, 27.57)   (-15.47, 16.10)                 
+          p-value (Z-test)                  0.0517           0.9688                      
+