Add split functions for ref_group position (#1111)

# Pull Request  Fixes #1085 --------- Signed-off-by: Davide Garolini <[email protected]> Co-authored-by: stoilovs <[email protected]> Co-authored-by: Jana Stoilova <[email protected]> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
insightsengineering · Oct 31, 2023 · 5e8cfd1 · 5e8cfd1
1 parent 9c4b9b3
commit 5e8cfd1
Show file tree

Hide file tree

Showing 8 changed files with 487 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -167,3 +167,4 @@ Collate:
     'utils_factor.R'
     'utils_grid.R'
     'utils_rtables.R'
+    'utils_split_funs.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -219,6 +219,7 @@ export(has_fractions_difference)
 export(imputation_rule)
 export(keep_content_rows)
 export(keep_rows)
+export(level_order)
 export(logistic_regression_cols)
 export(logistic_summary_by_flag)
 export(month2day)
@@ -236,6 +237,7 @@ export(prop_strat_wilson)
 export(prop_wald)
 export(prop_wilson)
 export(reapply_varlabels)
+export(ref_group_position)
 export(s_compare)
 export(s_count_occurrences)
 export(s_count_occurrences_by_grade)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
 # tern 0.9.2.9003
 
+### New Features
+* Added `ref_group_position` function to place the reference group facet last, first or at a certain position.
+* Added `keep_level_order` split function to retain original order of levels in a split.
+* Added `level_order` split function to reorder manually the levels.
+
 ### Miscellaneous
 * Specified minimal version of package dependencies.
 

diff --git a/R/utils_split_funs.R b/R/utils_split_funs.R
@@ -0,0 +1,154 @@
+#' Custom Split Functions
+#'
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Collection of useful functions that are expanding on the core list of functions
+#' provided by `rtables`. See [rtables::custom_split_funs] and [rtables::make_split_fun()]
+#' for more information on how to make a custom split function. All these functions
+#' work with [split_rows_by()] argument `split_fun` to modify the way the split
+#' happens. For other split functions, consider consulting [`rtables::split_funcs`].
+#'
+#' @seealso [rtables::make_split_fun()]
+#'
+#' @name utils_split_funs
+NULL
+
+#' @describeIn utils_split_funs split function to place reference group facet at a specific position
+#'  during post-processing stage.
+#'
+#' @param position (`string` or `integer`)\cr should it be `"first"` or `"last"` or in a specific position?
+#'
+#' @return
+#' * `ref_group_position` returns an utility function that puts the reference group
+#'  as first, last or at a certain position and needs to be assigned to `split_fun`.
+#'
+#' @examples
+#' library(dplyr)
+#'
+#' dat <- data.frame(
+#'   x = factor(letters[1:5], levels = letters[5:1]),
+#'   y = 1:5
+#' )
+#'
+#' # With rtables layout functions
+#' basic_table() %>%
+#'   split_cols_by("x", ref_group = "c", split_fun = ref_group_position("last")) %>%
+#'   analyze("y") %>%
+#'   build_table(dat)
+#'
+#' # With tern layout funcitons
+#' adtte_f <- tern_ex_adtte %>%
+#'   filter(PARAMCD == "OS") %>%
+#'   mutate(
+#'     AVAL = day2month(AVAL),
+#'     is_event = CNSR == 0
+#'   )
+#'
+#' basic_table() %>%
+#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position("first")) %>%
+#'   add_colcounts() %>%
+#'   surv_time(
+#'     vars = "AVAL",
+#'     var_labels = "Survival Time (Months)",
+#'     is_event = "is_event",
+#'   ) %>%
+#'   build_table(df = adtte_f)
+#'
+#' basic_table() %>%
+#'   split_cols_by(var = "ARMCD", ref_group = "ARM B", split_fun = ref_group_position(2)) %>%
+#'   add_colcounts() %>%
+#'   surv_time(
+#'     vars = "AVAL",
+#'     var_labels = "Survival Time (Months)",
+#'     is_event = "is_event",
+#'   ) %>%
+#'   build_table(df = adtte_f)
+#'
+#' @export
+ref_group_position <- function(position = "first") {
+  make_split_fun(
+    post = list(
+      function(splret, spl, fulldf) {
+        if (!"ref_group_value" %in% methods::slotNames(spl)) {
+          stop("Reference group is undefined.")
+        }
+
+        spl_var <- rtables:::spl_payload(spl)
+        fulldf[[spl_var]] <- factor(fulldf[[spl_var]])
+        init_lvls <- levels(fulldf[[spl_var]])
+
+        if (!all(names(splret$values) %in% init_lvls)) {
+          stop("This split function does not work with combination facets.")
+        }
+
+        ref_group_pos <- which(init_lvls == rtables:::spl_ref_group(spl))
+        pos_choices <- c("first", "last")
+        if (checkmate::test_choice(position, pos_choices) && position == "first") {
+          pos <- 0
+        } else if (checkmate::test_choice(position, pos_choices) && position == "last") {
+          pos <- length(init_lvls)
+        } else if (checkmate::test_int(position, lower = 1, upper = length(init_lvls))) {
+          pos <- position - 1
+        } else {
+          stop("Wrong input for ref group position. It must be 'first', 'last', or a integer.")
+        }
+
+        reord_lvls <- append(init_lvls[-ref_group_pos], init_lvls[ref_group_pos], after = pos)
+        ord <- match(reord_lvls, names(splret$values))
+
+        make_split_result(
+          splret$values[ord],
+          splret$datasplit[ord],
+          splret$labels[ord]
+        )
+      }
+    )
+  )
+}
+
+#' @describeIn utils_split_funs split function to change level order based on a `integer`
+#'   vector or a `character` vector that represent the split variable's factor levels.
+#'
+#' @param order (`character` or `integer`)\cr vector of ordering indexes for the split facets.
+#'
+#' @return
+#' * `level_order` returns an utility function that changes the original levels' order,
+#'   depending on input `order` and split levels.
+#'
+#' @examples
+#' # level_order --------
+#' # Even if default would bring ref_group first, the original order puts it last
+#' basic_table() %>%
+#'   split_cols_by("Species", split_fun = level_order(c(1, 3, 2))) %>%
+#'   analyze("Sepal.Length") %>%
+#'   build_table(iris)
+#'
+#' # character vector
+#' new_order <- level_order(levels(iris$Species)[c(1, 3, 2)])
+#' basic_table() %>%
+#'   split_cols_by("Species", ref_group = "virginica", split_fun = new_order) %>%
+#'   analyze("Sepal.Length") %>%
+#'   build_table(iris)
+#'
+#' @export
+level_order <- function(order) {
+  make_split_fun(
+    post = list(
+      function(splret, spl, fulldf) {
+        if (checkmate::test_integerish(order)) {
+          checkmate::assert_integerish(order, lower = 1, upper = length(splret$values))
+          ord <- order
+        } else {
+          checkmate::assert_character(order, len = length(splret$values))
+          checkmate::assert_set_equal(order, names(splret$values), ordered = FALSE)
+          ord <- match(order, names(splret$values))
+        }
+        make_split_result(
+          splret$values[ord],
+          splret$datasplit[ord],
+          splret$labels[ord]
+        )
+      }
+    )
+  )
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -114,6 +114,7 @@ reference:
       - split_cols_by_groups
       - to_string_matrix
       - groups_list_to_df
+      - utils_split_funs
 
   - title: rtables Formatting Functions
     desc: These functions provide customized formatting rules to work with the

diff --git a/man/utils_split_funs.Rd b/man/utils_split_funs.Rd
diff --git a/tests/testthat/_snaps/utils_split_fun.md b/tests/testthat/_snaps/utils_split_fun.md
@@ -0,0 +1,77 @@
+# analyze_vars works as expected with ref_group_position last split fun
+
+    Code
+      res[3:4, ]
+    Output
+                    ARM A        ARM B        ARM C   
+                    (N=69)       (N=73)       (N=58)  
+      ————————————————————————————————————————————————
+      Mean (SD)   34.1 (6.8)   35.8 (7.1)   36.1 (7.4)
+      Median         32.8         35.4         36.2   
+
+# compare_vars works as expected with ref_group first split fun
+
+    Code
+      res[1:2, ]
+    Output
+                    ARM B        ARM A        ARM C   
+                    (N=73)       (N=69)       (N=58)  
+      ————————————————————————————————————————————————
+      n               73           69           58    
+      Mean (SD)   35.8 (7.1)   34.1 (6.8)   36.1 (7.4)
+
+# summarize_ancova works as expected with ref_group position split fun
+
+    Code
+      res[1:2, ]
+    Output
+                              ARM A    ARM B    ARM C 
+                              (N=69)   (N=73)   (N=58)
+      ————————————————————————————————————————————————
+      Unadjusted comparison                           
+        n                       69       73       58  
+
+# binary endpoint layouts work as expected with ref_group_position last split fun
+
+    Code
+      res
+    Output
+                                             A: Drug X          C: Combination     B: Placebo
+                                               (N=69)               (N=58)           (N=73)  
+      ———————————————————————————————————————————————————————————————————————————————————————
+      Odds Ratio (95% CI)                2.47 (1.22 - 5.01)   2.29 (1.10 - 4.78)             
+      Difference in Response rate (%)           20.5                 19.0                    
+        95% CI (Wald, with correction)      (3.6, 37.3)          (1.2, 36.8)                 
+        p-value (Chi-Squared Test)             0.0113               0.0263                   
+
+# time to event layouts works as expected with ref_group_position last split fun
+
+    Code
+      res
+    Output
+                                            ARM A             ARM C            ARM B     
+                                            (N=69)           (N=58)            (N=73)    
+      ———————————————————————————————————————————————————————————————————————————————————
+      CoxPH                                                                              
+        p-value (log-rank)                  0.0159           0.1820                      
+        Hazard Ratio                         0.58             1.31                       
+        95% CI                           (0.37, 0.91)     (0.88, 1.95)                   
+      6 Months                                                                           
+        Patients remaining at risk            49               39                46      
+        Event Free Rate (%)                 85.29             71.87            71.55     
+        95% CI                          (76.38, 94.19)   (60.15, 83.58)    (60.96, 82.14)
+        Difference in Event Free Rate       13.74             0.31                       
+          95% CI                        (-0.10, 27.57)   (-15.47, 16.10)                 
+          p-value (Z-test)                  0.0517           0.9688                      
+
+# summarize_ancova works as expected with ref_group_position last split fun
+
+    Code
+      res
+    Output
+                                   ARM A    ARM C    ARM B 
+                                   (N=69)   (N=58)   (N=73)
+      —————————————————————————————————————————————————————
+      Unadjusted rate (per year)                           
+        Rate                       8.2061   7.8551   9.1554
+