add tests for utilities

MangiolaLaboratory · Nov 29, 2023 · 972f115 · 972f115
1 parent fbe4553
commit 972f115
Show file tree

Hide file tree

Showing 7 changed files with 638 additions and 216 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -32,7 +32,6 @@ importFrom(SingleCellExperiment,colData)
 importFrom(SingleCellExperiment,counts)
 importFrom(boot,logit)
 importFrom(dplyr,any_of)
-importFrom(dplyr,case_when)
 importFrom(dplyr,cummean)
 importFrom(dplyr,distinct_at)
 importFrom(dplyr,enquo)

diff --git a/R/functions_dirichlet_multinomial.R b/R/functions_dirichlet_multinomial.R
@@ -461,8 +461,6 @@ fit_and_generate_quantities = function(data_for_model, model, model_generate, ce
     # # Attach beta posterior
     # left_join(fitted,  by="M") %>%
 
-    # label_deleterious_outliers()
-
     # Add precision as attribute
     add_attr(
       fit %>% extract("precision") %$% precision,

diff --git a/R/methods.R b/R/methods.R
@@ -1229,7 +1229,6 @@ sccomp_remove_unwanted_variation.data.frame = function(.data,
 #'
 #' # Set coefficients for cell_groups. In this case all coefficients are 0 for simplicity.
 #' counts_obj = counts_obj |> mutate(b_0 = 0, b_1 = 0)
-
 #' # Simulate data
 #' simulate_data(counts_obj, estimate, ~type, ~1, sample, cell_group, c(b_0, b_1))
 #'

diff --git a/R/utilities.R b/R/utilities.R
@@ -282,36 +282,6 @@ vb_iterative = function(model,
 }
 
 
-#' fit_to_counts_rng
-#'
-#' @importFrom tidyr separate
-#' @importFrom tidyr nest
-#' @importFrom rstan summary
-#'
-#' @param fit A fit object
-#' @param adj_prob_theshold fit real
-#'
-#' @keywords internal
-#' @noRd
-fit_to_counts_rng = function(fit, adj_prob_theshold){
-
-  writeLines(sprintf("executing %s", "fit_to_counts_rng"))
-
-  fit %>%
-    rstan::summary("counts_rng",
-                   prob = c(adj_prob_theshold, 1 - adj_prob_theshold)) %$%
-    summary %>%
-    as_tibble(rownames = ".variable") %>%
-    separate(.variable,
-             c(".variable", "S", "G"),
-             sep = "[\\[,\\]]",
-             extra = "drop") %>%
-    mutate(S = S %>% as.integer, G = G %>% as.integer) %>%
-    select(-any_of(c("n_eff", "Rhat", "khat"))) %>%
-    rename(`.lower` = (.) %>% ncol - 1,
-           `.upper` = (.) %>% ncol)
-}
-
 #' draws_to_tibble_x_y
 #'
 #' @importFrom tidyr pivot_longer
@@ -420,30 +390,6 @@ summary_to_tibble = function(fit, par, x, y = NULL, probs = c(0.025, 0.25, 0.50,
 
 }
 
-#' @importFrom rlang :=
-label_deleterious_outliers = function(.my_data){
-
-  .my_data %>%
-
-    # join CI
-    mutate(outlier_above = !!.count > `95%`) %>%
-    mutate(outlier_below = !!.count < `5%`) %>%
-
-    # Mark if on the right of the factor scale
-    mutate(is_group_right = !!as.symbol(colnames(X)[2]) > mean( !!as.symbol(colnames(X)[2]) )) %>%
-
-    # Check if outlier might be deleterious for the statistics
-    mutate(
-      !!as.symbol(sprintf("deleterious_outlier_%s", iteration)) :=
-        (outlier_above & slope > 0 & is_group_right)  |
-        (outlier_below & slope > 0 & !is_group_right) |
-        (outlier_above & slope < 0 & !is_group_right) |
-        (outlier_below & slope < 0 & is_group_right)
-    ) %>%
-
-    select(-outlier_above, -outlier_below, -is_group_right)
-
-}
 
 fit_model = function(
   data_for_model, model, censoring_iteration = 1, cores = detectCores(), quantile = 0.95,
@@ -543,6 +489,7 @@ parse_fit = function(data_for_model, fit, censoring_iteration = 1, chains){
 
 }
 
+#' USED FOR DIRICHLE TMODEL
 #' @importFrom purrr map2_lgl
 #' @importFrom tidyr pivot_wider
 #' @importFrom stats C
@@ -585,6 +532,7 @@ beta_to_CI = function(fitted, censoring_iteration = 1, false_positive_rate, fact
 
 }
 
+#' USED FOR DIRICHLE TMODEL
 #' @importFrom purrr map2_lgl
 #' @importFrom tidyr pivot_wider
 #' @importFrom stats C
@@ -806,131 +754,6 @@ get_design_matrix = function(.data_spread, formula, .sample){
   rownames(design_matrix) = .data_spread |> pull(!!.sample)
 
   design_matrix
-}
-
-check_random_intercept_design = function(.data, factor_names, random_intercept_elements, formula, X){
-
-  .data_ = .data
-
-  # Loop across groupings
-  random_intercept_elements |>
-    nest(factors = `factor` ) |>
-    mutate(checked = map2(
-      grouping, factors,
-      ~ {
-
-        .y = unlist(.y)
-
-        # Check that the group column is categorical
-        stopifnot("sccomp says: the grouping column should be categorical (not numeric)" =
-                    .data_ |>
-                    select(.x) |>
-                    pull(1) |>
-                    class() %in%
-                    c("factor", "logical", "character")
-        )
-
-
-        # # Check sanity of the grouping if only random intercept
-        # stopifnot(
-        #   "sccomp says: the random intercept completely confounded with one or more discrete factors" =
-        #     !(
-        #       !.y |> equals("(Intercept)") &&
-        #         .data_ |> select(any_of(.y)) |> suppressWarnings() |>  pull(1) |> class() %in% c("factor", "character") |> any() &&
-        #         .data_ |>
-        #         select(.x, any_of(.y)) |>
-        #         select_if(\(x) is.character(x) | is.factor(x) | is.logical(x)) |>
-        #         distinct() %>%
-        #
-        #         # TEMPORARY FIX
-        #         set_names(c(colnames(.)[1], 'factor___temp')) |>
-        #
-        #         count(factor___temp) |>
-        #         pull(n) |>
-        #         equals(1) |>
-        #         any()
-        #     )
-        # )
-
-        # # Check if random intercept with random continuous slope. At the moment is not possible
-        # # Because it would require I believe a multivariate prior
-        # stopifnot(
-        #   "sccomp says: continuous random slope is not supported yet" =
-        #     !(
-        #       .y |> str_subset("1", negate = TRUE) |> length() |> gt(0) &&
-        #         .data_ |>
-        #         select(
-        #           .y |> str_subset("1", negate = TRUE)
-        #         ) |>
-        #         map_chr(class) %in%
-        #         c("integer", "numeric")
-        #     )
-        # )
-
-        # Check if random intercept with random continuous slope. At the moment is not possible
-        # Because it would require I believe a multivariate prior
-        stopifnot(
-          "sccomp says: currently, discrete random slope is only supported in a intercept-free model. For example ~ 0 + treatment + (treatment | group)" =
-            !(
-              # If I have both random intercept and random discrete slope
-
-                .y |> equals("(Intercept)") |> any() &&
-                  length(.y) > 1 &&
-                # If I have random slope and non-intercept-free model
-                .data_ |> select(any_of(.y)) |> suppressWarnings() |>  pull(1) |> class() %in% c("factor", "character") |> any()
-
-            )
-        )
-
-
-        # I HAVE TO REVESIT THIS
-        #  stopifnot(
-        #   "sccomp says: the groups in the formula (factor | group) should not be shared across factor groups" =
-        #     !(
-        #       # If I duplicated groups
-        #       .y  |> identical("(Intercept)") |> not() &&
-        #       .data_ |> select(.y |> setdiff("(Intercept)")) |> lapply(class) != "numeric" &&
-        #         .data_ |>
-        #         select(.x, .y |> setdiff("(Intercept)")) |>
-        #
-        #         # Drop the factor represented by the intercept if any
-        #         mutate(`parameter` = .y |> setdiff("(Intercept)")) |>
-        #         unite("factor_name", c(parameter, factor), sep = "", remove = FALSE) |>
-        #         filter(factor_name %in% colnames(X)) |>
-        #
-        #         # Count
-        #         distinct() %>%
-        #         set_names(as.character(1:ncol(.))) |>
-        #         count(`1`) |>
-        #         filter(n>1) |>
-        #         nrow() |>
-        #         gt(1)
-        #
-        #     )
-        # )
-
-      }
-    ))
-
-  random_intercept_elements |>
-    nest(groupings = grouping ) |>
-    mutate(checked = map2(`factor`, groupings, ~{
-      # Check the same group spans multiple factors
-      stopifnot(
-        "sccomp says: the groups in the formula (factor | group) should be present in only one factor, including the intercept" =
-          !(
-              # If I duplicated groups
-            .y |> unlist() |> length() |> gt(1)
-
-          )
-      )
-
-
-    }))
-
-
-
-
 }
 
 #' @importFrom purrr when
@@ -1004,7 +827,6 @@ data_spread_to_model_input =
     # Random intercept
     if(nrow(random_intercept_elements)>0 ) {
 
-      #check_random_intercept_design(.data_spread, any_of(factor_names), random_intercept_elements, formula, X)
       random_intercept_grouping = get_random_intercept_design2(.data_spread, !!.sample,  formula )
 
       # Actual parameters, excluding for the sum to one parameters
@@ -1198,6 +1020,10 @@ data_simulation_to_model_input =
     .exposure = enquo(.exposure)
     .coefficients = enquo(.coefficients)
 
+    # Check formula
+    if(!formula |> is("formula"))
+      stop("sccomp says: the formula argument must be of formula class, not character at class, for example")
+
     factor_names = parse_formula(formula)
 
     sample_data =

diff --git a/man/sccomp_estimate.Rd b/man/sccomp_estimate.Rd
diff --git a/tests/testthat/test-sccomp_.R b/tests/testthat/test-sccomp_.R
@@ -464,26 +464,21 @@ test_that("plot test variability",{
 
 test_that("test constrasts",{
 
-
-  estimate =
-    seurat_obj |>
-    sccomp_estimate(
-      formula_composition = ~ type ,
-      formula_variability = ~ 1,
-      sample, cell_group,
-      approximate_posterior_inference = FALSE,
-      cores = 1,
-      mcmc_seed = 42,      
-      max_sampling_iterations = 1000
-    )
-
   new_test =
-    estimate |>
+    my_estimate |>
     sccomp_test() 
 
 })
 
+test_that(" Simulate data",{
+
+  data("counts_obj")
 
+    counts_obj |> 
+    mutate(b_0 = 0, b_1 = 0) |> 
+    simulate_data(my_estimate, ~type, ~1, sample, cell_group, c(b_0, b_1)) |> 
+      expect_s3_class("tbl")
+
+})
 
 
-res_composition