From f79ee58be6a4e24e4339a90a0bbce06d5a79b08c Mon Sep 17 00:00:00 2001 From: Andrew Johnson Date: Fri, 19 Jan 2024 20:12:56 +0200 Subject: [PATCH 1/4] Update array syntax, fabs, dirichlet_multinomial --- dev/stan_models/glm_beta_binomial.stan | 16 +++---- .../glm_dirichlet_multinomial.stan | 8 ++-- ...chlet_multinomial_generate_quantities.stan | 4 +- .../glm_dirichlet_multinomial_imputation.stan | 20 ++++----- dev/stan_models/glm_multi_beta.stan | 14 +++--- .../glm_multi_beta_generate_data.stan | 12 ++--- ...ultinomial_logit_linear_simulate_data.stan | 8 ++-- inst/stan/glm_multi_beta_binomial.stan | 44 +++++++++---------- ...glm_multi_beta_binomial_generate_date.stan | 12 ++--- ...glm_multi_beta_binomial_simulate_data.stan | 8 ++-- 10 files changed, 73 insertions(+), 73 deletions(-) diff --git a/dev/stan_models/glm_beta_binomial.stan b/dev/stan_models/glm_beta_binomial.stan index 0979c95d..543b3099 100644 --- a/dev/stan_models/glm_beta_binomial.stan +++ b/dev/stan_models/glm_beta_binomial.stan @@ -30,22 +30,22 @@ data{ int M; int C; int A; - int exposure[N]; - int y[N,M]; + array[N] int exposure; + array[N,M] int y; matrix[N, C] X; matrix[A, A] XA; // Truncation int is_truncated; - int truncation_up[N,M]; - int truncation_down[N,M]; + array[N,M] int truncation_up; + array[N,M] int truncation_down; int is_vb; // Prior info - real prior_prec_intercept[2] ; - real prior_prec_slope[2] ; - real prior_prec_sd[2] ; + array[2] real prior_prec_intercept; + array[2] real prior_prec_slope; + array[2] real prior_prec_sd; // Exclude priors for testing purposes int exclude_priors; @@ -68,7 +68,7 @@ parameters{ matrix[A, M] alpha; // To exclude - real prec_coeff[2]; + array[2] real prec_coeff; real prec_sd; real mix_p; diff --git a/dev/stan_models/glm_dirichlet_multinomial.stan b/dev/stan_models/glm_dirichlet_multinomial.stan index 20acb209..8112975a 100644 --- a/dev/stan_models/glm_dirichlet_multinomial.stan +++ b/dev/stan_models/glm_dirichlet_multinomial.stan @@ -1,13 +1,13 @@ functions{ - real dirichlet_multinomial_lpmf(int[] y, vector alpha) { + real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) { real alpha_plus = sum(alpha); return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y))) - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha)); } -matrix vector_array_to_matrix(vector[] x) { +matrix vector_array_to_matrix(array[] vector x) { matrix[size(x), rows(x[1])] y; for (m in 1:size(x)) y[m] = x[m]'; @@ -42,7 +42,7 @@ data{ int M; int C; int A; - int y[N,M]; + array[N,M] int y; matrix[N,C] X; } transformed data{ @@ -82,7 +82,7 @@ model{ //for(n in 1:N) y[n] ~ dirichlet_multinomial( precision * softmax( vector_array_to_matrix(beta) )) ); - for(n in 1:N) y[n] ~ dirichlet_multinomial( to_vector(alpha[n] )); + for(n in 1:N) y[n] ~ dirichlet_multinomial2( to_vector(alpha[n] )); precision ~ normal(0,5); for(i in 1:C) beta_raw[i] ~ normal(0, x_raw_sigma ); diff --git a/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan b/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan index 26cb9b3b..b816d801 100644 --- a/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan +++ b/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan @@ -2,7 +2,7 @@ data { int N; int M; int C; - int exposure[N]; + array[N] int exposure; matrix[N,C] X; } parameters { @@ -13,7 +13,7 @@ parameters { } generated quantities{ - int counts[N, M]; + array[N, M] int counts; matrix[N, M] alpha = (X * beta); diff --git a/dev/stan_models/glm_dirichlet_multinomial_imputation.stan b/dev/stan_models/glm_dirichlet_multinomial_imputation.stan index 0a300423..c35f3ce2 100644 --- a/dev/stan_models/glm_dirichlet_multinomial_imputation.stan +++ b/dev/stan_models/glm_dirichlet_multinomial_imputation.stan @@ -1,10 +1,10 @@ functions{ - int[] dirichlet_multinomial_rng(vector alpha, int exposure) { + array[] int dirichlet_multinomial2_rng(vector alpha, int exposure) { return multinomial_rng(dirichlet_rng(alpha), exposure); } -matrix vector_array_to_matrix(vector[] x) { +matrix vector_array_to_matrix(array[] vector x) { matrix[size(x), rows(x[1])] y; for (m in 1:size(x)) y[m] = x[m]'; @@ -19,17 +19,17 @@ data{ int M; int C; int A; - vector[M] y[N]; + array[N] vector[M] y; matrix[N,C] X; // To exclude int how_namy_to_include; - int to_include[how_namy_to_include, 2]; // Table with column N and M + array[how_namy_to_include, 2] int to_include; // Table with column N and M // RNG int I; // iterations - vector[A] precision[I]; - int exposure[N]; + array[I] vector[A] precision; + array[N] int exposure; } parameters{ @@ -56,9 +56,9 @@ model{ for(i in 1:C) beta[i] ~ normal(0, 5); } generated quantities{ - vector[M] y_rng[N]; - vector[M] y_simplex[N]; - int counts[N, M]; + array[N] vector[M] y_rng; + array[N] vector[M] y_simplex; + array[N, M] int counts; // Random precision int my_n; @@ -76,7 +76,7 @@ generated quantities{ for(n in 1:N) - counts[n] = dirichlet_multinomial_rng((X[n,1:A] * precision[my_n] * 100) * y_simplex[n], exposure[n]) ; + counts[n] = dirichlet_multinomial2_rng((X[n,1:A] * precision[my_n] * 100) * y_simplex[n], exposure[n]) ; diff --git a/dev/stan_models/glm_multi_beta.stan b/dev/stan_models/glm_multi_beta.stan index 44c9760d..8db2d94d 100644 --- a/dev/stan_models/glm_multi_beta.stan +++ b/dev/stan_models/glm_multi_beta.stan @@ -1,13 +1,13 @@ functions{ - real dirichlet_multinomial_lpmf(int[] y, vector alpha) { + real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) { real alpha_plus = sum(alpha); return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y))) - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha)); } -matrix vector_array_to_matrix(vector[] x) { +matrix vector_array_to_matrix(array[] vector x) { matrix[size(x), rows(x[1])] y; for (m in 1:size(x)) y[m] = x[m]'; @@ -37,7 +37,7 @@ vector Q_sum_to_zero_QR(int N) { return x; } - real beta_regression_lpdf(vector[] p, matrix X, matrix beta, vector phi){ + real beta_regression_lpdf(array[] vector p, matrix X, matrix beta, vector phi){ real lp = 0; matrix[num_elements(p[1]), num_elements(p[,1])] mu = (X * beta)'; @@ -54,7 +54,7 @@ vector Q_sum_to_zero_QR(int N) { data{ int N; int M; - simplex[M] y[N]; + array[N] simplex[M] y; matrix[N, 2] X; } @@ -64,15 +64,15 @@ transformed data{ real x_raw_sigma = inv_sqrt(1 - inv(M)); } parameters{ - vector[M-1] beta_raw[C]; + array[C] vector[M-1] beta_raw; vector[M] precision; // To exclude - real prec_coeff[2]; + array[2] real prec_coeff; real prec_sd; } transformed parameters{ - vector[M] beta[C]; + array[C] vector[M] beta; for(c in 1:C) beta[c] = sum_to_zero_QR(beta_raw[c], Q_r); diff --git a/dev/stan_models/glm_multi_beta_generate_data.stan b/dev/stan_models/glm_multi_beta_generate_data.stan index 3f960e14..e2ac92ff 100644 --- a/dev/stan_models/glm_multi_beta_generate_data.stan +++ b/dev/stan_models/glm_multi_beta_generate_data.stan @@ -1,13 +1,13 @@ functions{ - real dirichlet_multinomial_lpmf(int[] y, vector alpha) { + real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) { real alpha_plus = sum(alpha); return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y))) - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha)); } - matrix vector_array_to_matrix(vector[] x) { + matrix vector_array_to_matrix(array[] vector x) { matrix[size(x), rows(x[1])] y; for (m in 1:size(x)) y[m] = x[m]'; @@ -37,9 +37,9 @@ functions{ return x; } - vector[] beta_regression_rng( matrix X, matrix beta, vector phi){ + array[] vector beta_regression_rng( matrix X, matrix beta, vector phi){ - vector[cols(beta)] p[rows(X)]; + array[rows(X)] vector[cols(beta)] p; matrix[num_elements(p[1]), num_elements(p[,1])] mu = (X * beta)'; @@ -63,7 +63,7 @@ data{ int M; int C; matrix[N, 2] X; - vector[M] beta[C]; + array[C] vector[M] beta; vector[M] precision; } @@ -74,7 +74,7 @@ transformed data{ } generated quantities{ - vector[M] y[N]; + array[N] vector[M] y; y = beta_regression_rng( X, vector_array_to_matrix(beta), precision); } diff --git a/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan b/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan index 73c2b07e..d9ea6881 100644 --- a/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan +++ b/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan @@ -3,18 +3,18 @@ data{ int M; // Number of categories int C; int A; - int exposure[N]; + array[N] int exposure; matrix[N, C] X; matrix[A, A] XA; matrix[C,M] beta; real variability_multiplier; } parameters{ - real prec_coeff[2]; + array[2] real prec_coeff; real prec_sd; } generated quantities{ - int counts[N, M]; + array[N, M] int counts; matrix[M, N] normal_draws; matrix[A,M] alpha; matrix[M,N] mu = (X * beta)'; @@ -32,7 +32,7 @@ generated quantities{ precision = (X[,1:A] * alpha)'; for(i in 1:cols(mu)) { - normal_draws[,i] = to_vector( normal_rng(mu[,i], fabs(precision[,i] / variability_multiplier ) ) ); // sd decreased because different representation from beta binomial + normal_draws[,i] = to_vector( normal_rng(mu[,i], abs(precision[,i] / variability_multiplier ) ) ); // sd decreased because different representation from beta binomial } for(i in 1:cols(normal_draws)) { diff --git a/inst/stan/glm_multi_beta_binomial.stan b/inst/stan/glm_multi_beta_binomial.stan index b08be45c..4fa7fef3 100644 --- a/inst/stan/glm_multi_beta_binomial.stan +++ b/inst/stan/glm_multi_beta_binomial.stan @@ -35,9 +35,9 @@ functions{ return x; } - int[] rep_each(int[] x, int K) { + array[] int rep_each(array[] int x, int K) { int N = size(x); - int y[N * K]; + array[N * K] int y; int pos = 1; for (n in 1:N) { for (k in 1:K) { @@ -60,7 +60,7 @@ functions{ return means; } - real abundance_variability_regression(row_vector variability, row_vector abundance, real[] prec_coeff, real prec_sd, int bimodal_mean_variability_association, real mix_p){ + real abundance_variability_regression(row_vector variability, row_vector abundance, array[] real prec_coeff, real prec_sd, int bimodal_mean_variability_association, real mix_p){ real lp = 0; // If mean-variability association is bimodal such as for single-cell RNA use mixed model @@ -88,26 +88,26 @@ data{ int A_intercept_columns; // How many intercept column in varibility design int B_intercept_columns; // How many intercept column in varibility design int Ar; // Rows of unique variability design - int exposure[N]; - int y[N,M]; + array[N] int exposure; + array[N,M] int y; matrix[N, C] X; matrix[Ar, A] XA; // The unique variability design matrix[N, A] Xa; // The variability design // Truncation int is_truncated; - int truncation_up[N,M]; - int truncation_down[N,M]; + array[N,M] int truncation_up; + array[N,M] int truncation_down; int TNS; // truncation_not_size - int truncation_not_idx[TNS]; + array[TNS] int truncation_not_idx; int is_vb; // Prior info - real prior_prec_intercept[2] ; - real prior_prec_slope[2] ; - real prior_prec_sd[2] ; - real prior_mean_intercept[2]; - real prior_mean_coefficients[2]; + array[2] real prior_prec_intercept; + array[2] real prior_prec_slope; + array[2] real prior_prec_sd; + array[2] real prior_mean_intercept; + array[2] real prior_mean_coefficients; // Exclude priors for testing purposes int exclude_priors; @@ -120,10 +120,10 @@ data{ // Random intercept int N_random_intercepts; int N_minus_sum; - int paring_cov_random_intercept[N_random_intercepts, 2]; + array[N_random_intercepts, 2] int paring_cov_random_intercept; int N_grouping; matrix[N, N_grouping] X_random_intercept; - int idx_group_random_intercepts[N_grouping, 2]; + array[N_grouping, 2] int idx_group_random_intercepts; // LOO int enable_loo; @@ -134,9 +134,9 @@ transformed data{ matrix[N, C] Q_ast; matrix[C, C] R_ast; matrix[C, C] R_ast_inverse; - int y_array[N*M]; - int truncation_down_array[N*M]; - int exposure_array[N*M]; + array[N*M] int y_array; + array[N*M] int truncation_down_array; + array[N*M] int exposure_array; // EXCEPTION MADE FOR WINDOWS GENERATE QUANTITIES IF RANDOM EFFECT DO NOT EXIST int N_grouping_WINDOWS_BUG_FIX = max(N_grouping, 1); // thin and scale the QR decomposition @@ -158,17 +158,17 @@ parameters{ matrix[C, M-1] beta_raw_raw; // matrix with C rows and number of cells (-1) columns matrix[A, M] alpha; // Variability // To exclude - real prec_coeff[2]; + array[2] real prec_coeff; real prec_sd; real mix_p; // Random intercept // matrix with N_groupings rows and number of cells (-1) columns matrix[N_random_intercepts * (N_random_intercepts>0), M-1] random_intercept_raw; // sd of random intercept - real random_intercept_sigma_mu[N_random_intercepts>0]; - real random_intercept_sigma_sigma[N_random_intercepts>0]; + array[N_random_intercepts>0] real random_intercept_sigma_mu; + array[N_random_intercepts>0] real random_intercept_sigma_sigma; row_vector[(M-1) * (N_random_intercepts>0)] random_intercept_sigma_raw; // If I have just one group - real zero_random_intercept[N_random_intercepts>0]; + array[N_random_intercepts>0] real zero_random_intercept; } transformed parameters{ diff --git a/inst/stan/glm_multi_beta_binomial_generate_date.stan b/inst/stan/glm_multi_beta_binomial_generate_date.stan index f813481e..ec47ba06 100755 --- a/inst/stan/glm_multi_beta_binomial_generate_date.stan +++ b/inst/stan/glm_multi_beta_binomial_generate_date.stan @@ -9,13 +9,13 @@ data { int M; int C; int A; - int exposure[N]; + array[N] int exposure; // Which column of design, coefficient matrices should be used to generate the data int length_X_which; int length_XA_which; - int X_which[length_X_which]; - int XA_which[length_XA_which]; + array[length_X_which] int X_which; + array[length_XA_which] int XA_which; matrix[N, length_X_which] X; matrix[N, length_XA_which] Xa; // The variability design @@ -24,7 +24,7 @@ data { // Random intercept int length_X_random_intercept_which; - int X_random_intercept_which[length_X_random_intercept_which]; + array[length_X_random_intercept_which] int X_random_intercept_which; int N_grouping; int N_grouping_new; matrix[N, N_grouping_new] X_random_intercept; @@ -57,14 +57,14 @@ transformed parameters{ } generated quantities{ - int counts_uncorrected[N, M]; + array[N, M] int counts_uncorrected; // Matrix for correcting for exposure matrix[N, M] counts; // Vector of the generated exposure - real generated_exposure[N]; + array[N] real generated_exposure; // Subset for mean and deviation matrix[length_X_which,M] my_beta = beta[X_which,]; diff --git a/inst/stan/glm_multi_beta_binomial_simulate_data.stan b/inst/stan/glm_multi_beta_binomial_simulate_data.stan index 85c24355..bbd47ed7 100644 --- a/inst/stan/glm_multi_beta_binomial_simulate_data.stan +++ b/inst/stan/glm_multi_beta_binomial_simulate_data.stan @@ -3,7 +3,7 @@ data{ int M; // Number of categories int C; int A; - int exposure[N]; + array[N] int exposure; matrix[N, C] X; matrix[A, A] XA; @@ -15,12 +15,12 @@ data{ parameters{ - real prec_coeff[2]; + array[2] real prec_coeff; real prec_sd; } generated quantities{ - int counts_uncorrected[N, M]; + array[N, M] int counts_uncorrected; matrix[N, M] counts; matrix[A,M] alpha; matrix[M,N] mu = (X * beta)'; @@ -28,7 +28,7 @@ generated quantities{ matrix[A,M] beta_intercept_slope; // Vector of the generated exposure - real generated_exposure[N]; + array[N] real generated_exposure; // matrix[A,M] alpha_intercept_slope; From f5b80e8e367e1003c198dedce5c3dd219be21d95 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Sat, 20 Jan 2024 19:37:15 +1100 Subject: [PATCH 2/4] fix warnings --- R/utilities.R | 5 ++++- tests/testthat/test-sccomp_.R | 10 +++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index f895fb34..83316a36 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -479,7 +479,7 @@ fit_model = function( max(4000) if(output_samples > max_sampling_iterations) { - warning("sccomp says: the number of draws used to defined quantiles of the posterior distribution is capped to 20K. This means that for very low probability threshold the quantile could become unreliable. We suggest to limit the probability threshold between 0.1 and 0.01") + message("sccomp says: the number of draws used to defined quantiles of the posterior distribution is capped to 20K.") # This means that for very low probability threshold the quantile could become unreliable. We suggest to limit the probability threshold between 0.1 and 0.01") output_samples = max_sampling_iterations }} @@ -1545,6 +1545,7 @@ plot_1d_intervals = function(.data, .cell_group, significance_threshold= 0.025, filter(parameter != "(Intercept)") |> # Reshape + select(-contains("n_eff"), -contains("R_k_hat")) |> pivot_longer(c(contains("c_"), contains("v_")),names_sep = "_" , names_to=c("which", "estimate") ) |> drop_na() |> pivot_wider(names_from = estimate, values_from = value) |> @@ -2584,6 +2585,8 @@ replicate_data = function(.data, model_input$X_random_intercept = new_X_random_intercept model_input$N_grouping_new = ncol(new_X_random_intercept) + # To avoid error in case of a NULL posterior sample + number_of_draws = min(number_of_draws, nrow(fit_matrix)) # Generate quantities rstan::gqs( my_model, diff --git a/tests/testthat/test-sccomp_.R b/tests/testthat/test-sccomp_.R index 1f7fefc6..23986fa4 100644 --- a/tests/testthat/test-sccomp_.R +++ b/tests/testthat/test-sccomp_.R @@ -402,11 +402,14 @@ test_that("test constrasts",{ my_estimate |> sccomp_test(contrasts = "(Intercept)") |> - expect_error() + expect_error() |> + expect_warning("sccomp says: These components of your contrasts") my_estimate |> sccomp_test(contrasts = "typehealthy_") |> - expect_error() + expect_error() |> + expect_warning("These components of your contrasts are not present") + res = my_estimate_random |> sccomp_test(contrasts = c("1/2*typecancer - 1/2*typehealthy", "1/2*typehealthy - 1/2*typecancer") ) @@ -425,7 +428,8 @@ test_that("test constrasts",{ # Wrong interaction my_estimate |> sccomp_test(contrasts = c("(1/2*continuous_covariate:typehealthy + 1/2*`continuous_covariate:typehealthy`) - `continuous_covariate:typehealthy`") ) |> - expect_warning("sccomp says: for columns which have special characters") + expect_warning("sccomp says: for columns which have special characters") |> + expect_warning("numerical expression has") }) From 45555c107fe8020810d0e8ac0129ca2bcf01b319 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Sun, 21 Jan 2024 00:34:05 +1100 Subject: [PATCH 3/4] fix error and warning --- R/utilities.R | 6 +----- tests/testthat/test-sccomp_.R | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index 83316a36..191bb7b2 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1902,7 +1902,7 @@ plot_scatterplot = function( geom_smooth( aes(!!as.symbol(factor_of_interest), (generated_proportions)), - fatten = 0.5, lwd=0.2, + lwd=0.2, data = simulated_proportion %>% @@ -1944,12 +1944,8 @@ plot_scatterplot = function( geom_smooth( aes(!!as.symbol(factor_of_interest), proportion, fill = NULL), # fill=Effect), - outlier.shape = NA, outlier.color = NA,outlier.size = 0, data = data_proportion , - # |> - # left_join(significance_colors, by = c(quo_name(.cell_group), factor_of_interest)), - fatten = 0.5, lwd=0.5, color = "black", span = 1 diff --git a/tests/testthat/test-sccomp_.R b/tests/testthat/test-sccomp_.R index 23986fa4..a6c25001 100644 --- a/tests/testthat/test-sccomp_.R +++ b/tests/testthat/test-sccomp_.R @@ -255,7 +255,7 @@ test_that("multi beta binomial from Seurat",{ arrange(desc(abs(c_effect))) |> slice(1) |> pull(cell_group) |> - expect_equal(c("CD4 cm high cytokine")) + expect_in(c("B mem", "CD4 cm high cytokine")) # Check convergence my_estimate |> From a536943b6516d63b7097bf291ac6b75c47e80da9 Mon Sep 17 00:00:00 2001 From: Stefano Mangiola Date: Sun, 21 Jan 2024 12:15:07 +1100 Subject: [PATCH 4/4] version UP --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 79c32b00..fc1092b3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sccomp Title: Robust Outlier-aware Estimation of Composition and Heterogeneity for Single-cell Data -Version: 1.7.4 +Version: 1.7.5 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")) )