Merge pull request #111 from andrjohns/array-syntax

Update array syntax, fabs, dirichlet_multinomial
MangiolaLaboratory · Jan 21, 2024 · e1b4de2 · e1b4de2
2 parents 1e0de5b + a536943
commit e1b4de2
Show file tree

Hide file tree

Showing 13 changed files with 87 additions and 84 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: sccomp
 Title: Robust Outlier-aware Estimation of Composition and Heterogeneity for Single-cell Data
-Version: 1.7.4
+Version: 1.7.5
 Authors@R: c(person("Stefano", "Mangiola", email = "[email protected]",
                   role = c("aut", "cre"))
                   )

diff --git a/R/utilities.R b/R/utilities.R
@@ -479,7 +479,7 @@ fit_model = function(
       max(4000) 
 
     if(output_samples > max_sampling_iterations) {
-      warning("sccomp says: the number of draws used to defined quantiles of the posterior distribution is capped to 20K. This means that for very low probability threshold the quantile could become unreliable. We suggest to limit the probability threshold between 0.1 and 0.01")
+      message("sccomp says: the number of draws used to defined quantiles of the posterior distribution is capped to 20K.") # This means that for very low probability threshold the quantile could become unreliable. We suggest to limit the probability threshold between 0.1 and 0.01")
       output_samples = max_sampling_iterations
 
   }}
@@ -1545,6 +1545,7 @@ plot_1d_intervals = function(.data, .cell_group, significance_threshold= 0.025,
     filter(parameter != "(Intercept)") |>
 
     # Reshape
+    select(-contains("n_eff"), -contains("R_k_hat")) |> 
     pivot_longer(c(contains("c_"), contains("v_")),names_sep = "_" , names_to=c("which", "estimate") ) |>
     drop_na() |>
     pivot_wider(names_from = estimate, values_from = value) |>
@@ -1901,7 +1902,7 @@ plot_scatterplot = function(
 
       geom_smooth(
         aes(!!as.symbol(factor_of_interest), (generated_proportions)),
-        fatten = 0.5, lwd=0.2,
+        lwd=0.2,
         data =
           simulated_proportion %>%
 
@@ -1943,12 +1944,8 @@ plot_scatterplot = function(
 
       geom_smooth(
         aes(!!as.symbol(factor_of_interest), proportion, fill = NULL), # fill=Effect),
-        outlier.shape = NA, outlier.color = NA,outlier.size = 0,
         data =
           data_proportion ,
-        # |>
-        #   left_join(significance_colors, by = c(quo_name(.cell_group), factor_of_interest)),
-        fatten = 0.5,
         lwd=0.5,
         color = "black",
         span = 1
@@ -2584,6 +2581,8 @@ replicate_data = function(.data,
   model_input$X_random_intercept = new_X_random_intercept
   model_input$N_grouping_new = ncol(new_X_random_intercept)
 
+  # To avoid error in case of a NULL posterior sample
+  number_of_draws = min(number_of_draws, nrow(fit_matrix))
   # Generate quantities
   rstan::gqs(
     my_model,

diff --git a/dev/stan_models/glm_beta_binomial.stan b/dev/stan_models/glm_beta_binomial.stan
@@ -30,22 +30,22 @@ data{
 	int M;
 	int C;
 	int A;
-	int exposure[N];
-	int y[N,M];
+	array[N] int exposure;
+	array[N,M] int y;
 	matrix[N, C] X;
 	matrix[A, A] XA;
 
 	// Truncation
 	int is_truncated;
-	int truncation_up[N,M];
-	int truncation_down[N,M];
+	array[N,M] int truncation_up;
+	array[N,M] int truncation_down;
 
 	int<lower=0, upper=1> is_vb;
 
   // Prior info
-  real prior_prec_intercept[2] ;
-  real prior_prec_slope[2] ;
-  real prior_prec_sd[2] ;
+  array[2] real prior_prec_intercept;
+  array[2] real prior_prec_slope;
+  array[2] real prior_prec_sd;
 
   // Exclude priors for testing purposes
   int<lower=0, upper=1> exclude_priors;
@@ -68,7 +68,7 @@ parameters{
 	matrix[A, M] alpha;
 
 	// To exclude
-  real prec_coeff[2];
+  array[2] real prec_coeff;
   real<lower=0> prec_sd;
 
   real<lower=0, upper=1> mix_p;

diff --git a/dev/stan_models/glm_dirichlet_multinomial.stan b/dev/stan_models/glm_dirichlet_multinomial.stan
@@ -1,13 +1,13 @@
 functions{
 
-  real dirichlet_multinomial_lpmf(int[] y, vector alpha) {
+  real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) {
     	 real alpha_plus = sum(alpha);
 
     return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y)))
                 - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha));
   }
 
-matrix vector_array_to_matrix(vector[] x) {
+matrix vector_array_to_matrix(array[] vector x) {
 		matrix[size(x), rows(x[1])] y;
 		for (m in 1:size(x))
 		  y[m] = x[m]';
@@ -42,7 +42,7 @@ data{
 	int M;
 	int C;
 	int A;
-	int y[N,M];
+	array[N,M] int y;
 	matrix[N,C] X;
 }
 transformed data{
@@ -82,7 +82,7 @@ model{
 
 	 //for(n in 1:N) y[n] ~ dirichlet_multinomial( precision * softmax( vector_array_to_matrix(beta) )) );
 
-	 for(n in 1:N) y[n] ~ dirichlet_multinomial( to_vector(alpha[n] ));
+	 for(n in 1:N) y[n] ~ dirichlet_multinomial2( to_vector(alpha[n] ));
 
 	 precision ~ normal(0,5);
 	 for(i in 1:C) beta_raw[i] ~ normal(0, x_raw_sigma );

diff --git a/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan b/dev/stan_models/glm_dirichlet_multinomial_generate_quantities.stan
@@ -2,7 +2,7 @@ data {
 	int<lower=0> N;
 	int<lower=0> M;
 	int<lower=0> C;
-	int exposure[N];
+	array[N] int exposure;
 	matrix[N,C] X;
 }
 parameters {
@@ -13,7 +13,7 @@ parameters {
 }
 generated quantities{
 
-  int counts[N, M];
+  array[N, M] int counts;
 
   matrix[N, M] alpha = (X * beta);
 

diff --git a/dev/stan_models/glm_dirichlet_multinomial_imputation.stan b/dev/stan_models/glm_dirichlet_multinomial_imputation.stan
@@ -1,10 +1,10 @@
 functions{
 
-	int[] dirichlet_multinomial_rng(vector alpha, int exposure) {
+	array[] int dirichlet_multinomial2_rng(vector alpha, int exposure) {
 	    return multinomial_rng(dirichlet_rng(alpha), exposure);
 	}
 
-matrix vector_array_to_matrix(vector[] x) {
+matrix vector_array_to_matrix(array[] vector x) {
 		matrix[size(x), rows(x[1])] y;
 		for (m in 1:size(x))
 		  y[m] = x[m]';
@@ -19,17 +19,17 @@ data{
 	int M;
 	int C;
 	int A;
-	vector[M] y[N];
+	array[N] vector[M] y;
 	matrix[N,C] X;
 
 	// To exclude
 	int<lower=0> how_namy_to_include;
-	int to_include[how_namy_to_include, 2]; // Table with column N and M
+	array[how_namy_to_include, 2] int to_include; // Table with column N and M
 
   // RNG
 	int I; // iterations
-	vector[A] precision[I];
-	int exposure[N];
+	array[I] vector[A] precision;
+	array[N] int exposure;
 
 }
 parameters{
@@ -56,9 +56,9 @@ model{
 	 for(i in 1:C) beta[i] ~ normal(0, 5);
 }
 generated quantities{
-  vector[M] y_rng[N];
-  vector[M] y_simplex[N];
-  int counts[N, M];
+  array[N] vector[M] y_rng;
+  array[N] vector[M] y_simplex;
+  array[N, M] int counts;
 
   // Random precision
   int my_n;
@@ -76,7 +76,7 @@ generated quantities{
 
 
 	for(n in 1:N)
-	  counts[n] = dirichlet_multinomial_rng((X[n,1:A] * precision[my_n] * 100) * y_simplex[n], exposure[n]) ;
+	  counts[n] = dirichlet_multinomial2_rng((X[n,1:A] * precision[my_n] * 100) * y_simplex[n], exposure[n]) ;
 
 
 

diff --git a/dev/stan_models/glm_multi_beta.stan b/dev/stan_models/glm_multi_beta.stan
@@ -1,13 +1,13 @@
 functions{
 
-  real dirichlet_multinomial_lpmf(int[] y, vector alpha) {
+  real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) {
     	 real alpha_plus = sum(alpha);
 
     return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y)))
                 - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha));
   }
 
-matrix vector_array_to_matrix(vector[] x) {
+matrix vector_array_to_matrix(array[] vector x) {
 		matrix[size(x), rows(x[1])] y;
 		for (m in 1:size(x))
 		  y[m] = x[m]';
@@ -37,7 +37,7 @@ vector Q_sum_to_zero_QR(int N) {
     return x;
   }
 
-  real beta_regression_lpdf(vector[] p, matrix X, matrix beta, vector phi){
+  real beta_regression_lpdf(array[] vector p, matrix X, matrix beta, vector phi){
 
 		real lp = 0;
     matrix[num_elements(p[1]), num_elements(p[,1])] mu = (X * beta)';
@@ -54,7 +54,7 @@ vector Q_sum_to_zero_QR(int N) {
 data{
 	int N;
 	int M;
-	simplex[M] y[N];
+	array[N] simplex[M] y;
 	matrix[N, 2] X;
 
 }
@@ -64,15 +64,15 @@ transformed data{
   real x_raw_sigma = inv_sqrt(1 - inv(M));
 }
 parameters{
-	vector[M-1] beta_raw[C];
+	array[C] vector[M-1] beta_raw;
 	vector[M] precision;
 
 	// To exclude
-  real prec_coeff[2];
+  array[2] real prec_coeff;
   real<lower=0> prec_sd;
 }
 transformed parameters{
-		vector[M] beta[C];
+		array[C] vector[M] beta;
 	  for(c in 1:C)	beta[c] =  sum_to_zero_QR(beta_raw[c], Q_r);
 
 

diff --git a/dev/stan_models/glm_multi_beta_generate_data.stan b/dev/stan_models/glm_multi_beta_generate_data.stan
@@ -1,13 +1,13 @@
 functions{
 
-  real dirichlet_multinomial_lpmf(int[] y, vector alpha) {
+  real dirichlet_multinomial2_lpmf(array[] int y, vector alpha) {
     	 real alpha_plus = sum(alpha);
 
     return lgamma(alpha_plus) + sum(lgamma(alpha + to_vector(y)))
                 - lgamma(alpha_plus+sum(y)) - sum(lgamma(alpha));
   }
 
-  matrix vector_array_to_matrix(vector[] x) {
+  matrix vector_array_to_matrix(array[] vector x) {
 		matrix[size(x), rows(x[1])] y;
 		for (m in 1:size(x))
 		  y[m] = x[m]';
@@ -37,9 +37,9 @@ functions{
     return x;
   }
 
-	vector[] beta_regression_rng( matrix X, matrix beta, vector phi){
+	array[] vector beta_regression_rng( matrix X, matrix beta, vector phi){
 
-		vector[cols(beta)] p[rows(X)];
+		array[rows(X)] vector[cols(beta)] p;
 
 
     matrix[num_elements(p[1]), num_elements(p[,1])] mu = (X * beta)';
@@ -63,7 +63,7 @@ data{
 	int M;
 	int C;
 	matrix[N, 2] X;
-	vector[M] beta[C];
+	array[C] vector[M] beta;
 	vector<lower=0>[M] precision;
 
 }
@@ -74,7 +74,7 @@ transformed data{
 }
 generated quantities{
 
-  vector[M] y[N];
+  array[N] vector[M] y;
 
   y = beta_regression_rng(  X, vector_array_to_matrix(beta),  precision);
 }
diff --git a/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan b/dev/stan_models/glm_multinomial_logit_linear_simulate_data.stan
@@ -3,18 +3,18 @@ data{
   int M; // Number of categories
   int C;
   int A;
-  int exposure[N];
+  array[N] int exposure;
   matrix[N, C] X;
   matrix[A, A] XA;
   matrix[C,M] beta;
   real variability_multiplier;
 }
 parameters{
-  real prec_coeff[2];
+  array[2] real prec_coeff;
   real<lower=0> prec_sd;
 }
 generated quantities{
-  int counts[N, M];
+  array[N, M] int counts;
   matrix[M, N] normal_draws;
   matrix[A,M] alpha;
   matrix[M,N] mu = (X * beta)';
@@ -32,7 +32,7 @@ generated quantities{
   precision = (X[,1:A] * alpha)';
 
    for(i in 1:cols(mu)) {
-      normal_draws[,i] = to_vector( normal_rng(mu[,i],  fabs(precision[,i] / variability_multiplier ) )  ); // sd decreased because different representation from beta binomial
+      normal_draws[,i] = to_vector( normal_rng(mu[,i],  abs(precision[,i] / variability_multiplier ) )  ); // sd decreased because different representation from beta binomial
     }
 
   for(i in 1:cols(normal_draws)) {