07_instruments_heterogeneity_review.Rmd

---
title: "Exploration of MD SNPs heterogeneity using various robust methods"
author: "Marina Vabistsevits"
date: "`r Sys.Date()`"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(readr)

library(tidyr)
library(dplyr)

library(TwoSampleMR)
library(MRPRESSO)
library(RadialMR)
library(mrclust)
library(phenoscanner)
library(ieugwasr)

library(cowplot)
library(ggplot2)
```

```{r message=F}
# set path for pre-calculated data, outside the code repo
# `local` / `remote` (reading data from RDSF)
currently_working_env = "local"
source("set_paths.R")
set_paths(currently_working_env)

# metafile
data_lookup<-read_csv(paste0("metadata/data_lookup.csv")) 
bcac_lookup<-read_csv(paste0("metadata/data_lookup_BCAC.csv")) 

# load functions
source("functions.R")

# collecting data for Source Data file here:
source_data<- list()
```

# Select and load data - one at a time

```{r}

# primary MD unadjusted data

da <- read_tsv(paste0(data_path_tophits, "dense_area_unadj_tophits.tsv")) 
exposure_name <- unique(da$exposure)
exp_code <- "DA_unadj"

da <- read_tsv(paste0(data_path_tophits, "nondense_area_unadj_tophits.tsv")) 
exposure_name <- unique(da$exposure)
exp_code <- "NDA_unadj"

da <- read_tsv(paste0(data_path_tophits, "percent_density_unadj_tophits.tsv")) 
exposure_name <- unique(da$exposure)
exp_code <- "PD_unadj"


# # secondary "adjsuted for BMI" MD data
# 
# da <- read_tsv(paste0(data_path_tophits, "dense_area_tophits.tsv"))
# exposure_name <- unique(da$exposure)
# exp_code <- "DA" 
# 
# da <- read_tsv(paste0(data_path_tophits, "nondense_area_tophits.tsv"))
# exposure_name <- unique(da$exposure)
# exp_code <- "NDA" 
# 
# da <- read_tsv(paste0(data_path_tophits, "percent_density_tophits.tsv"))
# exposure_name <- unique(da$exposure)
# exp_code <- "PD" 
# 
# 
# # exploratory "all top hits" data
# 
# da <- read_tsv(paste0(data_path_tophits, "dense_area_tophits_all.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "DA_all"
# 
# da <- read_tsv(paste0(data_path_tophits, "nondense_area_tophits_all.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "NDA_all"
# 
# da <- read_tsv(paste0(data_path_tophits, "percent_density_tophits_all.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "PD_all"
# 
# 
# # data from BCAC MD for comarison
# 
# da <- read_tsv(paste0(data_path_tophits, "BCAC_DA_tophits.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "DA_bcac"
# 
# da <- read_tsv(paste0(data_path_tophits, "BCAC_NDA_tophits.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "NDA_bcac"
# 
# da <- read_tsv(paste0(data_path_tophits, "BCAC_PMD_tophits.tsv")) 
# exposure_name <- unique(da$exposure)
# exp_code <- "PD_bcac"


```

# MR and basic sensitivity analyses

```{r}

# MR
if (grepl("unadj", exp_code)){dir <- "md_unadj"}

out <- extract_outcome_data(snps = da$SNP,
                            outcome = "ieu-a-1126")
harmonised<- harmonise_data(exposure_dat = da, 
                                outcome_dat = out) %>% 
             TwoSampleMR::split_outcome() %>% 
             separate(outcome, sep = "\\(", into = c("outcome", "tmp"), remove = T) %>% 
             mutate(outcome = paste(outcome, "(BCAC 2017)") )

res_single <- mr_singlesnp(harmonised, 
                               all_method=c("mr_ivw_mre","mr_egger_regression", "mr_weighted_median"))%>% 
                  mutate(SNP = gsub("Inverse variance weighted (multiplicative random effects)", "IVW (MRE)", SNP, fixed = T)) 

  
res <- mr(harmonised) %>% 
  generate_odds_ratios()

# sensitivity
het<- mr_heterogeneity(harmonised)
egger_int <- mr_pleiotropy_test(harmonised)

p1 <- mr_scatter_plot_manual(res, harmonised)


p2 <- mr_forest_plot(res_single) 

res_loo <- mr_leaveoneout(harmonised)
p3 <- mr_leaveoneout_plot_manual(res_loo)

x<-cowplot::plot_grid( p2[[1]], p3[[1]], p1[[1]],nrow=2) 

```

# MR-PRESSO 
```{r}
# selection of NbDistribution
## Data Frame of nsnps and number of iterations
## idea from: https://github.com/sjfandrews/MR_ADPhenome/blob/b64d8821dbf1546090f47e0642cc8092592cddc8/workflow/scripts/mr_MRPRESSO.R
#df.NbD <- data.frame(n = c(10, 50, 100, 500, 1000, 1500, 2000),
#                     NbDistribution = c(10000, 10000, 10000, 25000, 50000, 75000, 100000))
#nsnps <- nrow(harmonised)
#SignifThreshold <- 0.05
#NbDistribution <- df.NbD[which.min(abs(df.NbD$n - nsnps)), 2]
#
# i have < 20 SNPs, so will need ~ 10000 NbDistribution - but result sis the same with 1000, so will keep that
```


```{r}
# run MR-presso
library(MRPRESSO)
mr_presso <- mr_presso(data = harmonised,
                       BetaOutcome = "beta.outcome", BetaExposure = "beta.exposure", 
                       SdOutcome = "se.outcome", SdExposure = "se.exposure",
                       OUTLIERtest = TRUE, DISTORTIONtest = TRUE,  
                       NbDistribution = 1000,  SignifThreshold = 0.05)

mr_presso_df <- as.data.frame(mr_presso$`Main MR results`) %>% 
                rename(b=`Causal Estimate`, se = Sd) %>% 
                mutate(Exposure = exp_code,
                       Outcome = unique(harmonised$outcome)) %>%
                generate_odds_ratios() %>% 
                select(Exposure, Outcome, everything())


outlier_indicies <- mr_presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`

outliers<- harmonised %>% 
              select(SNP) %>% 
              mutate(index = row_number()) %>% 
              filter(index %in% outlier_indicies) %>% pull(SNP)

# mr without outliers

res_outliers_presso <- harmonised %>% 
        filter(!SNP %in% outliers) %>% 
        mr() %>% 
        filter(method == "Inverse variance weighted") %>% 
        rename(p=pval, SNP=method) %>% 
        mutate(SNP = "Outlier corrected - IVW")

res_single_presso <- bind_rows(res_single, res_outliers_presso)
outliers_list = append(outliers, 'Outlier corrected - IVW')

res_single_presso <- res_single_presso %>% mutate(outcome = "Overall breast cancer")

source_data[["fig4a"]]<- res_single_presso

forest_presso <- mr_forest_plot_outliers(res_single_presso,  outliers_list= outliers_list, outliers_colour = "steelblue", method = "MR-PRESSO") 

ggsave(plot=forest_presso, 
      height=5, width=6,
       filename=paste0(results_path, dir, "/presso_radial/", exp_code, "_snps_outliers_forest_presso.png"))


# presso supl file

supl <- res_single_presso %>% 
  mutate(is_outlier = ifelse(SNP %in% outliers, T, F)) %>% 
  select(-nsnp, -id.exposure)

write_tsv(supl,  paste0(results_path, dir, "/presso_radial/", exp_code,"_presso_results.tsv") )
```


# F-stat and Steiger filtering 

```{r}
sf_out <- calc_steiger(harmonised, exposure_ss = 24192, outcome_ss = 228951, outcome_ncase = 122977, outcome_ncontrol = 105974)
sf_out$Fstat
sf_out$total_r2
sf_out$directionality$correct_causal_direction
sf_out$single_rsq$steiger_dir
```

# Radial MR

```{r}
library(RadialMR)
# format
raddat <- format_radial(
  harmonised$beta.exposure, harmonised$beta.outcome,
  harmonised$se.exposure, harmonised$se.outcome, 
  harmonised$SNP)

# test IVW 
ivwrad <- ivw_radial(raddat, alpha=0.05/nrow(raddat), weights=3) 
dim(ivwrad$outliers)[1]  #9 outliers at bonf 
#ivwrad <- ivw_radial(raddat, alpha=0.05, weights=3)
#dim(ivwrad$outliers)[1] #13 outliers at 0.05

# test Egger
eggrad <- egger_radial(raddat, alpha=0.05/nrow(raddat), weights=3) # bonf
#eggrad <- egger_radial(raddat, alpha=0.05, weights=3) # 0.05
dim(eggrad$outliers)[1] #12 outliers at 0.05  (9 bonf)


#plot_radial(ivwrad,radial_scale = T, show_outliers = F,scale_match= T)
#plot_radial(eggrad,radial_scale = T, show_outliers = F,scale_match= T)

# make plot
radial_plot <- plot_radial(c(ivwrad,eggrad), radial_scale = T, show_outliers = F,scale_match= T)
ggsave(plot=radial_plot, 
      height=5, width=6,
       filename=paste0(results_path, dir, "/presso_radial/", exp_code, "_snps_outliers_radialplot.png"))


# select outliers
ivwrad$qstatistic 
ivwrad$sortoutliers <- ivwrad$outliers[order(ivwrad$outliers$p.value),]
ivwrad$sortoutliers$Qsum <- cumsum(ivwrad$sortoutliers$Q_statistic)
ivwrad$sortoutliers$Qdif <- ivwrad$sortoutliers$Qsum - ivwrad$qstatistic

# mr without outliers
res_outliers_radial <- harmonised %>% 
        filter(!SNP %in% ivwrad$sortoutliers$SNP) %>% 
        mr() %>% 
        filter(method == "Inverse variance weighted") %>% 
        rename(p=pval, SNP=method) %>% 
        mutate(SNP = "Outlier corrected - IVW")

res_outliers_radial <- bind_rows(res_single, res_outliers_radial)
outliers_list = append(ivwrad$sortoutliers$SNP, 'Outlier corrected - IVW')

res_outliers_radial <- res_outliers_radial %>% mutate(outcome = "Overall breast cancer")

source_data[["fig4b"]]<- res_outliers_radial


forest_radial <- mr_forest_plot_outliers(res_outliers_radial,  outliers_list=outliers_list, outliers_colour = "orange", method = "Radial-MR") 

ggsave(plot=forest_radial, 
      height=5, width=6,
      filename=paste0(results_path, dir, "/presso_radial/", exp_code,"_snps_outliers_forest_radial.png"))

# radial supl file

supl <- res_outliers_radial %>% 
  mutate(is_outlier = ifelse(SNP %in% ivwrad$sortoutliers$SNP, T, F)) %>% 
  select(-nsnp, -id.exposure)

write_tsv(supl, paste0(results_path, dir, "/presso_radial/", exp_code,"_radial_supl.tsv"))


```


# MR Clust

```{r}
library(mrclust)

# Only keep single-SNP results from relevant columns
res_single <- mr_singlesnp(harmonised)%>% 
                  mutate(SNP = gsub("Inverse variance weighted", "IVW", SNP)) %>% 
                  dplyr::select("SNP", "exposure", "outcome",  "b", "se") %>% 
                  filter(grepl("rs", SNP))


# Keep relevant columns for clustering
harmonised_sub <- harmonised %>%  dplyr::select("SNP", "exposure", "outcome",
                                       "beta.exposure", "se.exposure",
                                       "beta.outcome", "se.outcome")

cluster_data <- left_join(harmonised_sub, res_single) %>% drop_na()
# check how namy SNPs for dropped - not sure why this happens
print(paste("SNP count before and after harmonisation+singleMR:", nrow(harmonised), "vs", nrow(cluster_data)))

cluster_results <- mr_clust_em(theta = cluster_data$b, 
                                   theta_se = cluster_data$se,
                                   bx = cluster_data$beta.exposure, 
                                   by = cluster_data$beta.outcome,
                                   bxse = cluster_data$se.exposure,
                                   byse = cluster_data$se.outcome,
                                   obs_names = cluster_data$SNP)
save(cluster_results, file=paste0(results_path,"MRClust_results/",exp_code ,"_mrclust_results.Rdata"))
load(file=paste0(results_path,"MRClust_results/",exp_code ,"_mrclust_results.Rdata"))


#clusters = unique(cluster_results$results$best$cluster_class)

clust_plot_best = cluster_results$plots$two_stage +
  ggplot2::xlab(paste0("Genetic association with ", exposure_name)) +
  ggplot2::ylab("Genetic association with Overall breast cancer") +
  ggplot2::labs(title = "")+
  ggplot2::labs(subtitle = "MR-Clust scatter plot")+
  ggplot2::theme(axis.title.y = ggplot2::element_text(size = 8), 
                 axis.title.x = ggplot2::element_text(size = 8) )


clust_plot_best

ggsave(plot=clust_plot_best, 
       filename=paste0(results_path,"MRClust_results/",exp_code ,"_snps_clusters.png"))


source_data[["fig4c"]] <-cluster_results$results$best

# extract SNP (and optionally drop null/junk)
clust_list <- names(cluster_results$cluster_membership)
#clust_list <- clust_list[!grepl("Null|Junk",clust_list )]


# for each SNP find the cluster with the higjer probability
clust_snp_df <- tibble()
for (cluster_name in clust_list){
  
  clust_assignments <- cluster_results$cluster_membership[[cluster_name]]
  # unlist data
  clust_assignments_df <- data.frame(probability = rep(names(clust_assignments), sapply(clust_assignments, length)),
                 rsID = unlist(clust_assignments)) %>% 
                            mutate(cluster = cluster_name)
  # drop rownames
  rownames(clust_assignments_df)<-NULL
  # add to total df
  clust_snp_df <- bind_rows(clust_snp_df, clust_assignments_df)
}
# select higher probability cluster for each SNP
clust_snp_df_top_cluster <- 
  clust_snp_df %>%
  group_by(rsID) %>%
  arrange(desc(probability)) %>% 
  dplyr::slice(1) # pick top one

write_tsv(clust_snp_df_top_cluster, paste0(results_path,"MRClust_results/",exp_code ,"_snps_clusters.tsv"))


# cluster paletter from MR-Clust function
cbpalette <- c(cluster_Null = "#CC79A7", cluster_Junk = "#000000", `cluster_1` = "#999999", 
    `cluster_2` = "#0072B2", `cluster_3` = "#D55E00", `cluster_4` = "#F0E442", 
    `cluster_5` = "#009E73",  `cluster_6` = "#56B4E9", `cluster_7` = "#E69F00")

clust_snp_df_top_cluster<-read_tsv(paste0(results_path,"MRClust_results/",exp_code ,"_snps_clusters.tsv"))

res_single <- res_single %>% mutate(outcome = "Overall breast cancer")

forest_by_clusters <- mr_forest_plot_clusters(res_single,  outliers_df=clust_snp_df_top_cluster, outliers_colour_list=cbpalette)

forest_data = list(forest_by_clusters=forest_by_clusters,
                   res_single=res_single)
save(forest_data , file = paste0(results_path,"MRClust_results/",exp_code ,"_forest_plot.Rdata"))

#load(paste0(results_path,"MRClust_results/",exp_code ,"_forest_plot.Rdata"))

# do mr by cluster
mr_cluster_list <- list()

for (cluster_name in unique(clust_snp_df_top_cluster$cluster)){
  
  cluster_snps <- clust_snp_df_top_cluster %>% filter(cluster == cluster_name) %>% pull(rsID)
  
  yy <- harmonised %>% filter(SNP %in% cluster_snps)
  yy_mr <- mr(yy, method_list = c("mr_ivw")) %>% 
            generate_odds_ratios() %>%
            select(-lo_ci, -up_ci, id.exposure) %>% 
            mutate(cluster_no = cluster_name)
  mr_cluster_list[[cluster_name]] <- yy_mr
  
}

out <- bind_rows(mr_cluster_list) %>% filter(cluster_no != 'cluster_Junk')


# add cluster mr to plot

# add cluster MR to res_single
res_single_w_clust <- bind_rows(res_single, 
                                out %>% rename(SNP = cluster_no, p = pval) %>% 
                                        select(-c(method, nsnp, or, or_lci95, or_uci95))) %>% 
                      filter(!grepl("All", SNP))  %>% mutate(outcome = "Overall breast cancer")

extra_cluster_df <- data.frame(rsID = c("cluster_1", "cluster_2", "cluster_3", "cluster_4"),
                              cluster = c("cluster_1", "cluster_2", "cluster_3", "cluster_4"))
clust_snp_df_top_cluster <- bind_rows(clust_snp_df_top_cluster,extra_cluster_df)

res_single <- res_single %>% mutate(outcome = "Overall breast cancer")

source_data[["fig4d"]]<- res_single_w_clust 
forest_by_clusters <- mr_forest_plot_clusters(res_single_w_clust, 
                                              outliers_df=clust_snp_df_top_cluster, outliers_colour_list=cbpalette)
forest_by_clusters$p
ggsave(plot=forest_by_clusters$p, 
      height=5, width=6,
       filename=paste0(results_path,"MRClust_results/",exp_code ,"_snps_clusters_forest.png"))

# supl cluster
supl <- full_join(forest_by_clusters$dat, out) %>% generate_odds_ratios() %>% 
   select(SNP, exposure, outcome, b,se, starts_with('or'), cluster, probability) %>% arrange( SNP, cluster) %>% 
  filter(SNP != "")

write_tsv(supl, paste0(results_path,"MRClust_results/",exp_code ,"_cluster_supl.tsv"))

```


# PheWAS

```{r}
#phenoscanner
library(phenoscanner)
library(ieugwasr)

# get list of EU GWAS fro filtering Phewas results
ao_eu <-TwoSampleMR::available_outcomes() %>% filter(population == "European")

# test
#xx <- get_pheno_assoc(snp="rs11877925", ao_eu)

# snps list 
clust_snp_df_top_cluster <- read_tsv(paste0(results_path,"MRClust_results/",exp_code ,"_snps_clusters.tsv"))

# run PheWAS for each SNP
snp_phewas <- lapply(clust_snp_df_top_cluster$rsID, get_pheno_assoc, ao_eu )
names(snp_phewas) <- clust_snp_df_top_cluster$rsID

save(snp_phewas, file=paste0(results_path, "phewas/",exp_code,"_cluster_phewas.RData"))
```

```{r}
# pheWAS tidy
load(file=paste0(results_path, "phewas/",exp_code,"_cluster_phewas.RData"))


```


```{r}
# wrangling data for phewas plot
snp_phewas_df<-  snp_phewas %>% 
  purrr::reduce(plyr::rbind.fill) %>% 
  mutate(beta=as.numeric(beta)) %>%
  mutate(se=as.numeric(se)) %>% 
  # add FDR correction
  arrange(p) %>% mutate(p.fdr = p.adjust(p, method = "BH")) 


snp_phewas_df2 <- left_join(snp_phewas_df, clust_snp_df_top_cluster %>% dplyr::select(-probability), by=c('rsid' = 'rsID')) %>% 
                 mutate(log10P = -log10(p.fdr)) %>%  # using FDR pval
                 mutate(log10P_trunc = ifelse(log10P >= 50, 50, log10P)) %>% 
                 left_join(res_single %>% dplyr::select(rsid=SNP, MD_wald_ratio_beta = b), by='rsid') 

snp_order = res_single %>% dplyr::select(SNP,  b) %>% filter(grepl("rs", SNP)) %>% arrange(b) %>% pull(SNP)

snp_phewas_df2 <- snp_phewas_df2  %>%   mutate(rsid = factor(rsid, levels=snp_order))


snp_phewas_df3 <- snp_phewas_df2 %>% 
mutate(exposure_cat = case_when(
    grepl("breast cancer|Malignant neoplasm of breast|breast|mamm", trait, ignore.case = T) ~ "Breast related",
    TRUE ~ 'Other')) 
snp_phewas_df3 %>% filter(exposure_cat == "Breast related") %>% count(trait) %>% View()


breast_exceptions <- c("Breast size", "Breast size bra cup size in women", "Hypertrophy of breast", "Disorders of breast",
                       "Illnesses of mother: Breast cancer", "Mammographic density dense area")

snp_phewas_df3<- snp_phewas_df3 %>% 
  mutate(label = case_when((exposure_cat == "Breast related" & !trait %in% breast_exceptions) ~ "Breast cancer",
                                                             TRUE ~ "Other phenotypes")) %>% 
  mutate(include_in_plot = ifelse (p.fdr<=5e-08 , T, F))

snp_phewas_df3 %>%  filter(exposure_cat == "Breast related") %>% dplyr::select(rsid, trait, log10P, study, source, exposure_cat, label) %>% arrange(trait, rsid) %>% View()

write_tsv(snp_phewas_df3 %>% arrange(exposure_cat, rsid), 
          paste0(results_path,"phewas/",exp_code ,"_phewas_results.tsv"))


cbpalette <- c(cluster_Null = "#CC79A7", cluster_Junk = "#000000", `cluster_1` = "#999999", 
    `cluster_2` = "#0072B2", `cluster_3` = "#D55E00", `cluster_4` = "#F0E442", 
    `cluster_5` = "#009E73",  `cluster_6` = "#56B4E9", `cluster_7` = "#E69F00")

snp_phewas_df3_sub <- snp_phewas_df3 %>%  filter(p<=5e-08)
phewas_plot <- ggplot(data = snp_phewas_df3_sub, 
                      mapping = aes(x = rsid, y = log10P_trunc, colour=cluster, shape= label, size=label)) + 
  # modify jitter geom to look like a bubble plot
  geom_jitter(alpha=0.8,  position = position_jitter(width = 0.4))+
  theme_minimal_vgrid(8, rel_small = 1) +
  scale_color_manual(values=cbpalette)+
  scale_shape_manual(values=c(18, 3))+
  scale_size_manual(values=c(3.7, 2))+
  coord_flip()+
  theme(axis.text.x = element_text(angle = 0, hjust = 1),
        legend.position = "right")+
  labs(subtitle="PheWAS results for Dense area SNPs", colour= "Cluster", shape  = "Association with",
       y = "-log10 p-value of association with other traits", x = "Dense area SNPs, by cluster", colour= "cluster")+
  guides(size="none")

source_data[["fig5"]] <- snp_phewas_df3_sub

ggsave(plot=phewas_plot, 
      height=3.9, width=9, scale=1, dpi=300, # DA
      #height=3, width=9, scale=1, dpi=300, # NDA
      #height=3.3, width=9, scale=1, dpi=300, # PD
       filename=paste0(results_path,"phewas/",exp_code ,"_phewas_plot.png"))

ggsave(plot=phewas_plot, 
      height=3.9, width=9, scale=1, dpi=300, # DA
      #height=3, width=9, scale=1, dpi=300, # NDA
      #height=3.3, width=9, scale=1, dpi=300, # PD
       filename=paste0("figures_manuscript/figure5_poster.png"))


ggsave(plot=phewas_plot, 
      height=8, width=18, scale=1.2, dpi=300, units = c("cm"), # DA
      #height=3, width=9, scale=1, dpi=300, # NDA
      #height=3.3, width=9, scale=1, dpi=300, # PD
       filename=paste0("figures_manuscript/figure5_paper.svg"))


```


```{r}
# save source data
names(source_data)
openxlsx::write.xlsx(source_data, file = 'figures_manuscript/source_data2.xlsx')

```


# ~ collect plots
```{r}
library(cowplot)

four_plots <-
  plot_grid(forest_presso, forest_radial,
           clust_plot_best, forest_by_clusters$p,
           labels = c("a",
                      "b",
                      "c",
                      "d"),
           label_size = 12,
          axis = "t", nrow=2) 

ggsave(plot=four_plots, 
      #height=10, width=10, scale=1, dpi=300, # DA
      #height=8, width=10, scale=1, dpi=300, # NDA
      height=7, width=10, scale=1, dpi=300, # PD
      # filename=paste0(results_path,"MRClust_results/",exp_code ,"_snps_combined_plots.png"))
      filename=paste0("figures_manuscript/","figure4_", exp_code ,".png"))


ggsave(plot=four_plots, 
      height=16.5, width=18, dpi=300, scale=1.37, units=c("cm"),# DA
      #height=8, width=10, scale=1, dpi=300, # NDA
      #height=7, width=10, scale=1, dpi=300, # PD
      # filename=paste0(results_path,"MRClust_results/",exp_code ,"_snps_combined_plots.png"))
      filename=paste0("figures_manuscript/","figure4_", exp_code ,".svg"))


all <- plot_grid(top_row, bottom_row, ncol = 1, rel_heights = c(0.33, 0.66))

# version with phewas:

top_row <- plot_grid(forest_presso, forest_radial,forest_by_clusters ,
                     labels = c("a","b","c"),
                     label_size = 12, nrow=1)

bottom_row <-plot_grid(clust_plot_best, phewas_plot,
                       labels = c("d", "e"),
                       label_size = 12, nrow=1) 


five_figs<- plot_grid(top_row,NULL,  bottom_row,  label_size = 12, nrow = 3, rel_heights = c(0.42,0.07, 0.51))


# version for poster
two_plots <-
  plot_grid(clust_plot_best, forest_by_clusters$p,
           label_size = 12,
          axis = "t", nrow=1) 

ggsave(plot=two_plots, 
      #height=10, width=10, scale=1, dpi=300, # DA
      #height=8, width=10, scale=1, dpi=300, # PD
      height=7, width=14, scale=0.6, dpi=300, # PD
       filename=paste0("figures_manuscript/figure4_poster.png"))

```