CPTR5_analysis_markdown.Rmd

---
title: "CPTR-5 Krug: DSP Analysis of KS samples"
output: html_document
date: "2024-July-31"
---
# Setup

```{r setup, include=FALSE}
# Hide code, warnings and messages in the knitted report by default
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

# Allow at least 300 seconds for downloads (e.g. installing the DSP package)
options(timeout = max(300, getOption("timeout")))

# ---- Global parameters ----

# Knitting: these flags are used as the `include=` option of later chunks
include.qc <- FALSE
include.DE <- FALSE
include.PCA <- FALSE

# Exporting: these flags gate the write-to-disk steps in later chunks
global.export.deg <- FALSE
global.export.volcano <- FALSE
global.export.heatmap <- FALSE
export.gsea.input <- FALSE
```

```{r, include=FALSE}

library(GeomxTools)
library(dplyr)
library(limma)
library(edgeR)
library(ggplot2)
library(ggrepel)
library(stringr)
library(PCAtools)
library(readxl)
library(gridExtra)
library(grid)

# Source the helper functions
source("DSP_functions.R")

# Results folder where the results should be exported
results.folder <- "results/"
run.folder <- "7_31_2024/"

```

```{r Load DSPWorkflow, include=include.qc}

# Optionally (re)install DSPWorkflow from the "dev" branch on GitHub.
# Set install.DSP to TRUE only when the package needs to be refreshed.
install.DSP <- FALSE

if (install.DSP) {
  library(devtools)
  install_github(repo = "NIDAP-Community/DSPWorkflow", ref = "dev")
}

# Attach the workflow package used by the chunks below
library(DSPWorkflow)

```

``` {r Load Data, include=include.qc}
# Load all inputs

# Collect every DCC file under the "dcc" folder (searched recursively).
# The dot is escaped so only names that really end in ".dcc" are matched;
# an unescaped "." would match any character before "dcc".
dcc.files <- list.files(
  path = "dcc",
  pattern = "\\.dcc$",
  full.names = TRUE,
  recursive = TRUE
)

# Probe kit configuration and the annotation workbook for this study
pkc.files <- "Hs_R_NGS_WTA_v1.0.pkc"
pheno.data.file <- "annotation_Krug_CPTR_5_July2024_NC_edit.xlsx"

```

# Study Design

```{r Study Design, include=include.qc}

# Build the study design from the DCC, PKC and annotation inputs.
# The returned list is used below for its `object` and `sankey.plot` elements.
sdesign.list <- studyDesign(
  dcc.files = dcc.files,
  pkc.files = pkc.files,
  pheno.data.file = pheno.data.file,
  pheno.data.sheet = "annotation",
  pheno.data.dcc.col.name = "Sample_ID",
  protocol.data.col.names = "ROI",
  experiment.data.col.names = "panel",
  slide.name.col = "slide name",
  class.col = "class",
  region.col = "Region",
  segment.col = "segment",
  area.col = "area",
  nuclei.col = "nuclei",
  sankey.exclude.slide = FALSE,
  segment.id.length = 10
)

# Summary of the GeoMx object
print(sdesign.list$object)

# Sankey plot of the study design
print(sdesign.list$sankey.plot)
```

# QC

```{r QC Preprocessing, include=include.qc}

# Run the QC step on the study-design object with the thresholds below
qc.output <- qcProc(
  object = sdesign.list$object,
  min.segment.reads = 1000,
  percent.trimmed = 80,
  percent.stitched = 80,
  percent.aligned = 80,
  percent.saturation = 50,
  min.negative.count = 3,
  max.ntc.count = 1000,
  min.nuclei = 200,
  min.area = 1000,
  print.plots = TRUE
)

# Show the QC summary and the per-segment / per-probe flag tables
print(qc.output$segments.qc)
print(qc.output$segment.flags)
print(qc.output$probe.flags)

# Optionally export the flag tables
export.flags <- FALSE

if (export.flags) {
  write.csv(qc.output$segment.flags, file =  "qc/segment_qc_flags.csv")
  write.csv(qc.output$probe.flags, file =  "qc/probe_qc_flags.csv")
}
    
```
# Filtering

```{r Segment Filtering by Gene Detection, include=include.qc}

library(GeomxTools)
library(dplyr)
library(knitr)

# GeoMx object returned by the QC step
object <- qc.output$object

# Set up lists of segment IDs
segment.list.total <- pData(object)$segmentID

# Define modules: the PKC file name(s) without the ".pkc" extension
modules <- gsub(".pkc", "", pkc.files)

# Calculate limit of quantification (LOQ) in each segment
# LOQ = geomean(NegProbes) * geoSD(NegProbes)^(LOQ cutoff), floored at loq.min
# LOQ is calculated for each module (pkc file)
loq <- data.frame(row.names = colnames(object))

loq.min <- 2
loq.cutoff <- 2

for (module in modules) {
  neg.geo.mean <- paste0("NegGeoMean_", module)
  neg.geo.sd <- paste0("NegGeoSD_", module)

  # Only modules that have both negative-probe summary columns get an LOQ
  if (all(c(neg.geo.mean, neg.geo.sd) %in% colnames(pData(object)))) {
    loq[, module] <-
      pmax(loq.min,
           pData(object)[, neg.geo.mean] *
             pData(object)[, neg.geo.sd] ^ loq.cutoff)
  }
}

# Store the loq df in the annotation df
pData(object)$loq <- loq

# Master LOQ matrix (features in rows, segments in columns): TRUE where the
# count is above the segment's LOQ. Built per module and bound once, instead
# of growing the matrix inside a loop.
loq.mat <- do.call(rbind, lapply(modules, function(module) {
  # Gather rows with the given module
  ind <- fData(object)$Module == module

  # Check if each feature has counts above the LOQ
  t(esApply(object[ind, ], MARGIN = 1,
            FUN = function(x) {
              x > loq[, module]
            }))
}))

# Ensure ordering since this is stored outside of the geomxSet
loq.mat <- loq.mat[fData(object)$TargetName, ]

# Evaluate and filter segment gene detection rate
# Save detection rate information (in percent) to pheno data
pData(object)$GenesDetected <- colSums(loq.mat, na.rm = TRUE)
pData(object)$GeneDetectionRate <-
  100 * (pData(object)$GenesDetected / nrow(object))

# Establish detection bins
detection.bins <- c("less_than_1", "1_5", "5_10", "10_15", "greater_than_15")

# Determine detection thresholds: 1%, 5%, 10%, 15%, >15%
# include.lowest = TRUE puts a rate of exactly 0 into the first bin; without
# it such a segment would get NA because the first interval is open at 0.
pData(object)$DetectionThreshold <-
  cut(pData(object)$GeneDetectionRate,
      breaks = c(0, 1, 5, 10, 15, 100),
      labels = detection.bins,
      include.lowest = TRUE)

# Stacked bar plot of different cut points (1%, 5%, 10%, 15%)
# after_stat(count) replaces the deprecated ..count.. notation
segment.stacked.bar.plot <- ggplot(pData(object),
                                   aes(x = DetectionThreshold)) +
  geom_bar(aes(fill = region)) +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  theme_bw() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(x = "Gene Detection Rate",
       y = "Segments, #",
       fill = "Segment Type")

# Cut percent genes detected at 1, 5, 10, 15
segment.table <- kable(table(pData(object)$DetectionThreshold,
                             pData(object)$class))

# Make a list of segments with low detection (below 5%)
low.detection.segments <- pData(object) %>%
  filter(GeneDetectionRate < 5) %>%
  select(any_of(c("segmentID", "GeneDetectionRate")))

print(low.detection.segments)

# Export a summary of the segment gene detection
segment.detection.summary <- pData(object) %>%
  select(any_of(c("segmentID", "GeneDetectionRate", "DetectionThreshold")))

export.segment.detection.summary <- FALSE

if (export.segment.detection.summary) {
  write.csv(segment.detection.summary, "qc/segment_detection_summary.csv")
}

```

```{r Remove Segments, include=include.qc}

# Minimum gene detection rate (in percent) a segment needs to be kept
segment.gene.rate.cutoff <- 1

# Keep only the segments (columns) whose detection rate reaches the cutoff
object.segment.filtered <- object[
  ,
  pData(object)$GeneDetectionRate >= segment.gene.rate.cutoff
]


```

```{r Gene Filtering by Detection per Segment, include=include.qc}
library(scales)

# Evaluate and filter study-wide gene detection rate

# Restrict the LOQ matrix to the retained segments. drop = FALSE keeps it a
# matrix even when a single segment is left, so rowSums() still works.
loq.mat <- loq.mat[, colnames(object.segment.filtered), drop = FALSE]

# Number of retained segments in which each gene is above the LOQ
fData(object.segment.filtered)$DetectedSegments <-
  rowSums(loq.mat, na.rm = TRUE)

# Detection rate in percent. The denominator is the number of RETAINED
# segments, i.e. the same segments that were counted in the numerator.
fData(object.segment.filtered)$DetectionRate <-
  100 * (fData(object.segment.filtered)$DetectedSegments /
           nrow(pData(object.segment.filtered)))

# Establish detection bins
detection.bins <- c("0", "less_than_1", "1_5", "5_10", "10_20", "20_30",
                    "30_40", "40_50", "greater_than_50")

# Determine detection thresholds: 0, <1%, 1-5%, 5-10%, 10-20%, 20-30%,
# 30-40%, 40-50%, >50%. The -1 lower break gives a rate of 0 its own bin.
fData(object.segment.filtered)$DetectionThreshold <-
  cut(fData(object.segment.filtered)$DetectionRate,
      breaks = c(-1, 0, 1, 5, 10, 20, 30, 40, 50, 100),
      labels = detection.bins)

# Stacked bar plot of the number of genes per detection bin
# after_stat(count) replaces the deprecated ..count.. notation
gene.stacked.bar.plot <- ggplot(fData(object.segment.filtered),
                                aes(x = DetectionThreshold)) +
  geom_bar(aes(fill = Module)) +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  theme_bw() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(x = "Gene Detection Rate",
       y = "Genes, #",
       fill = "Probe Set")

# Gene of interest detection table
goi <- c("A2M", "CD44")

# DetectionRate is already in percent, so convert it back to a fraction
# before percent() multiplies by 100 for display.
goi.table <- data.frame(
  Gene = goi,
  Number = fData(object.segment.filtered)[goi, "DetectedSegments"],
  DetectionRate = percent(
    fData(object.segment.filtered)[goi, "DetectionRate"] / 100
  )
)
print(goi.table)

# Plot detection rate: number of genes detected in at least x% of segments
plot.detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50))
plot.detect$Number <- vapply(
  plot.detect$Freq,
  function(x) {
    sum(fData(object.segment.filtered)$DetectionRate >= x)
  },
  integer(1)
)

plot.detect$Rate <- plot.detect$Number / nrow(fData(object.segment.filtered))
rownames(plot.detect) <- plot.detect$Freq

genes.detected.plot <- ggplot(plot.detect,
                              aes(x = as.factor(Freq), y = Rate, fill = Rate)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = formatC(Number, format = "d", big.mark = ",")),
            vjust = 1.6, color = "black", size = 4) +
  scale_fill_gradient2(low = "orange2", mid = "lightblue",
                       high = "dodgerblue3", midpoint = 0.65,
                       limits = c(0, 1),
                       labels = scales::percent) +
  theme_bw() +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1),
                     expand = expansion(mult = c(0, 0))) +
  labs(x = "% of Segments",
       y = "Genes Detected, % of Panel > loq")

# Export a summary of the gene detection.
# Feature data is keyed by TargetName (there is no segmentID column here).
gene.detection.summary <- fData(object.segment.filtered) %>%
  select(any_of(c("TargetName", "DetectionRate", "DetectionThreshold")))

export.gene.detection.summary <- FALSE

if (export.gene.detection.summary) {
  write.csv(gene.detection.summary, "qc/gene_detection_summary.csv")
}

```

```{r Remove Genes, include=include.qc}

# Cutoff for the study-wide gene detection rate.
# NOTE(review): DetectionRate is stored as a percentage (it is multiplied by
# 100 where it is computed), so 0.01 here means 0.01%, not 1% - confirm that
# this is the intended threshold.
study.gene.rate.cutoff <- 0.01

# Keep genes at or above the cutoff, and always keep the negative control
# probe(s) for downstream use.
feature.data <- fData(object.segment.filtered)

negative.probe.fData <- subset(feature.data, CodeClass == "Negative")
neg.probes <- unique(negative.probe.fData$TargetName)

genes.to.keep <- feature.data$DetectionRate >= study.gene.rate.cutoff |
  feature.data$TargetName %in% neg.probes

object.gene.filtered <- object.segment.filtered[genes.to.keep, ]

```
  
# Normalization

```{r Normalization, include=include.qc}
  
    # Names of the plots printed for each normalization result
    norm.plot.names <- c("multi.plot", "boxplot.raw", "boxplot.norm")

    # Normalization with norm = "q3"
    q3.normalization.output <- geomxNorm(object = object.gene.filtered,
                                         norm = "q3")

    for (plot.name in norm.plot.names) {
      print(q3.normalization.output[[plot.name]])
    }

    # Normalization with norm = "neg"
    neg.normalization.output <- geomxNorm(object = object.gene.filtered,
                                          norm = "neg")

    for (plot.name in norm.plot.names) {
      print(neg.normalization.output[[plot.name]])
    }

    # Export the q3-normalized counts
    export.counts <- TRUE
    if (export.counts) {

      # Gather the counts from the q_norm assay slot
      norm.counts <- q3.normalization.output$object@assayData$q_norm

      # Write to csv in the results folder
      write.csv(norm.counts,
                file = paste0(results.folder, "q3_norm_counts.csv"))
    }
    
```

```{r Heatmap of Variable Genes, include=include.PCA}

# Add a log2-transformed copy of the q3-normalized counts to the object
assayDataElement(object = q3.normalization.output$object, elt = "log_q") <-
  assayDataApply(q3.normalization.output$object, 2,
                 FUN = log, base = 2, elt = "q_norm")

# Log2 counts used for all heatmaps below
q3.norm.log.counts <- q3.normalization.output$object@assayData$log_q

# Coefficient of variation (CV = sd / mean), computed per row of the counts
calc_CV <- function(x) {sd(x) / mean(x)}
cv.df <- data.frame(CV = apply(q3.norm.log.counts, 1, calc_CV))

# Top 500 rows by CV, highest first
cv.df.top <- cv.df %>% arrange(desc(CV)) %>% slice(1:500)
top.cv.gene.list <- rownames(cv.df.top)

# Subset the counts to the top CV genes, then put them in CV order
is.top.cv.gene <- rownames(q3.norm.log.counts) %in% top.cv.gene.list
top.cv.heatmap.counts <- q3.norm.log.counts[is.top.cv.gene, ]

top.cv.order <- match(top.cv.gene.list, rownames(top.cv.heatmap.counts))
top.cv.heatmap.counts <- top.cv.heatmap.counts[top.cv.order, ]

# Colors for each annotation track shown above the heatmap columns
anno.colors <- list(
  Tumor = c(Biopsy = "slateblue",
            PDX = "coral"),
  region = c(tumor = "pink",
             vessel = "seagreen1",
             immune = "lightblue",
             'immune/stroma' = "maroon"),
  segment = c(full_ROI = "lightsalmon",
              LANA_pos = "aquamarine"),
  Source = c(Skin = "steelblue1",
             GI = "indianred")
)

# Set up the annotation: keep only the columns that have colors defined
annotation <- pData(q3.normalization.output$object)

annotation.heatmap <- annotation %>%
  select(names(anno.colors))

# Draw one row-scaled, correlation-clustered heatmap with the shared settings.
#   heatmap.counts     counts to plot (genes in rows, samples in columns)
#   column.annotation  data frame of column annotations
#   main.title         plot title
#   annotation.colors  named list of colors per annotation track
# Returns the pheatmap object.
plot_cv_heatmap <- function(heatmap.counts,
                            column.annotation,
                            main.title,
                            annotation.colors) {
  pheatmap(heatmap.counts,
           main = main.title,
           scale = "row",
           show_colnames = FALSE,
           show_rownames = FALSE,
           border_color = NA,
           cluster_rows = TRUE,
           cluster_cols = TRUE,
           clustering_method = "average",
           clustering_distance_rows = "correlation",
           clustering_distance_cols = "correlation",
           color = colorRampPalette(c("blue", "white", "red"))(120),
           annotation_row = NULL,
           annotation_col = column.annotation,
           annotation_colors = annotation.colors)
}

# All samples
cv.heatmap.all.samples <- plot_cv_heatmap(
  heatmap.counts = top.cv.heatmap.counts,
  column.annotation = annotation.heatmap,
  main.title = "Top 500 Variable Genes - All Samples",
  annotation.colors = anno.colors
)

# Skin

# Subset the annotation to the skin samples
annotation.heatmap.skin <- annotation.heatmap %>%
  filter(Source == "Skin")

# Get the sample IDs for skin
skin.sample.IDs <- rownames(annotation.heatmap.skin)

# Subset the counts for only the skin samples
top.cv.heatmap.counts.skin <- as.data.frame(top.cv.heatmap.counts) %>%
  select(all_of(skin.sample.IDs))

cv.heatmap.skin <- plot_cv_heatmap(
  heatmap.counts = top.cv.heatmap.counts.skin,
  column.annotation = annotation.heatmap.skin,
  main.title = "Top 500 Variable Genes - Skin Only",
  annotation.colors = anno.colors
)

# GI

# Subset the annotation to the GI samples
annotation.heatmap.gi <- annotation.heatmap %>%
  filter(Source == "GI")

# Get the sample IDs for GI
gi.sample.IDs <- rownames(annotation.heatmap.gi)

# Subset the counts for only the GI samples
top.cv.heatmap.counts.gi <- as.data.frame(top.cv.heatmap.counts) %>%
  select(all_of(gi.sample.IDs))

cv.heatmap.gi <- plot_cv_heatmap(
  heatmap.counts = top.cv.heatmap.counts.gi,
  column.annotation = annotation.heatmap.gi,
  main.title = "Top 500 Variable Genes - GI Only",
  annotation.colors = anno.colors
)

# Export all CV heatmaps
export.cv.heatmaps <- FALSE
if (export.cv.heatmaps) {

  # File name -> heatmap object, saved in this order
  cv.heatmap.exports <- list(
    "cv_heatmap_all_samples.png" = cv.heatmap.all.samples,
    "cv_heatmap_skin.png" = cv.heatmap.skin,
    "cv_heatmap_gi.png" = cv.heatmap.gi
  )

  for (heatmap.file in names(cv.heatmap.exports)) {
    ggsave(paste0(results.folder,
                  run.folder,
                  heatmap.file),
           cv.heatmap.exports[[heatmap.file]],
           height = 10,
           width = 14)
  }
}

```


# Count AOIs per annotation

```{r Count AOIs per annotation, include=TRUE}

# Annotation columns whose levels are counted
aoi.count.columns <- c("segment", "region", "class", "Tumor", "Source",
                       "Corrections MOH")

annotation.subset <- select(annotation, all_of(aoi.count.columns))

# One frequency table per annotation column
aoi.counts <- lapply(annotation.subset, table)

print(aoi.counts)

```

```{r PCA Setup, include=include.qc}

# PCA works on the q3-normalized object
object <- q3.normalization.output$object

# Normalized counts and their log2 transform
norm.counts <- object@assayData$q_norm
log.counts <- as.data.frame(log(norm.counts, base = 2))

# Segment annotation
annotation <- pData(object)

# Sort annotation rows and count columns by name so the two line up
annotation.row.order <- order(rownames(annotation))
cleaned.annotation.df <- annotation[annotation.row.order, ]

count.column.order <- order(colnames(log.counts))
log.counts.cleaned <- as.data.frame(log.counts[, count.column.order])

# Build a 2 x 2 grid of PCA biplots, one per main annotation.
#   pca.table  PCA result passed as the first argument to biplot()
# Returns the arranged grob (draw it with grid.draw() or save it with ggsave()).

main_annotation_PCA <- function(pca.table){

  # Annotation column to color by -> subtitle of that panel, in grid order
  panel.subtitles <- c(slide_name = "Slide Analysis",
                       region = "Region analysis",
                       segment = "Segment Analysis",
                       class = "Class Analysis")

  # One biplot per annotation, converted to a grob for arranging
  panel.grobs <- lapply(names(panel.subtitles), function(color.column) {
    panel.plot <- biplot(pca.table,
                         colby = color.column,
                         legendPosition = "right",
                         legendLabSize = 10,
                         legendIconSize = 5,
                         lab = NULL,
                         title = NULL,
                         subtitle = panel.subtitles[[color.column]])
    ggplotGrob(panel.plot)
  })

  arrangeGrob(grobs = panel.grobs, nrow = 2, ncol = 2)

}


```

```{r PCA analysis for all AOIs, include = include.PCA}
# Generate a PCA table for all samples
pca.table <- pca(log.counts.cleaned,
                 metadata = cleaned.annotation.df,
                 removeVar = 0.1)

# 2 x 2 panel of PCA biplots for the main annotations
all.aoi.pca <- main_annotation_PCA(pca.table = pca.table)

# Save into the folder of the current run (results.folder / run.folder)
# rather than a hard-coded folder from an earlier run.
ggsave(paste0(results.folder, run.folder, "all_aoi_pca.pdf"),
       all.aoi.pca,
       width = 14,
       height = 12)


# Extra PCAs
# Same PCA, colored by the gene detection bin of each segment
pca.plot.gene.detect <- biplot(pca.table,
                               colby = "DetectionThreshold",
                               legendPosition = "right",
                               legendLabSize = 10,
                               legendIconSize = 5,
                               lab = NULL,
                               title = "All AOIs",
                               subtitle = "by Gene Detection")

print(pca.plot.gene.detect)


```

```{r ## PCA analysis for only immune AOIs, include = include.PCA}

# Keep only the AOIs whose region is immune or immune/stroma
annotation.immune <- filter(
  cleaned.annotation.df,
  region %in% c("immune", "immune/stroma")
)

# Matching count columns, selected by AOI ID
log.counts.immune <- select(
  log.counts.cleaned,
  rownames(annotation.immune)
)

# PCA on the immune subset
pca.immune <- pca(
  log.counts.immune,
  metadata = annotation.immune,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
all.aoi.pca <- main_annotation_PCA(pca.table = pca.immune)
grid.draw(all.aoi.pca)

#ggsave("results/April23_2024/immune_pca.pdf", all.aoi.pca, width = 14, height = 12)

```

```{r PCA analysis with immune/stroma removed, include = include.PCA}

# Drop the AOIs whose region is immune/stroma
annotation.stroma.removed <- filter(
  cleaned.annotation.df,
  region != "immune/stroma"
)

# Matching count columns, selected by AOI ID
log.counts.stroma.removed <- select(
  log.counts.cleaned,
  rownames(annotation.stroma.removed)
)

# PCA on the remaining AOIs
pca.stroma.removed <- pca(
  log.counts.stroma.removed,
  metadata = annotation.stroma.removed,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.stroma.removed)
grid.draw(pca.plot)

#ggsave("results/April23_2024/stroma_removed_pca.pdf", pca.plot, width = 14, height = 12)

```

```{r PCA analysis for tumor only, include = include.PCA}

# Keep only the AOIs whose region is tumor
annotation.tumor <- filter(
  cleaned.annotation.df,
  region == "tumor"
)

# Matching count columns, selected by AOI ID
log.counts.tumor <- select(
  log.counts.cleaned,
  rownames(annotation.tumor)
)

# PCA on the tumor subset
pca.tumor <- pca(
  log.counts.tumor,
  metadata = annotation.tumor,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.tumor)
grid.draw(pca.plot)

#ggsave("results/April23_2024/tumor_only_pca.pdf", pca.plot, width = 14, height = 12)

```

```{r PCA analysis for skin only, include = include.PCA}

# Keep only the AOIs whose Source is Skin
annotation.skin <- filter(
  cleaned.annotation.df,
  Source == "Skin"
)

# Matching count columns, selected by AOI ID
log.counts.skin <- select(
  log.counts.cleaned,
  rownames(annotation.skin)
)

# PCA on the skin subset
pca.skin <- pca(
  log.counts.skin,
  metadata = annotation.skin,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.skin)
grid.draw(pca.plot)

#ggsave("results/April23_2024/skin_only_pca.pdf", pca.plot, width = 14, height = 12)

```

```{r PCA analysis for GI only, include = include.PCA}

# Keep only the AOIs whose Source is GI
annotation.gi <- filter(
  cleaned.annotation.df,
  Source == "GI"
)

# Matching count columns, selected by AOI ID
log.counts.gi <- select(
  log.counts.cleaned,
  rownames(annotation.gi)
)

# PCA on the GI subset
pca.gi <- pca(
  log.counts.gi,
  metadata = annotation.gi,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.gi)
grid.draw(pca.plot)

#ggsave("results/April23_2024/gi_only_pca.pdf", pca.plot, width = 14, height = 12)

```

```{r PCA analysis for Full ROI only, include = include.PCA}

# Keep only the AOIs whose segment is full_ROI
annotation.full_roi <- filter(
  cleaned.annotation.df,
  segment == "full_ROI"
)

# Matching count columns, selected by AOI ID
log.counts.full_roi <- select(
  log.counts.cleaned,
  rownames(annotation.full_roi)
)

# PCA on the full ROI subset
pca.full_roi <- pca(
  log.counts.full_roi,
  metadata = annotation.full_roi,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.full_roi)
grid.draw(pca.plot)

#ggsave("results/April23_2024/full_roi_only_pca.pdf", pca.plot, width = 14, height = 12)

```

```{r PCA analysis for LANA+ only, include = include.PCA}

# Keep only the AOIs whose segment is LANA_pos
annotation.lana <- filter(
  cleaned.annotation.df,
  segment == "LANA_pos"
)

# Matching count columns, selected by AOI ID
log.counts.lana <- select(
  log.counts.cleaned,
  rownames(annotation.lana)
)

# PCA on the LANA+ subset
pca.lana <- pca(
  log.counts.lana,
  metadata = annotation.lana,
  removeVar = 0.1
)

# Panel of PCA biplots, drawn on the current device
pca.plot <- main_annotation_PCA(pca.table = pca.lana)
grid.draw(pca.plot)

#ggsave("results/April23_2024/lana_only_pca.pdf", pca.plot, width = 14, height = 12)

```

# Differential Expression

```{r DE Setup, include=FALSE}

# Colors for each annotation track of the DE heatmaps
anno.colors <- list(
  Tumor = c(Biopsy = "slateblue",
            PDX = "coral"),
  region = c(tumor = "pink",
             vessel = "seagreen1",
             immune = "lightblue",
             'immune/stroma' = "maroon"),
  segment = c(full_ROI = "lightsalmon",
              LANA_pos = "aquamarine"),
  Source = c(Skin = "steelblue1",
             GI = "indianred")
)

# Folder holding the DE results of this run, and the files already in it
DE.results.folder <- paste0(results.folder, run.folder, "DE/")
DE.results.files <- list.files(DE.results.folder)

```

### DE contrast: Skin Biopsy, Tumor (infected) & Vessel (uninfected), Full ROI

```{r, include=include.DE}

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("tumor", "vessel")
source.types <- c("Skin")
segment.types <- c("full_ROI")

# Name of the contrast: tumor, source, segment and the two regions joined
# with underscores. It is used in every file name written by this chunk.
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       region.types[[2]],
                       sep = "_")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated.
# The exact results file is tested, so other files that merely contain the
# contrast name (such as the annotation csv written below, or the results of
# a longer-named contrast) cannot trigger the cached branch.
de.folder <- paste0(results.folder, run.folder, "DE/")
de.results.path <- paste0(de.folder, contrast.name, "_de.results.csv")

if (file.exists(de.results.path)) {

  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))

  annotation.df <- lmm.input$annotation

  # loq is a nested data frame; keep its first column so the annotation
  # can be written as a flat csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(de.folder, contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "region",
                           regions = c("tumor", "vessel"),
                           group.col = "Tumor",
                           groups = c("Biopsy"),
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df, de.results.path)

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))

# Export the results
export.deg.list <- global.export.deg
if (export.deg.list) {
  write.csv(results.df,
            file = paste0(de.folder, "DEG_lists/", contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = "DE in Tumor",
                               x.axis.title = "Infected (Tumor) vs. Uninfected (Vessel)")

# Export the volcano.
# The plot is passed explicitly: without `plot =`, ggsave() saves the last
# plot that was displayed, and the volcano is only printed further down.
export.volcano <- global.export.volcano
if (export.volcano) {
  ggsave(filename = paste0(de.folder, "volcano/", contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(all_of(names(anno.colors)))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (gated on the chunk-level flag set from the global one)
export.heatmap <- global.export.heatmap
if (export.heatmap) {

  ggsave(heatmap.plot,
         filename = paste0(de.folder, "heatmap/", contrast.name, "_heatmap_plot.png"),
         width = 14,
         height = 10)

}


```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
# As computed here: signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)

# Define the main contrast groups
contrast.field <- "region"
contrast.levels <- c("tumor", "vessel")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

# Sample columns only; the per-gene statistics are computed from this table
contrast.A.samples <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.samples <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

# Add the gene name, mean and standard deviation to each contrast level.
# Both statistics are taken over the sample columns only, so the mean column
# never enters the standard deviation.
contrast.A.counts <- contrast.A.samples
contrast.A.counts$gene <- rownames(contrast.A.samples)
contrast.A.counts$mean.A <- rowMeans(contrast.A.samples)
contrast.A.counts$stdev.A <- apply(contrast.A.samples, 1, sd)

contrast.B.counts <- contrast.B.samples
contrast.B.counts$gene <- rownames(contrast.B.samples)
contrast.B.counts$mean.B <- rowMeans(contrast.B.samples)
contrast.B.counts$stdev.B <- apply(contrast.B.samples, 1, sd)

# Combine both levels per gene
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Rank genes by signal to noise, highest first, keeping the summary columns.
# A level with a single sample has an NA standard deviation, which makes the
# ratio NA for every gene.
GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if (export.gsea.input == TRUE) {

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```


### DE contrast: Skin Biopsy, Tumor (infected) & Immune, Full ROI

```{r, include=include.DE}

# Setup annotation groups for the contrast.
# region.types is ordered condition first, reference second.
tumor.types <- c("Biopsy")
region.types <- c("tumor", "immune")
source.types <- c("Skin")
segment.types <- c("full_ROI")

# Define the name of the contrast (reused in every exported file name)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       region.types[[2]],
                       sep = "_")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): DE.results.files is built outside this chunk. If it also lists
# the "_annotation.csv" written below, more than one file matches on later
# knits and DE is recomputed - confirm how the file list is generated.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/",
                    contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  # (region.types / tumor.types are passed instead of repeating the literals,
  # matching the other DE chunks)
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "region",
                           regions = region.types,
                           group.col = "Tumor",
                           groups = tumor.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_de.results.csv"))

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))


# Export the results
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/",
                          contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = "DE in Tumor",
                               x.axis.title = "Infected (Tumor) vs. Immune")

# Export the volcano
# The plot is passed explicitly so the export does not depend on whatever
# ggplot2 currently holds as last_plot().
export.volcano <- global.export.volcano
if(export.volcano == TRUE){
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/",
                           contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the per-chunk flag, like the DEG and volcano exports)
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/",
                           contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking

# Define the main contrast groups
contrast.field <- "region"
contrast.levels <- c("tumor", "immune")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed from the sample columns only. Selecting every
# numeric column in a second mutate() would also pick up the freshly added
# mean column and treat it as an extra sample when computing the sd.
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Rank genes by signal to noise, highest (up in level A) first.
# NOTE: a gene with zero sd in both groups divides by zero (NaN/Inf).
GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/",
                          contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: Skin Biopsy, Vessel & Immune, Full ROI

```{r, include=include.DE}

# Setup annotation groups for the contrast.
# region.types is ordered condition first, reference second.
tumor.types <- c("Biopsy")
region.types <- c("vessel", "immune")
source.types <- c("Skin")
segment.types <- c("full_ROI")

# Define the name of the contrast (reused in every exported file name)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       region.types[[2]],
                       sep = "_")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): DE.results.files is built outside this chunk. If it also lists
# the "_annotation.csv" written below, more than one file matches on later
# knits and DE is recomputed - confirm how the file list is generated.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/",
                    contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "region",
                           regions = region.types,
                           group.col = "Tumor",
                           groups = tumor.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_de.results.csv"))

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))


# Export the results
# The DEG export flag is set here from the global switch, so the test below
# does not depend on a value left behind by an earlier chunk.
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/",
                          contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", region.types[[1]]),
                               x.axis.title = paste0(region.types[[1]],
                                                     " vs. ",
                                                     region.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the export does not depend on whatever
# ggplot2 currently holds as last_plot().
export.volcano <- global.export.volcano
if(export.volcano == TRUE){
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/",
                           contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the per-chunk flag, like the DEG and volcano exports)
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/",
                           contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```


#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking

# Define the main contrast groups
contrast.field <- "region"
contrast.levels <- c("vessel", "immune")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed from the sample columns only. Selecting every
# numeric column in a second mutate() would also pick up the freshly added
# mean column and treat it as an extra sample when computing the sd.
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Rank genes by signal to noise, highest (up in level A) first.
# NOTE: a gene with zero sd in both groups divides by zero (NaN/Inf).
GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/",
                          contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: Skin Biopsy & PDX, Tumor, LANA

```{r, include=include.DE}

# Setup annotation groups for the contrast.
# tumor.types is ordered condition first, reference second.
tumor.types <- c("Biopsy", "PDX")
region.types <- c("tumor")
source.types <- c("Skin")
segment.types <- c("LANA_pos")
# Sample identifiers matched against the "Corrections MOH" annotation column
paired.samples <- c("TB0118", "323473",
                    "TB0133", "323477",
                    "TB0144", "338852",
                    "TB0164", "343961")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types,
  "Corrections MOH" = paired.samples)

# Define the name of the contrast (reused in every exported file name)
contrast.name <- paste(tumor.types[[1]],
                       tumor.types[[2]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): DE.results.files is built outside this chunk. If it also lists
# the "_annotation.csv" written below, more than one file matches on later
# knits and DE is recomputed - confirm how the file list is generated.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/",
                    contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Compare the tumor types (condition, reference) within the Source group
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "Tumor",
                           regions = tumor.types,
                           group.col = "Source",
                           groups = source.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_de.results.csv"))

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))


# Export the results
# The DEG export flag is set here from the global switch, so the test below
# does not depend on a value left behind by an earlier chunk.
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/",
                          contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", tumor.types[[1]]),
                               x.axis.title = paste0(tumor.types[[1]],
                                                     " vs. ",
                                                     tumor.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the export does not depend on whatever
# ggplot2 currently holds as last_plot().
export.volcano <- global.export.volcano
if(export.volcano == TRUE){
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/",
                           contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the per-chunk flag, like the DEG and volcano exports)
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/",
                           contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking

# Define the main contrast groups
contrast.field <- "Tumor"
contrast.levels <- c("Biopsy", "PDX")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed from the sample columns only. Selecting every
# numeric column in a second mutate() would also pick up the freshly added
# mean column and treat it as an extra sample when computing the sd.
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Rank genes by signal to noise, highest (up in level A) first.
# NOTE: a gene with zero sd in both groups divides by zero (NaN/Inf).
GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/",
                          contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```


### DE contrast: Skin Biopsy & PDX, Tumor, full ROI

```{r, include=include.DE}

# Setup annotation groups for the contrast.
# tumor.types is ordered condition first, reference second.
tumor.types <- c("Biopsy", "PDX")
region.types <- c("tumor")
source.types <- c("Skin")
segment.types <- c("full_ROI")
# Sample identifiers matched against the "Corrections MOH" annotation column
paired.samples <- c("TB0118", "323473",
                    "TB0133", "323477",
                    "TB0144", "338852",
                    "TB0164", "343961")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types,
  "Corrections MOH" = paired.samples)

# Define the name of the contrast (reused in every exported file name)
contrast.name <- paste(tumor.types[[1]],
                       tumor.types[[2]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): DE.results.files is built outside this chunk. If it also lists
# the "_annotation.csv" written below, more than one file matches on later
# knits and DE is recomputed - confirm how the file list is generated.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/",
                    contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Compare the tumor types (condition, reference) within the Source group
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "Tumor",
                           regions = tumor.types,
                           group.col = "Source",
                           groups = source.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_de.results.csv"))

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))


# Export the results
# The DEG export flag is set here from the global switch, so the test below
# does not depend on a value left behind by an earlier chunk.
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/",
                          contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", tumor.types[[1]]),
                               x.axis.title = paste0(tumor.types[[1]],
                                                     " vs. ",
                                                     tumor.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the export does not depend on whatever
# ggplot2 currently holds as last_plot().
export.volcano <- global.export.volcano
if(export.volcano == TRUE){
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/",
                           contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the per-chunk flag, like the DEG and volcano exports)
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/",
                           contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking

# Define the main contrast groups
contrast.field <- "Tumor"
contrast.levels <- c("Biopsy", "PDX")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>%
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed from the sample columns only. Selecting every
# numeric column in a second mutate() would also pick up the freshly added
# mean column and treat it as an extra sample when computing the sd.
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Rank genes by signal to noise, highest (up in level A) first.
# NOTE: a gene with zero sd in both groups divides by zero (NaN/Inf).
GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/",
                          contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: GI Biopsy & PDX, Tumor, LANA

```{r, include=include.DE}

# Setup annotation groups for the contrast.
# tumor.types is ordered condition first, reference second.
tumor.types <- c("Biopsy", "PDX")
region.types <- c("tumor")
source.types <- c("GI")
segment.types <- c("LANA_pos")
# Sample identifiers matched against the "Corrections MOH" annotation column
paired.samples <- c("BM3748133", "360174")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types,
  "Corrections MOH" = paired.samples)

# Define the name of the contrast (reused in every exported file name)
contrast.name <- paste(tumor.types[[1]],
                       tumor.types[[2]],
                       source.types[[1]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): DE.results.files is built outside this chunk. If it also lists
# the "_annotation.csv" written below, more than one file matches on later
# knits and DE is recomputed - confirm how the file list is generated.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/",
                    contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as csv
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Compare the tumor types (condition, reference) within the Source group
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = "Tumor",
                           regions = tumor.types,
                           group.col = "Source",
                           groups = source.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/",
                   contrast.name, "_de.results.csv"))

}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene",
                                      "logfc",
                                      "pval",
                                      "padj"))


# Export the results
# The DEG export flag is set here from the global switch, so the test below
# does not depend on a value left behind by an earlier chunk.
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/",
                          contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", tumor.types[[1]]),
                               x.axis.title = paste0(tumor.types[[1]],
                                                     " vs. ",
                                                     tumor.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the export does not depend on whatever
# ggplot2 currently holds as last_plot().
export.volcano <- global.export.volcano
if(export.volcano == TRUE){
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/",
                           contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the per-chunk flag, like the DEG and volcano exports)
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/",
                           contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Build the GSEA preranked gene list, ranked by the signal to noise ratio
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# `lmm.input` and `contrast.name` are not defined in this chunk; they must
# already exist in the session when it runs.

# Define the main contrast groups
contrast.field <- "Tumor"
contrast.levels <- c("Biopsy", "PDX")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# The numeric (sample) columns are captured BEFORE the summary columns are
# added, so the standard deviation is computed from the samples only and does
# not accidentally include the freshly added mean column.
contrast.A.values <- select_if(contrast.A.counts, is.numeric)
contrast.B.values <- select_if(contrast.B.counts, is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Combine both levels per gene and rank by signal to noise (highest first)
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

# Export the ranked list only when requested by the global flag
if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, 
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), 
            row.names = FALSE)
  
}

```

### DE contrast: GI Biopsy & PDX, Tumor, full ROI

```{r, include=include.DE}

# DE contrast: Biopsy vs. PDX within GI samples (tumor region, full ROI),
# restricted to the paired samples listed below.
# Produces: results.df (gene, logfc, pval, padj), a volcano plot and a heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy", "PDX")
region.types <- c("tumor")
source.types <- c("GI")
segment.types <- c("full_ROI")
paired.samples <- c("BM3748133", "360174")

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types, 
  "Corrections MOH" = paired.samples)

# Define the name of the contrast (used as the prefix of every exported file)
contrast.name <- paste(tumor.types[[1]], 
                       tumor.types[[2]], 
                       source.types[[1]], 
                       segment.types[[1]], 
                       region.types[[1]], 
                       sep = "_")

# Output locations for this contrast
de.folder <- paste0(results.folder, run.folder, "DE/")
de.results.path <- paste0(de.folder, contrast.name, "_de.results.csv")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated.
# The cached file is used only when exactly one entry of DE.results.files
# matches the contrast name.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq before writing the annotation
  # (presumably loq is a matrix column -- TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, 
            paste0(de.folder, contrast.name, "_annotation.csv"), 
            row.names = FALSE)
  
} else {
  # Use the function from the DSPWorkflow package
  
  # "Within Groups" analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                           analysis.type = "Within Groups", 
                           region.col = "Tumor", 
                           regions = tumor.types, 
                           group.col = "Source", 
                           groups = source.types, 
                           n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results
  
  # Cache the results so later runs can load them instead of refitting
  write.csv(results.df, de.results.path)
  
}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))

# Export the results
# The DEG list export is controlled by the global DEG export flag
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(de.folder, "DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", tumor.types[[1]]), 
                               x.axis.title = paste0(tumor.types[[1]],
                                                     " vs. ",
                                                     tumor.types[[2]]))

# Export the volcano
# The plot is passed explicitly so ggsave() does not depend on last_plot()
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(filename = paste0(de.folder, "volcano/", contrast.name, "_volcano_plot.png"), 
         plot = volcano.output$volcano.plot, 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(filename = paste0(de.folder, "heatmap/", contrast.name, "_heatmap_plot.png"), 
         plot = heatmap.plot, 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Build the GSEA preranked gene list, ranked by the signal to noise ratio
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# `lmm.input` and `contrast.name` are not defined in this chunk; they must
# already exist in the session when it runs.

# Define the main contrast groups
contrast.field <- "Tumor"
contrast.levels <- c("Biopsy", "PDX")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# The numeric (sample) columns are captured BEFORE the summary columns are
# added, so the standard deviation is computed from the samples only and does
# not accidentally include the freshly added mean column.
contrast.A.values <- select_if(contrast.A.counts, is.numeric)
contrast.B.values <- select_if(contrast.B.counts, is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Combine both levels per gene and rank by signal to noise (highest first)
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

# Export the ranked list only when requested by the global flag
if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, 
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), 
            row.names = FALSE)
  
}

```

### DE contrast: GI Biopsy, Tumor & Vessel, full ROI

```{r, include=include.DE}

# DE contrast: tumor vs. vessel regions within GI Biopsy samples (full ROI).
# Produces: results.df (gene, logfc, pval, padj), a volcano plot and a heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("tumor", "vessel")
source.types <- c("GI")
segment.types <- c("full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
contrast.types <- region.types

# Gather the column names to be used in defining contrasts
# Disabled alternative that also restricts to the paired samples:
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used as the prefix of every exported file)
contrast.name <- paste(tumor.types[[1]], 
                       source.types[[1]], 
                       segment.types[[1]], 
                       region.types[[1]], 
                       region.types[[2]], 
                       sep = "_")

# Output locations for this contrast
de.folder <- paste0(results.folder, run.folder, "DE/")
de.results.path <- paste0(de.folder, contrast.name, "_de.results.csv")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated.
# The cached file is used only when exactly one entry of DE.results.files
# matches the contrast name.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq before writing the annotation
  # (presumably loq is a matrix column -- TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, 
            paste0(de.folder, contrast.name, "_annotation.csv"), 
            row.names = FALSE)
  
} else {
  # Use the function from the DSPWorkflow package
  
  # "Within Groups" analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                           analysis.type = "Within Groups", 
                           region.col = "region", 
                           regions = region.types, 
                           group.col = "Source", 
                           groups = source.types, 
                           n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results
  
  # Cache the results so later runs can load them instead of refitting
  write.csv(results.df, de.results.path)
  
}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))

# Export the results
# The DEG list export is controlled by the global DEG export flag
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(de.folder, "DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so ggsave() does not depend on last_plot()
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(filename = paste0(de.folder, "volcano/", contrast.name, "_volcano_plot.png"), 
         plot = volcano.output$volcano.plot, 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(filename = paste0(de.folder, "heatmap/", contrast.name, "_heatmap_plot.png"), 
         plot = heatmap.plot, 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Build the GSEA preranked gene list, ranked by the signal to noise ratio
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# `lmm.input` and `contrast.name` are not defined in this chunk; they must
# already exist in the session when it runs.

# Define the main contrast groups
contrast.field <- "region"
contrast.levels <- c("tumor", "vessel")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# The numeric (sample) columns are captured BEFORE the summary columns are
# added, so the standard deviation is computed from the samples only and does
# not accidentally include the freshly added mean column.
contrast.A.values <- select_if(contrast.A.counts, is.numeric)
contrast.B.values <- select_if(contrast.B.counts, is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Combine both levels per gene and rank by signal to noise (highest first)
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

# Export the ranked list only when requested by the global flag
if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, 
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), 
            row.names = FALSE)
  
}

```

### DE contrast: Skin & GI Biopsy, Tumor, LANA

```{r, include=include.DE}

# DE contrast: Skin vs. GI within Biopsy samples (tumor region, LANA_pos segment).
# Produces: results.df (gene, logfc, pval, padj), a volcano plot and a heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("tumor")
source.types <- c("Skin", "GI")
segment.types <- c("LANA_pos")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
# Disabled alternative that also restricts to the paired samples:
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used as the prefix of every exported file)
contrast.name <- paste(tumor.types[[1]], 
                       source.types[[1]], 
                       source.types[[2]], 
                       segment.types[[1]], 
                       region.types[[1]], 
                       sep = "_")

# Output locations for this contrast
de.folder <- paste0(results.folder, run.folder, "DE/")
de.results.path <- paste0(de.folder, contrast.name, "_de.results.csv")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated.
# The cached file is used only when exactly one entry of DE.results.files
# matches the contrast name.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq before writing the annotation
  # (presumably loq is a matrix column -- TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, 
            paste0(de.folder, contrast.name, "_annotation.csv"), 
            row.names = FALSE)
  
} else {
  # Use the function from the DSPWorkflow package
  
  # "Within Groups" analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                           analysis.type = "Within Groups", 
                           region.col = contrast.column, 
                           regions = contrast.types, 
                           group.col = group.column, 
                           groups = group.types, 
                           n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results
  
  # Cache the results so later runs can load them instead of refitting
  write.csv(results.df, de.results.path)
  
}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))

# Export the results
# The DEG list export is controlled by the global DEG export flag
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(de.folder, "DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so ggsave() does not depend on last_plot()
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(filename = paste0(de.folder, "volcano/", contrast.name, "_volcano_plot.png"), 
         plot = volcano.output$volcano.plot, 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(filename = paste0(de.folder, "heatmap/", contrast.name, "_heatmap_plot.png"), 
         plot = heatmap.plot, 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Build the GSEA preranked gene list, ranked by the signal to noise ratio
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# `lmm.input` and `contrast.name` are not defined in this chunk; they must
# already exist in the session when it runs.

# Define the main contrast groups
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# The numeric (sample) columns are captured BEFORE the summary columns are
# added, so the standard deviation is computed from the samples only and does
# not accidentally include the freshly added mean column.
contrast.A.values <- select_if(contrast.A.counts, is.numeric)
contrast.B.values <- select_if(contrast.B.counts, is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Combine both levels per gene and rank by signal to noise (highest first)
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

# Export the ranked list only when requested by the global flag
if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, 
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), 
            row.names = FALSE)
  
}

```

### DE contrast: Skin & GI PDX, Tumor, LANA

```{r, include=include.DE}

# DE contrast: Skin vs. GI within PDX samples (tumor region, LANA_pos segment).
# Produces: results.df (gene, logfc, pval, padj), a volcano plot and a heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("PDX")
region.types <- c("tumor")
source.types <- c("Skin", "GI")
segment.types <- c("LANA_pos")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
# Disabled alternative that also restricts to the paired samples:
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used as the prefix of every exported file)
contrast.name <- paste(tumor.types[[1]], 
                       source.types[[1]], 
                       source.types[[2]], 
                       segment.types[[1]], 
                       region.types[[1]], 
                       sep = "_")

# Output locations for this contrast
de.folder <- paste0(results.folder, run.folder, "DE/")
de.results.path <- paste0(de.folder, contrast.name, "_de.results.csv")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated.
# The cached file is used only when exactly one entry of DE.results.files
# matches the contrast name.
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq before writing the annotation
  # (presumably loq is a matrix column -- TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, 
            paste0(de.folder, contrast.name, "_annotation.csv"), 
            row.names = FALSE)
  
} else {
  # Use the function from the DSPWorkflow package
  
  # "Within Groups" analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                           analysis.type = "Within Groups", 
                           region.col = contrast.column, 
                           regions = contrast.types, 
                           group.col = group.column, 
                           groups = group.types, 
                           n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results
  
  # Cache the results so later runs can load them instead of refitting
  write.csv(results.df, de.results.path)
  
}

# Adjust column names: copy the contrast-specific columns to fixed names
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
results.df$logfc <- results.df[[logfc.column]]

pval.column <- grep("_pval", colnames(results.df), value = TRUE)
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))

# Export the results
# The DEG list export is controlled by the global DEG export flag
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(de.folder, "DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so ggsave() does not depend on last_plot()
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(filename = paste0(de.folder, "volcano/", contrast.name, "_volcano_plot.png"), 
         plot = volcano.output$volcano.plot, 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(filename = paste0(de.folder, "heatmap/", contrast.name, "_heatmap_plot.png"), 
         plot = heatmap.plot, 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Build the GSEA preranked gene list, ranked by the signal to noise ratio
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# `lmm.input` and `contrast.name` are not defined in this chunk; they must
# already exist in the session when it runs.

# Define the main contrast groups
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# The numeric (sample) columns are captured BEFORE the summary columns are
# added, so the standard deviation is computed from the samples only and does
# not accidentally include the freshly added mean column.
contrast.A.values <- select_if(contrast.A.counts, is.numeric)
contrast.B.values <- select_if(contrast.B.counts, is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Combine both levels per gene and rank by signal to noise (highest first)
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

# Export the ranked list only when requested by the global flag
if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, 
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), 
            row.names = FALSE)
  
}

```

### DE contrast: Skin & GI Biopsy, Tumor, full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE results, then draw and optionally export the
# volcano plot and heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("tumor")
source.types <- c("Skin", "GI")
segment.types <- c("full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Listed contrasts are condition, reference
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
# (variant that also restricts to the paired samples, kept for reference)
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Define the name of the contrast (used in plot titles and file names)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       source.types[[2]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): this is a regex match of the contrast name against
# DE.results.files (defined elsewhere); exactly one hit is required to reuse
# the cached results - confirm that list only holds the *_de.results.csv files.
DE.result.file <- grepl(contrast.name, DE.results.files)
if (sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as a
  # flat table (presumably loq is stored as a matrix column - TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = contrast.column,
                           regions = contrast.types,
                           group.col = group.column,
                           groups = group.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv"))

}

# Adjust column names: locate the contrast-specific columns by pattern and
# copy them into fixed names. Exactly one match per pattern is expected.
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
pval.column <- grep("_pval", colnames(results.df), value = TRUE)
adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)

stopifnot(length(logfc.column) == 1,
          length(pval.column) == 1,
          length(adj.pval.column) == 1)

results.df$logfc <- results.df[[logfc.column]]
results.df$pval <- results.df[[pval.column]]
results.df$padj <- results.df[[adj.pval.column]]
results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>%
  select(all_of(c("gene", "logfc", "pval", "padj")))

# Export the results
# The DEG export is driven by the global flag. Previously the flag was copied
# into export.volcano while export.deg.list was tested, so the global setting
# never reached this condition.
export.deg.list <- global.export.deg
if (isTRUE(export.deg.list)) {
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", contrast.types[[1]]),
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the saved file does not depend on whatever
# ggplot2 currently considers the "last plot".
export.volcano <- global.export.volcano
if (isTRUE(export.volcano)) {
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(all_of(names(anno.colors)))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the local flag, matching the volcano export)
export.heatmap <- global.export.heatmap
if (isTRUE(export.heatmap)) {

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# Uses lmm.input and contrast.name as left by the preceding DE chunk.

# Define the main contrast groups
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed over the sample columns only. Selecting every
# numeric column for the standard deviation would also pick up the freshly
# added mean column, which acts as an extra observation sitting exactly on the
# mean and shrinks the result (n instead of n - 1 in the denominator).
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

# Combine both levels per gene and rank by signal to noise, highest first
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise)

if (isTRUE(export.gsea.input)) {

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: Skin & GI PDX, Tumor, full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE results, then draw and optionally export the
# volcano plot and heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("PDX")
region.types <- c("tumor")
source.types <- c("Skin", "GI")
segment.types <- c("full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Listed contrasts are condition, reference
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
# (variant that also restricts to the paired samples, kept for reference)
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Define the name of the contrast (used in plot titles and file names)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       source.types[[2]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): this is a regex match of the contrast name against
# DE.results.files (defined elsewhere); exactly one hit is required to reuse
# the cached results - confirm that list only holds the *_de.results.csv files.
DE.result.file <- grepl(contrast.name, DE.results.files)
if (sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as a
  # flat table (presumably loq is stored as a matrix column - TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = contrast.column,
                           regions = contrast.types,
                           group.col = group.column,
                           groups = group.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv"))

}

# Adjust column names: locate the contrast-specific columns by pattern and
# copy them into fixed names. Exactly one match per pattern is expected.
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
pval.column <- grep("_pval", colnames(results.df), value = TRUE)
adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)

stopifnot(length(logfc.column) == 1,
          length(pval.column) == 1,
          length(adj.pval.column) == 1)

results.df$logfc <- results.df[[logfc.column]]
results.df$pval <- results.df[[pval.column]]
results.df$padj <- results.df[[adj.pval.column]]
results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>%
  select(all_of(c("gene", "logfc", "pval", "padj")))

# Export the results
# The DEG export is driven by the global flag. Previously the flag was copied
# into export.volcano while export.deg.list was tested, so the global setting
# never reached this condition.
export.deg.list <- global.export.deg
if (isTRUE(export.deg.list)) {
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", contrast.types[[1]]),
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the saved file does not depend on whatever
# ggplot2 currently considers the "last plot".
export.volcano <- global.export.volcano
if (isTRUE(export.volcano)) {
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(all_of(names(anno.colors)))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the local flag, matching the volcano export)
export.heatmap <- global.export.heatmap
if (isTRUE(export.heatmap)) {

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# Uses lmm.input and contrast.name as left by the preceding DE chunk.

# Define the main contrast groups
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed over the sample columns only. Selecting every
# numeric column for the standard deviation would also pick up the freshly
# added mean column, which acts as an extra observation sitting exactly on the
# mean and shrinks the result (n instead of n - 1 in the denominator).
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

# Combine both levels per gene and rank by signal to noise, highest first
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise)

if (isTRUE(export.gsea.input)) {

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: Skin & GI Biopsy, Vessel, full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE results, then draw and optionally export the
# volcano plot and heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("vessel")
source.types <- c("Skin", "GI")
segment.types <- c("full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Listed contrasts are condition, reference
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
# (variant that also restricts to the paired samples, kept for reference)
#contrast.groups.list <- list(
#  "Tumor" = tumor.types, 
#  "region" = region.types, 
#  "segment" = segment.types, 
#  "Source" = source.types, 
#  "Corrections MOH" = paired.samples)

contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Define the name of the contrast (used in plot titles and file names)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       source.types[[2]],
                       segment.types[[1]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): this is a regex match of the contrast name against
# DE.results.files (defined elsewhere); exactly one hit is required to reuse
# the cached results - confirm that list only holds the *_de.results.csv files.
DE.result.file <- grepl(contrast.name, DE.results.files)
if (sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as a
  # flat table (presumably loq is stored as a matrix column - TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = contrast.column,
                           regions = contrast.types,
                           group.col = group.column,
                           groups = group.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv"))

}

# Adjust column names: locate the contrast-specific columns by pattern and
# copy them into fixed names. Exactly one match per pattern is expected.
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
pval.column <- grep("_pval", colnames(results.df), value = TRUE)
adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)

stopifnot(length(logfc.column) == 1,
          length(pval.column) == 1,
          length(adj.pval.column) == 1)

results.df$logfc <- results.df[[logfc.column]]
results.df$pval <- results.df[[pval.column]]
results.df$padj <- results.df[[adj.pval.column]]
results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>%
  select(all_of(c("gene", "logfc", "pval", "padj")))

# Export the results
# The DEG export is driven by the global flag. Previously the flag was copied
# into export.volcano while export.deg.list was tested, so the global setting
# never reached this condition.
export.deg.list <- global.export.deg
if (isTRUE(export.deg.list)) {
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", contrast.types[[1]]),
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the saved file does not depend on whatever
# ggplot2 currently considers the "last plot".
export.volcano <- global.export.volcano
if (isTRUE(export.volcano)) {
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(all_of(names(anno.colors)))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the local flag, matching the volcano export)
export.heatmap <- global.export.heatmap
if (isTRUE(export.heatmap)) {

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# Uses lmm.input and contrast.name as left by the preceding DE chunk.

# Define the main contrast groups
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed over the sample columns only. Selecting every
# numeric column for the standard deviation would also pick up the freshly
# added mean column, which acts as an extra observation sitting exactly on the
# mean and shrinks the result (n instead of n - 1 in the denominator).
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

# Combine both levels per gene and rank by signal to noise, highest first
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise)

if (isTRUE(export.gsea.input)) {

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```


### DE contrast: Skin Biopsy, Tumor, LANA & full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE results, then draw and optionally export the
# volcano plot and heatmap.

# Setup annotation groups for the contrast
tumor.types <- c("Biopsy")
region.types <- c("tumor")
source.types <- c("Skin")
segment.types <- c("LANA_pos", "full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Listed contrasts are condition, reference
contrast.column <- "segment"
contrast.types <- segment.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Gather the column names to be used in defining contrasts
contrast.groups.list <- list(
  "Tumor" = tumor.types,
  "region" = region.types,
  "segment" = segment.types,
  "Source" = source.types)

# Define the name of the contrast (used in plot titles and file names)
contrast.name <- paste(tumor.types[[1]],
                       source.types[[1]],
                       segment.types[[1]],
                       segment.types[[2]],
                       region.types[[1]],
                       sep = "_")

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object,
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%
  select(all_of(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Check if the DE results have already been generated
# NOTE(review): this is a regex match of the contrast name against
# DE.results.files (defined elsewhere); exactly one hit is required to reuse
# the cached results - confirm that list only holds the *_de.results.csv files.
DE.result.file <- grepl(contrast.name, DE.results.files)
if (sum(DE.result.file) == 1) {

  # Load the previously generated DE results
  results.df <- as.data.frame(
    read.csv(paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")))

  annotation.df <- lmm.input$annotation

  # Keep only the first column of loq so the annotation can be written as a
  # flat table (presumably loq is stored as a matrix column - TODO confirm)
  annotation.df$loq <- annotation.df$loq[, 1]

  write.csv(annotation.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"),
            row.names = FALSE)

} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference
  results.list <- diffExpr(object = lmm.input$subset.object,
                           analysis.type = "Within Groups",
                           region.col = contrast.column,
                           regions = contrast.types,
                           group.col = group.column,
                           groups = group.types,
                           n.cores = parallel::detectCores())

  # Create the results df
  results.df <- results.list$results

  write.csv(results.df,
            paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv"))

}

# Adjust column names: locate the contrast-specific columns by pattern and
# copy them into fixed names. Exactly one match per pattern is expected.
logfc.column <- grep("logFC", colnames(results.df), value = TRUE)
pval.column <- grep("_pval", colnames(results.df), value = TRUE)
adj.pval.column <- grep("adjpval", colnames(results.df), value = TRUE)

stopifnot(length(logfc.column) == 1,
          length(pval.column) == 1,
          length(adj.pval.column) == 1)

results.df$logfc <- results.df[[logfc.column]]
results.df$pval <- results.df[[pval.column]]
results.df$padj <- results.df[[adj.pval.column]]
results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>%
  select(all_of(c("gene", "logfc", "pval", "padj")))

# Export the results
# The DEG export is driven by the global flag. Previously the flag was copied
# into export.volcano while export.deg.list was tested, so the global setting
# never reached this condition.
export.deg.list <- global.export.deg
if (isTRUE(export.deg.list)) {
  write.csv(results.df,
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"),
            row.names = FALSE)
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df,
                               title = contrast.name,
                               legend.title = paste0("DE in ", contrast.types[[1]]),
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# The plot is passed explicitly so the saved file does not depend on whatever
# ggplot2 currently considers the "last plot".
export.volcano <- global.export.volcano
if (isTRUE(export.volcano)) {
  ggsave(filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"),
         plot = volcano.output$volcano.plot,
         width = 14,
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
annotation.heatmap <- lmm.input$annotation %>%
  select(all_of(names(anno.colors)))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts,
  de.results = results.df,
  top.degs = TRUE,
  annotation.column = annotation.heatmap,
  annotation.row = NULL,
  anno.colors = anno.colors,
  cluster.rows = FALSE,
  cluster.columns = TRUE,
  main.title = contrast.name,
  row.gaps = NULL,
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap (test the local flag, matching the volcano export)
export.heatmap <- global.export.heatmap
if (isTRUE(export.heatmap)) {

  ggsave(filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"),
         plot = heatmap.plot,
         width = 14,
         height = 10)

}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
#
# Uses lmm.input and contrast.name as left by the preceding DE chunk.

# Define the main contrast groups
contrast.field <- "segment"
contrast.levels <- c("LANA_pos", "full_ROI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)
contrast.B.annotation <- lmm.input$annotation %>%
  filter(.data[[contrast.field]] == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>%
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation.
# Both statistics are computed over the sample columns only. Selecting every
# numeric column for the standard deviation would also pick up the freshly
# added mean column, which acts as an extra observation sitting exactly on the
# mean and shrinks the result (n instead of n - 1 in the denominator).
contrast.A.counts <- contrast.A.counts %>%
  mutate(mean.A = rowMeans(select(., all_of(contrast.A.sampleIDs))),
         stdev.A = apply(select(., all_of(contrast.A.sampleIDs)), 1, sd))

contrast.B.counts <- contrast.B.counts %>%
  mutate(mean.B = rowMeans(select(., all_of(contrast.B.sampleIDs))),
         stdev.B = apply(select(., all_of(contrast.B.sampleIDs)), 1, sd))

# Combine both levels per gene and rank by signal to noise, highest first
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

GSEA.preanked.df <- GSEA.preanked.df %>%
  mutate(signal2noise = (mean.A - mean.B) / (stdev.A + stdev.B)) %>%
  arrange(desc(signal2noise)) %>%
  select(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise)

if (isTRUE(export.gsea.input)) {

  write.csv(GSEA.preanked.df,
            file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"),
            row.names = FALSE)

}

```

### DE contrast: Skin PDX, Tumor, LANA & full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE model, then draw and optionally export the DEG
# list, volcano plot and heatmap.

# Setup annotation groups for the contrast
# Each vector lists the annotation values kept when subsetting
tumor.types <- c("PDX")
region.types <- c("tumor")
source.types <- c("Skin")
segment.types <- c("LANA_pos", "full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Levels are listed as condition, reference
contrast.column <- "segment"
contrast.types <- segment.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Annotation column -> allowed values, used to subset the data set
contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used in every output file name)
contrast.name <- paste0(tumor.types[[1]], 
                        "_",
                        source.types[[1]], 
                        "_", 
                        segment.types[[1]], 
                        "_", 
                        segment.types[[2]], 
                        "_",
                        region.types[[1]]
                        )

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Location of the cached DE results for this contrast
de.results.path <- paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")

# Check if the DE results have already been generated
# DE.results.files is defined outside this chunk
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq so the annotation can be written as a
  # flat CSV
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"), row.names = FALSE)
                              
} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                      analysis.type = "Within Groups", 
                      region.col = contrast.column, 
                      regions = contrast.types, 
                      group.col = group.column, 
                      groups = group.types, 
                      n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results

  # Cache the results so the next knit can reload them
  write.csv(results.df, de.results.path)
  
}

# Adjust column names
# The DE output columns are located by pattern and copied to fixed names
logfc.column <- colnames(results.df[grepl("logFC",colnames(results.df))])
results.df$logfc <- results.df[[logfc.column]]

pval.column <- colnames(results.df[grepl("_pval",colnames(results.df))])
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- colnames(results.df[grepl("adjpval",colnames(results.df))])
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))


# Export the results
# The DEG export flag must be set from the global DEG switch; previously the
# global value was assigned to export.volcano and export.deg.list was stale
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# Pass the plot explicitly: without `plot`, ggsave() saves last_plot(), which
# is not guaranteed to be this volcano
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(plot = volcano.output$volcano.plot, 
         filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"), 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
# anno.colors is defined outside this chunk
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(heatmap.plot, 
         filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"), 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
# Uses lmm.input and contrast.name from the preceding DE chunk

# Define the main contrast groups
contrast.field <- "segment"
contrast.levels <- c("LANA_pos", "full_ROI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation
# The numeric sample columns are captured once, before any summary column is
# added. Computing the stdev in a second piped mutate() would include the
# freshly added mean column as an extra observation and shrink the stdev.
contrast.A.values <- contrast.A.counts %>% 
  select_if(is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.values <- contrast.B.counts %>% 
  select_if(is.numeric)

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Join the two groups gene by gene
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Signal to noise: difference of group means over the sum of group stdevs,
# ranked from most positive (higher in A) to most negative
GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), row.names = FALSE)
  
}

```

### DE contrast: GI Biopsy, Tumor, LANA & full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE model, then draw and optionally export the DEG
# list, volcano plot and heatmap.

# Setup annotation groups for the contrast
# Each vector lists the annotation values kept when subsetting
tumor.types <- c("Biopsy")
region.types <- c("tumor")
source.types <- c("GI")
segment.types <- c("LANA_pos", "full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Levels are listed as condition, reference
contrast.column <- "segment"
contrast.types <- segment.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Annotation column -> allowed values, used to subset the data set
contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used in every output file name)
contrast.name <- paste0(tumor.types[[1]], 
                        "_",
                        source.types[[1]], 
                        "_", 
                        segment.types[[1]], 
                        "_", 
                        segment.types[[2]], 
                        "_",
                        region.types[[1]]
                        )

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Location of the cached DE results for this contrast
de.results.path <- paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")

# Check if the DE results have already been generated
# DE.results.files is defined outside this chunk
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq so the annotation can be written as a
  # flat CSV
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"), row.names = FALSE)
                              
} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                      analysis.type = "Within Groups", 
                      region.col = contrast.column, 
                      regions = contrast.types, 
                      group.col = group.column, 
                      groups = group.types, 
                      n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results

  # Cache the results so the next knit can reload them
  write.csv(results.df, de.results.path)
  
}

# Adjust column names
# The DE output columns are located by pattern and copied to fixed names
logfc.column <- colnames(results.df[grepl("logFC",colnames(results.df))])
results.df$logfc <- results.df[[logfc.column]]

pval.column <- colnames(results.df[grepl("_pval",colnames(results.df))])
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- colnames(results.df[grepl("adjpval",colnames(results.df))])
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))


# Export the results
# The DEG export flag must be set from the global DEG switch; previously the
# global value was assigned to export.volcano and export.deg.list was stale
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# Pass the plot explicitly: without `plot`, ggsave() saves last_plot(), which
# is not guaranteed to be this volcano
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(plot = volcano.output$volcano.plot, 
         filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"), 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
# anno.colors is defined outside this chunk
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(heatmap.plot, 
         filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"), 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
# Uses lmm.input and contrast.name from the preceding DE chunk

# Define the main contrast groups
contrast.field <- "segment"
contrast.levels <- c("LANA_pos", "full_ROI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation
# The numeric sample columns are captured once, before any summary column is
# added. Computing the stdev in a second piped mutate() would include the
# freshly added mean column as an extra observation and shrink the stdev.
contrast.A.values <- contrast.A.counts %>% 
  select_if(is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.values <- contrast.B.counts %>% 
  select_if(is.numeric)

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Join the two groups gene by gene
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Signal to noise: difference of group means over the sum of group stdevs,
# ranked from most positive (higher in A) to most negative
GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), row.names = FALSE)
  
}

```

### DE contrast: GI PDX, Tumor, LANA & full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE model, then draw and optionally export the DEG
# list, volcano plot and heatmap.

# Setup annotation groups for the contrast
# Each vector lists the annotation values kept when subsetting
tumor.types <- c("PDX")
region.types <- c("tumor")
source.types <- c("GI")
segment.types <- c("LANA_pos", "full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Levels are listed as condition, reference
contrast.column <- "segment"
contrast.types <- segment.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Annotation column -> allowed values, used to subset the data set
contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used in every output file name)
contrast.name <- paste0(tumor.types[[1]], 
                        "_",
                        source.types[[1]], 
                        "_", 
                        segment.types[[1]], 
                        "_", 
                        segment.types[[2]], 
                        "_",
                        region.types[[1]]
                        )

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Location of the cached DE results for this contrast
de.results.path <- paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")

# Check if the DE results have already been generated
# DE.results.files is defined outside this chunk
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq so the annotation can be written as a
  # flat CSV
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"), row.names = FALSE)
                              
} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                      analysis.type = "Within Groups", 
                      region.col = contrast.column, 
                      regions = contrast.types, 
                      group.col = group.column, 
                      groups = group.types, 
                      n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results

  # Cache the results so the next knit can reload them
  write.csv(results.df, de.results.path)
  
}

# Adjust column names
# The DE output columns are located by pattern and copied to fixed names
logfc.column <- colnames(results.df[grepl("logFC",colnames(results.df))])
results.df$logfc <- results.df[[logfc.column]]

pval.column <- colnames(results.df[grepl("_pval",colnames(results.df))])
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- colnames(results.df[grepl("adjpval",colnames(results.df))])
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))


# Export the results
# The DEG export flag must be set from the global DEG switch; previously the
# global value was assigned to export.volcano and export.deg.list was stale
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# Pass the plot explicitly: without `plot`, ggsave() saves last_plot(), which
# is not guaranteed to be this volcano
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(plot = volcano.output$volcano.plot, 
         filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"), 
         width = 14, 
         height = 10)
}

print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
# anno.colors is defined outside this chunk
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(heatmap.plot, 
         filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"), 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
# Uses lmm.input and contrast.name from the preceding DE chunk

# Define the main contrast groups
contrast.field <- "segment"
contrast.levels <- c("LANA_pos", "full_ROI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation
# The numeric sample columns are captured once, before any summary column is
# added. Computing the stdev in a second piped mutate() would include the
# freshly added mean column as an extra observation and shrink the stdev.
contrast.A.values <- contrast.A.counts %>% 
  select_if(is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.values <- contrast.B.counts %>% 
  select_if(is.numeric)

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Join the two groups gene by gene
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Signal to noise: difference of group means over the sum of group stdevs,
# ranked from most positive (higher in A) to most negative
GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), row.names = FALSE)
  
}

```

### DE contrast: Skin & GI Biopsy, Immune, full ROI

```{r, include=include.DE}

# Differential expression for one contrast: subset the normalized object,
# run (or reload) the DE model, then draw and optionally export the DEG
# list, volcano plot and heatmap.

# Setup annotation groups for the contrast
# Each vector lists the annotation values kept when subsetting
tumor.types <- c("Biopsy")
region.types <- c("immune", "immune/stroma")
source.types <- c("Skin", "GI")
segment.types <- c("full_ROI")
#paired.samples <- c("BM3748133", "360174")

# Define the main contrast
# Levels are listed as condition, reference
contrast.column <- "Source"
contrast.types <- source.types

# A column used for DE that does not have multiple types in this contrast
group.column <- "Tumor"
group.types <- tumor.types

# Annotation column -> allowed values, used to subset the data set
contrast.groups.list <- list(
  "Tumor" = tumor.types, 
  "region" = region.types, 
  "segment" = segment.types, 
  "Source" = source.types)

# Define the name of the contrast (used in every output file name)
contrast.name <- paste0(tumor.types[[1]], 
                        "_",
                        source.types[[1]], 
                        "_", 
                        source.types[[2]], 
                        "_",
                        segment.types[[1]], 
                        "_",
                        region.types[[1]]
                        )

# Filter data for the chosen annotations
object <- q3.normalization.output$object

# Generate the counts, annotation, and subset object for lmm
lmm.input <- subset_for_lmm(object = object, 
                            subset.list = contrast.groups.list)

# Create summary table of group counts
summary.table.df <- pData(lmm.input$subset.object) %>%  
  select(c(names(contrast.groups.list)))

summary.table <- table(summary.table.df)

print("Sample Numbers per Annotation Group")
print("-----------------------------------")
print(summary.table)

# Location of the cached DE results for this contrast
de.results.path <- paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv")

# Check if the DE results have already been generated
# DE.results.files is defined outside this chunk
DE.result.file <- grepl(contrast.name, DE.results.files)
if(sum(DE.result.file) == 1) {
  
  # Load the previously generated DE results
  results.df <- as.data.frame(read.csv(de.results.path))
  
  annotation.df <- lmm.input$annotation 
  
  # Keep only the first column of loq so the annotation can be written as a
  # flat CSV
  annotation.df$loq <- annotation.df$loq[, 1]
  
  write.csv(annotation.df, paste0(results.folder, run.folder, "DE/", contrast.name, "_annotation.csv"), row.names = FALSE)
                              
} else {
  # Use the function from the DSPWorkflow package

  # Within slide analysis
  # Listed contrasts are condition, reference 
  results.list <- diffExpr(object = lmm.input$subset.object, 
                      analysis.type = "Within Groups", 
                      region.col = contrast.column, 
                      regions = contrast.types, 
                      group.col = group.column, 
                      groups = group.types, 
                      n.cores = parallel::detectCores())
  
  # Create the results df
  results.df <- results.list$results

  # Cache the results so the next knit can reload them
  write.csv(results.df, de.results.path)
  
}

# Adjust column names
# The DE output columns are located by pattern and copied to fixed names
logfc.column <- colnames(results.df[grepl("logFC",colnames(results.df))])
results.df$logfc <- results.df[[logfc.column]]

pval.column <- colnames(results.df[grepl("_pval",colnames(results.df))])
results.df$pval <- results.df[[pval.column]]

adj.pval.column <- colnames(results.df[grepl("adjpval",colnames(results.df))])
results.df$padj <- results.df[[adj.pval.column]]

results.df$gene <- results.df$Gene

# Keep only the necessary columns
results.df <- results.df %>% select(c("gene", 
                                      "logfc", 
                                      "pval", 
                                      "padj"))


# Export the results
# The DEG export flag must be set from the global DEG switch; previously the
# global value was assigned to export.volcano and export.deg.list was stale
export.deg.list <- global.export.deg
if(export.deg.list == TRUE){ 
  write.csv(results.df, 
            file = paste0(results.folder, run.folder, "DE/DEG_lists/", contrast.name, "_deg_list.csv"), 
            row.names = FALSE)  
}

# Make the volcano plot
volcano.output <- make_volcano(lmm.results = results.df, 
                               title = contrast.name, 
                               legend.title = paste0("DE in ", contrast.types[[1]]), 
                               x.axis.title = paste0(contrast.types[[1]],
                                                     " vs. ",
                                                     contrast.types[[2]]))

# Export the volcano
# Pass the plot explicitly: without `plot`, ggsave() saves last_plot(), which
# is not guaranteed to be this volcano
export.volcano <- global.export.volcano
if(export.volcano == TRUE){ 
  ggsave(plot = volcano.output$volcano.plot, 
         filename = paste0(results.folder, run.folder, "DE/volcano/", contrast.name, "_volcano_plot.png"), 
         width = 14, 
         height = 10)
}

# Show the volcano in the knitted report, as the other DE contrast chunks do
print(volcano.output$volcano.plot)

# Make the heatmap

# Subset the annotation just for the heatmap annotations of interest
# anno.colors is defined outside this chunk
annotation.heatmap <- lmm.input$annotation %>% 
  select(names(anno.colors))

heatmap.plot <- make_heatmap(
  normalized.log.counts.df = lmm.input$log.counts, 
  de.results = results.df, 
  top.degs = TRUE, 
  annotation.column = annotation.heatmap,
  annotation.row = NULL, 
  anno.colors = anno.colors, 
  cluster.rows = FALSE, 
  cluster.columns = TRUE, 
  main.title = contrast.name, 
  row.gaps = NULL, 
  column.gaps = NULL)

print(heatmap.plot)

# Export the heatmap
export.heatmap <- global.export.heatmap
if(export.heatmap == TRUE){
  
  ggsave(heatmap.plot, 
         filename = paste0(results.folder, run.folder, "DE/heatmap/", contrast.name, "_heatmap_plot.png"), 
         width = 14, 
         height = 10)
  
}

```

#### GSEA preranked list

```{r}

# Gather the signal to noise ratio for GSEA ranking
# Default method for ranking genes from GSEA manual:
# https://www.gsea-msigdb.org/gsea/doc/GSEAUserGuideTEXT.htm#_Metrics_for_Ranking
# Uses lmm.input and contrast.name from the preceding DE chunk

# Define the main contrast groups
# The preceding DE chunk contrasts Source (Skin vs. GI) and the export below
# is named after that contrast, so the ranking uses the same field and levels.
# NOTE(review): this previously ranked on region (immune vs. immune/stroma),
# which looked like a copy-paste leftover - confirm Source is intended
contrast.field <- "Source"
contrast.levels <- c("Skin", "GI")

# Gather the annotation, sample IDs, and log counts for each contrast

# Contrast level A is the "condition" (positive when calculating fold change)
contrast.A.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[1])

contrast.A.sampleIDs <- rownames(contrast.A.annotation)

contrast.A.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.A.sampleIDs))

contrast.A.counts$gene <- rownames(contrast.A.counts)

# Contrast level B is the "reference" (negative when calculating fold change)

contrast.B.annotation <- lmm.input$annotation %>% 
  filter(!!sym(contrast.field) == contrast.levels[2])

contrast.B.sampleIDs <- rownames(contrast.B.annotation)

contrast.B.counts <- as.data.frame(lmm.input$log.counts) %>% 
  select(all_of(contrast.B.sampleIDs))

contrast.B.counts$gene <- rownames(contrast.B.counts)

# Add a column to each contrast level for the mean and standard deviation
# The numeric sample columns are captured once, before any summary column is
# added. Computing the stdev in a second piped mutate() would include the
# freshly added mean column as an extra observation and shrink the stdev.
contrast.A.values <- contrast.A.counts %>% 
  select_if(is.numeric)

contrast.A.counts <- contrast.A.counts %>% 
  mutate(mean.A = rowMeans(contrast.A.values), 
         stdev.A = apply(contrast.A.values, 1, sd))

contrast.B.values <- contrast.B.counts %>% 
  select_if(is.numeric)

contrast.B.counts <- contrast.B.counts %>% 
  mutate(mean.B = rowMeans(contrast.B.values), 
         stdev.B = apply(contrast.B.values, 1, sd))

# Join the two groups gene by gene
GSEA.preanked.df <- merge(contrast.A.counts, contrast.B.counts, by = "gene")

# Signal to noise: difference of group means over the sum of group stdevs,
# ranked from most positive (higher in A) to most negative
GSEA.preanked.df <- GSEA.preanked.df %>% 
  mutate(signal2noise = (mean.A - mean.B)/(stdev.A + stdev.B)) %>% 
  arrange(desc(signal2noise)) %>% 
  select(c(gene, mean.A, mean.B, stdev.A, stdev.B, signal2noise))

if(export.gsea.input == TRUE){
  
  write.csv(GSEA.preanked.df, file = paste0(results.folder, run.folder, "GSEA/", contrast.name, "_gsea_preranked_input.csv"), row.names = FALSE)
  
}

```

# MA plots

```{r MA Plot, include=include.qc}

# Log2-transform the `exprs` (pre-normalization) and `q_norm`
# (post-normalization) assay matrices of the normalized object

pre.norm.counts <- log(
  as.data.frame(q3.normalization.output$object@assayData$exprs), 
  base = 2)

post.norm.counts <- log(
  as.data.frame(q3.normalization.output$object@assayData$q_norm), 
  base = 2)


# Example MA plot

# Annotation values that identify the contrast being plotted
tumor.types <- "Biopsy"
region.types <- c("tumor", "vessel")
source.types <- "Skin"
segment.types <- "full_ROI"

# Name of the contrast; it must match the stem of the saved DE results file
contrast.name <- paste(tumor.types[[1]], 
                       source.types[[1]], 
                       segment.types[[1]], 
                       region.types[[1]], 
                       region.types[[2]], 
                       sep = "_")

# Inputs for the MA plot: the contrast field with its condition and reference
# labels, the saved DE results, and the log counts before and after
# normalization
contrast.field <- "region"
condition.label <- "tumor"
reference.label <- "vessel"
result.df <- read.csv(paste0(results.folder, run.folder, "DE/", contrast.name, "_de.results.csv"))
log.counts <- post.norm.counts
raw.log.counts <- pre.norm.counts
# `annotation` is defined outside this chunk
annotation.MA <- annotation

# Build the MA plots and draw them on the current graphics device
MA.plots <- make_MA(
  contrast.field = contrast.field, 
  condition.label = condition.label, 
  reference.label = reference.label, 
  results.df = result.df, 
  log.counts = log.counts, 
  raw.log.counts = raw.log.counts, 
  annotation = annotation.MA)


grid.draw(MA.plots)

```