Skip to content

Commit

Permalink
Sync with NIDAP Global code, 2024-05-31
Browse files Browse the repository at this point in the history
  • Loading branch information
lobanovav authored Jun 3, 2024
1 parent 08ac71c commit 7cba94e
Showing 1 changed file with 88 additions and 31 deletions.
119 changes: 88 additions & 31 deletions R/Harmony.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,50 @@ harmonyBatchCorrect <- function(object,
stop("nvar exceed total number of variable genes in the data")
}

# Main Code Block
## Main Code Block
# Save original tSNE and UMAP for later comparison
n <- 60
qual.col.pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
qual.col.pals = qual.col.pals[c(7,6,2,1,8,3,4,5),]
cols = unlist(mapply(brewer.pal, qual.col.pals$maxcolors,
rownames(qual.col.pals)))

sdat.tsne.orig <- data.frame(as.vector(object@reductions$tsne@cell.embeddings[,1]),
as.vector(object@reductions$tsne@cell.embeddings[,2]),
object@meta.data[eval(parse(text = "group.by.var"))])
names(sdat.tsne.orig) <- c("TSNE1","TSNE2","ident")

sdat.umap.orig <- data.frame(as.vector(object@reductions$umap@cell.embeddings[,1]),
as.vector(object@reductions$umap@cell.embeddings[,2]),
object@meta.data[eval(parse(text = "group.by.var"))])
names(sdat.umap.orig) <- c("UMAP1","UMAP2","ident")

orig.tsne <- ggplot(sdat.tsne.orig, aes(x=TSNE1, y=TSNE2)) +
theme_bw() +
theme(legend.title=element_blank()) +
geom_point(aes(colour=ident),size=1) +
scale_color_manual(values=cols) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position="none",
panel.background = element_blank(),
legend.text=element_text(size=rel(0.5))) +
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) +
annotate("text", x = Inf, y = -Inf, label = "Original tSNE", hjust = 1.1, vjust = -1, size = 5)

orig.umap <- ggplot(sdat.umap.orig, aes(x=UMAP1, y=UMAP2)) +
theme_bw() +
theme(legend.title=element_blank()) +
geom_point(aes(colour=ident),size=1) +
scale_color_manual(values=cols) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position="none",
panel.background = element_blank(),
legend.text=element_text(size=rel(0.5))) +
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) +
annotate("text", x = Inf, y = -Inf, label = "Original UMAP", hjust = 1.1, vjust = -1, size = 5)

# Adjusts SCT scale.data based on harmonized embedding
seur.SCT <- object@assays$SCT@scale.data

Expand All @@ -68,74 +111,88 @@ harmonyBatchCorrect <- function(object,

# Singular Value Decomposition (SVD) on scaled counts
pppca <- svd(seur.SCT)

# pppca$u: matrix that contains the left singular values
# pppca$d: vector that contains singular values, sorted in descending order
ppembed <- pppca$u %*% diag(pppca$d)
pcnames <- vector(mode = "character")
for (i in 1:dim(ppembed)[2])pcnames[i] <- paste("PC", i, sep = "_")
colnames(ppembed) <- pcnames
rownames(ppembed) <- rownames(seur.SCT)
sm.ppembed <- ppembed[1:10, 1:10]

# pppca$v: matrix that contains the right singular values
ppldngs <- pppca$v
colnames(ppldngs) <- pcnames
rownames(ppldngs) <- mvf

# Redo pca embeddings based on SVD of gene expression values
object@reductions$pca@cell.embeddings <- ppembed
object@reductions$pca@feature.loadings <- ppldngs
object@reductions$pca@stdev <- pppca$d

# By default, Harmony corrects pca embeddings.
# Set do_pca to FALSE to use your own pca embeddings.
# Stores adjusted embeddings in harmony reduction slot
object <- RunHarmony(object, group.by.var,
do_pca=FALSE,
assay.use = "SCT",
plot_convergence = TRUE,
return_object=TRUE)

object <- RunHarmony(object,
group.by.var,
do_pca=FALSE,
assay.use = "SCT",
plot_convergence = FALSE,
return_object=TRUE)

object <- RunUMAP(object, reduction = "harmony", dims=1:npc)
object <- RunTSNE(object, reduction = "harmony", dims=1:npc)

# Plot harmony embeddings annotated by variable to batch correct for
sdat <- data.frame(as.vector(object@reductions$tsne@cell.embeddings[,1]),
as.vector(object@reductions$tsne@cell.embeddings[,2]),
object@meta.data[eval(parse(text = "group.by.var"))])
names(sdat) <- c("TSNE1","TSNE2","ident")
sdat.tsne <- data.frame(as.vector(object@reductions$tsne@cell.embeddings[,1]),
as.vector(object@reductions$tsne@cell.embeddings[,2]),
object@meta.data[eval(parse(text = "group.by.var"))])
names(sdat.tsne) <- c("TSNE1","TSNE2","ident")

n <- 60
qual.col.pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
qual.col.pals = qual.col.pals[c(7,6,2,1,8,3,4,5),]
cols = unlist(mapply(brewer.pal, qual.col.pals$maxcolors,
rownames(qual.col.pals)))
sdat.umap <- data.frame(as.vector(object@reductions$umap@cell.embeddings[,1]),
as.vector(object@reductions$umap@cell.embeddings[,2]),
object@meta.data[eval(parse(text = "group.by.var"))])
names(sdat.umap) <- c("UMAP1","UMAP2","ident")

set.seed(10)
g1 <- ggplot(sdat, aes(x=TSNE1, y=TSNE2)) +
harm.tsne <- ggplot(sdat.tsne, aes(x=TSNE1, y=TSNE2)) +
theme_bw() +
theme(legend.title=element_blank()) +
geom_point(aes(colour=ident),size=1) +
scale_color_manual(values=cols) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position="top",
legend.position="right",
panel.background = element_blank(),
legend.text=element_text(size=rel(0.5))) +
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))
)
legend.text=element_text(size=rel(1.5))) +
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) +
annotate("text", x = Inf, y = -Inf, label = "Harmonized tSNE", hjust = 1.1, vjust = -1, size = 5)

harm.umap <- ggplot(sdat.umap, aes(x=UMAP1, y=UMAP2)) +
theme_bw() +
theme(legend.title=element_blank()) +
geom_point(aes(colour=ident),size=1) +
scale_color_manual(values=cols) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position="right",
panel.background = element_blank(),
legend.text=element_text(size=rel(1.5))) +
guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) +
annotate("text", x = Inf, y = -Inf, label = "Harmonized UMAP", hjust = 1.1, vjust = -1, size = 5)

print((orig.tsne + harm.tsne) + plot_layout(ncol = 2))

print((orig.umap + harm.umap) + plot_layout(ncol = 2))

# Calculate adjusted gene expression from embeddings
seur.SCT <- t(object@assays$SCT@scale.data)
harm.embeds <- object@reductions$harmony@cell.embeddings
harm.lvl.backcalc <- harm.embeds %*% t(ppldngs)

# Insert back-calculated data into seurat
harmSCT <- CreateAssayObject(data = t(harm.lvl.backcalc))
object[["harmSCT"]] <- harmSCT
# Replace SCT scale.data expression with backcalculated data
object$SCT@scale.data <- t(harm.lvl.backcalc)

harmony.res <- list(adj.object = object,
adj.tsne = g1)
#harm_res <- list(harm.object = object, harm.tsne = g1, harm.umap = g2)

return(object)
}

0 comments on commit 7cba94e

Please sign in to comment.