diff --git a/Results-template/Scripts/doublets.R b/Results-template/Scripts/doublets.R new file mode 100644 index 0000000..b9441bd --- /dev/null +++ b/Results-template/Scripts/doublets.R @@ -0,0 +1,63 @@ +library("DoubletFinder",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library("modes",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library("Seurat") + +args <- commandArgs(trailingOnly = TRUE) + +rds <- as.character(args[1]) +sample = basename(rds) +rdata = as.character(args[2]) +load(rdata) + + +doublets <-function(dfso){ + dfso <- NormalizeData(dfso, verbose = F) + dfso <- ScaleData(dfso,verbose = F) + dfso <- FindVariableFeatures(dfso, do.plot=FALSE) + dfso <- RunPCA(dfso, pc.genes = dfso@var.genes, pcs.print = 0,verbose = F,npcs =30) + npcs = 10 + dfso <- RunUMAP(dfso, verbose=TRUE,dims = 1:npcs) + + + sweep.res.list_kidney <- paramSweep_v3(dfso,PCs = 1:10, sct = FALSE) + sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = FALSE) + bcmvn_kidney <- find.pK(sweep.stats_kidney) + + ## pK Identification (ground-truth) ------------------------------------------------------------------------------------------ +# sweep.res.list_kidney <- paramSweep_v3(dfso, PCs = 1:10, sct = FALSE) +# gt.calls <- dfso@meta.data[rownames(sweep.res.list_kidney[[1]]), "GT"] +# sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = TRUE, GT.calls = gt.calls) +# bcmvn_kidney <- find.pK(sweep.stats_kidney) + + + ## Homotypic Doublet Proportion Estimate ------------------------------------------------------------------------------------- + homotypic.prop <- modelHomotypic(dfso$annot) + perc = 0.008 * (length(colnames(dfso))/1000) + nExp_poi <- round(perc*length(colnames(dfso)))#dfso@cell.names + nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop)) + + ## Run DoubletFinder with varying classification stringencies ---------------------------------------------------------------- + dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE,PCs = 1:10) + pAAN=tail(names(dfso@meta.data),2)[1] + dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = pAAN,PCs = 1:10) + + ## Plot results -------------------------------------------------------------------------------------------------------------- + DF1=tail(names(dfso@meta.data),2)[1] + DF2=tail(names(dfso@meta.data),2)[2] + + dfso@meta.data[,"DF_hi.lo"] <- dfso[[DF1]] + dfso@meta.data$DF_hi.lo[which(dfso@meta.data$DF_hi.lo == "Doublet" & dfso@meta.data[[DF2]] == "Singlet")] <- "Doublet_lo" + dfso@meta.data$DF_hi.lo[which(dfso@meta.data$DF_hi.lo == "Doublet")] <- "Doublet_hi" + return(dfso$DF_hi.lo) +} + +#so = readRDS(file) +so$annot = so$HPCA +doublets = doublets(so) +so$DF_hi.lo = doublets +#saveRDS(so,"x2.rds") +so=SubsetData(so,cells=names(so$DF_hi.lo)[so$DF_hi.lo =="Singlet"]) +saveRDS(so,rds) +save(list=c("sce_BC","sce_filt","so"),file = rdata) + + diff --git a/Results-template/Scripts/integrateBatches.R b/Results-template/Scripts/integrateBatches.R new file mode 100644 index 0000000..3ed6a17 --- /dev/null +++ b/Results-template/Scripts/integrateBatches.R @@ -0,0 +1,196 @@ +library(Biobase,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library(farver,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library(S4Vectors,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library("SingleR",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library(scRNAseq) +library(SingleCellExperiment) 
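+# Descriptive note (added comment): integrateBatches.R reads the per-sample filtered Seurat .rds objects, integrates them with FindIntegrationAnchors/IntegrateData, clusters both the batch-corrected ("integrated" assay) and merged ("SCT" assay) objects at several resolutions, and annotates clusters with SingleR.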
+library("destiny",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library("URD",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib") +library("Seurat") +library(dplyr) +library(Matrix) +library(tools) +library(stringr) + + + +args <- commandArgs(trailingOnly = TRUE) + +matrix <- as.character(args[1]) +#output = as.character(args[2]) +outDirSeurat = as.character(args[2]) +outDirMerge = as.character(args[3]) +specie = as.character(args[4]) +resolution = as.numeric(args[5]) +clusterAlg = as.numeric(args[6]) +annotDB = as.character(args[7]) +nAnchors = as.numeric(args[8]) +groups = as.character(args[9]) +contrasts = as.character(args[10]) + + + +file.names <- dir(path = matrix,pattern ="rds") + +if (groups == "YES") { + +groupFile = read.delim("groups.tab",header=F,stringsAsFactors = F) +groupFile=groupFile[groupFile$V2 %in% stringr::str_split_fixed(contrasts,pattern = "-",n = Inf)[1,],] +#groupFile = groupFile[groupFile$V2 == strsplit(contrasts,"-")[[1]][1] | groupFile$V2 == strsplit(contrasts,"-")[[1]][2] ,] + +splitFiles = gsub(".rds","",file.names)#str_split_fixed(file.names,pattern = "[.rd]",n = 2) +file.names=file.names[match(groupFile$V1,splitFiles,nomatch = F)] +print(groupFile$V1) +print(splitFiles) +print(file.names) +} + +readObj = list() +for (obj in file.names) { + Name=strsplit(obj,".rds")[[1]][1] + assign(paste0("S_",Name),readRDS(paste0(matrix,"/",obj))) + readObj = append(readObj,paste0("S_",Name)) + } + +#for (obj in readObj) { + # print(obj) + #sample = CreateSeuratObject(GetAssayData(object = eval(parse(text =obj)),slot = "counts")[,colnames(x = eval(parse(text =obj)))]) + #sample@assays$RNA@data = GetAssayData(object = eval(parse(text =obj)),slot = "data")[,colnames(x = eval(parse(text =obj)))] + #sample@meta.data =eval(parse(text =obj))@meta.data + #assign(obj,sample) + +#} + + +combinedObj.list=list() +i=1 +for (p in readObj){ + combinedObj.list[[p]] <- eval(parse(text = readObj[[i]])) + i <- i + 1 + } + + +reference.list <- combinedObj.list[unlist(readObj)] +print(reference.list) +for (i in 1:length(x = reference.list)) { +# reference.list[[i]] <- NormalizeData(object = reference.list[[i]], verbose = FALSE) + reference.list[[i]] <- FindVariableFeatures(object = reference.list[[i]], + selection.method = "vst", nfeatures = nAnchors, verbose = FALSE) +} + +print(length(reference.list)) +print(reference.list) +combinedObj.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30,anchor.features = nAnchors) +combinedObj.integrated <- IntegrateData(anchorset = combinedObj.anchors, dims = 1:30) + + +DefaultAssay(object = combinedObj.integrated) <- "integrated" +combinedObj.integrated <- ScaleData(object = combinedObj.integrated, verbose = FALSE) + + + +combinedObj.integratedRNA = combinedObj.integrated +DefaultAssay(object = combinedObj.integratedRNA) <- "SCT" + +combinedObj.integratedRNA = FindVariableFeatures(combinedObj.integratedRNA,mean.cutoff = c(0.0125, 3),dispersion.cutoff = c(0.5, Inf),selection.method = "vst") +combinedObj.integratedRNA <- ScaleData(object = combinedObj.integratedRNA, verbose = FALSE) + + +mat1 <- t(as.matrix(FetchData(object = combinedObj.integratedRNA,slot = "counts",vars = rownames(combinedObj.integratedRNA)))) +urdObj <- createURD(count.data = mat1, min.cells=3, min.counts=3) +varGenes_batch = VariableFeatures(combinedObj.integrated)[VariableFeatures(combinedObj.integrated) %in% rownames(urdObj@logupx.data)] +varGenes_merge = VariableFeatures(combinedObj.integratedRNA)[VariableFeatures(combinedObj.integratedRNA) 
%in% rownames(urdObj@logupx.data)] + +pcs_batch <- calcPCA(urdObj, pcs.store = 50,genes.use=varGenes_batch,mp.factor = 1.2) +npcs_batch = sum(pcs_batch@pca.sig) +print(npcs_batch) + +pcs_merge <- calcPCA(urdObj, pcs.store = 50,genes.use=varGenes_merge,mp.factor = 1.2) +npcs_merge = sum(pcs_merge@pca.sig) +print(npcs_merge) + + +runInt = function(obj,res,npcs){ +obj <- RunPCA(object = obj, npcs = 50, verbose = FALSE) +obj <- FindNeighbors(obj,dims = 1:npcs) +obj <- FindClusters(obj, reduction.type = "pca", dims.use = 1:npcs, save.SNN = T,resolution = res,algorithm = clusterAlg) + +obj <- RunUMAP(object = obj, reduction = "pca", + dims = 1:npcs,n.components = 3) + +obj$groups = groupFile$V2[match(obj$Sample, groupFile$V1,nomatch = F)] + + + +runSingleR = function(obj,refFile,fineORmain){ +avg = AverageExpression(obj,assays = "SCT") +avg = as.data.frame(avg) +ref = refFile +s = SingleR(test = as.matrix(avg),ref = ref,labels = ref[[fineORmain]]) + +clustAnnot = s$labels +names(clustAnnot) = colnames(avg) +names(clustAnnot) = gsub("SCT.","",names(clustAnnot)) + +obj$clustAnnot = clustAnnot[match(obj$seurat_clusters,names(clustAnnot))] +return(obj$clustAnnot) +} + +if(annotDB == "HPCA"){ +obj$clustAnnot <- runSingleR(obj,HumanPrimaryCellAtlasData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,HumanPrimaryCellAtlasData(),"label.fine") +} + +if(annotDB == "BP_encode"){ +obj$clustAnnot <- runSingleR(obj,BlueprintEncodeData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,BlueprintEncodeData(),"label.fine") +} +if(annotDB == "monaco"){ +obj$clustAnnot <- runSingleR(obj,MonacoImmuneData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,MonacoImmuneData(),"label.fine") +} +if(annotDB == "immu_cell_exp"){ +obj$clustAnnot <- runSingleR(obj,DatabaseImmuneCellExpressionData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,DatabaseImmuneCellExpressionData(),"label.fine") +} + +if(annotDB == "immgen"){ +obj$clustAnnot <- runSingleR(obj,ImmGenData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,ImmGenData(),"label.fine") +} +if(annotDB == "mouseRNAseq"){ +obj$clustAnnot <- runSingleR(obj,MouseRNAseqData(),"label.main") +obj$clustAnnotDetail <- runSingleR(obj,MouseRNAseqData(),"label.fine") +} +return(obj) +} + +combinedObj.integrated = runInt(combinedObj.integrated,0.3,npcs_batch) +saveRDS(combinedObj.integrated, paste0(outDirSeurat,"_0.3.rds")) +combinedObj.integrated = runInt(combinedObj.integrated,0.6,npcs_batch) +saveRDS(combinedObj.integrated, paste0(outDirSeurat,"_0.6.rds")) +combinedObj.integrated = runInt(combinedObj.integrated,0.8,npcs_batch) +saveRDS(combinedObj.integrated, paste0(outDirSeurat,"_0.8.rds")) +combinedObj.integrated = runInt(combinedObj.integrated,1.0,npcs_batch) +saveRDS(combinedObj.integrated, paste0(outDirSeurat,"_1.0.rds")) +combinedObj.integrated = runInt(combinedObj.integrated,1.2,npcs_batch) +saveRDS(combinedObj.integrated, paste0(outDirSeurat,"_1.2.rds")) + + + +combinedObj.integratedRNA = runInt(combinedObj.integratedRNA,0.3,npcs_merge) +saveRDS(combinedObj.integratedRNA, paste0(outDirMerge,"_0.3.rds")) + +combinedObj.integratedRNA = runInt(combinedObj.integratedRNA,0.6,npcs_merge) +saveRDS(combinedObj.integratedRNA, paste0(outDirMerge,"_0.6.rds")) + +combinedObj.integratedRNA = runInt(combinedObj.integratedRNA,0.8,npcs_merge) +saveRDS(combinedObj.integratedRNA, paste0(outDirMerge,"_0.8.rds")) + +combinedObj.integratedRNA = runInt(combinedObj.integratedRNA,1.0,npcs_merge) +saveRDS(combinedObj.integratedRNA, paste0(outDirMerge,"_1.0.rds")) 
+ +combinedObj.integratedRNA = runInt(combinedObj.integratedRNA,1.2,npcs_merge) +saveRDS(combinedObj.integratedRNA, paste0(outDirMerge,"_1.2.rds")) + + diff --git a/Results-template/Scripts/scrnaQC.R b/Results-template/Scripts/scrnaQC.R new file mode 100644 index 0000000..2640968 --- /dev/null +++ b/Results-template/Scripts/scrnaQC.R @@ -0,0 +1,212 @@ +#.libPaths(c("/data//CCBR_Pipeliner/db/PipeDB/scrna_lib/R-3.6.1/library",.libPaths()[1],.libPaths()[2],.libPaths()[3])) +.libPaths(c("/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA",.libPaths()[1],.libPaths()[2],.libPaths()[3])) + + +args <- commandArgs(trailingOnly = TRUE) + +file <- as.character(args[1]) +sample = basename(file) +outRDS = as.character(args[2]) +outRdata = as.character(args[3]) +species = as.character(args[4]) +resolution = as.numeric(args[5]) +clusterAlg = as.numeric(args[6]) +annotDB = as.character(args[7]) +#matrix = paste0(file,"/outs/filtered_feature_bc_matrix.h5") + +matrix=file + +#library(future) +#plan("multiprocess", workers = 8) + + +#library(DropletUtils,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +#library(scran,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +#library(scater,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library(Biobase,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library(farver,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library(S4Vectors,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_3.6.0_scRNA") +library("Seurat",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library("DoubletFinder",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library(AnnotationDbi) +library("modes",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") + +library(SingleR) +library(scRNAseq) +library(SingleCellExperiment) +library("modes",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +#library("SingleR",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library("URD",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library(Routliers,lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library(dplyr) +library(Matrix) +library(reshape2) +library(tools) +library("DoubletFinder",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +library("modes",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") + + +###Run Seurat Clustering +seuratClustering = function(so){ + + +tail(strsplit(file,"/")[[1]],1) +so$Sample=tail(strsplit(file,"/")[[1]],1) +so = SCTransform(so) + +so <- RunPCA(object = so, features = VariableFeatures(object = so), do.print = TRUE, pcs.print = 1:5,genes.print = 0,verbose=F,npcs = 30) + +if(ncol(so)<50000) { +mat1 <- t(as.matrix(FetchData(object = so,slot = "counts",vars = rownames(so)))) +urdObj <- createURD(count.data = mat1, min.cells=3, min.counts=3) +varGenes = so@assays$RNA@var.features[so@assays$RNA@var.features %in% rownames(urdObj@logupx.data)] +pcs <- calcPCA(urdObj, mp.factor = 1,pcs.store = 30,genes.use=varGenes) +npcs = sum(pcs@pca.sig) +} + +else { +npcs = 30 + +} + +so <- FindNeighbors(so,dims = 1:npcs) +so <- FindClusters(so,dims = 1:npcs, print.output = 0, resolution = resolution,algorithm = clusterAlg) +so <- RunUMAP(so,dims = 1:npcs) + + +return(so) +} + +doublets <-function(dfso){ + dfso <- SCTransform(dfso) + dfso <- RunPCA(dfso, pc.genes = dfso@var.genes, pcs.print = 0,verbose = F,npcs =30) + npcs = 10 + dfso <- RunUMAP(dfso, verbose=TRUE,dims = 1:npcs) + + + sweep.res.list_kidney <- paramSweep_v3(dfso,PCs = 1:10, sct = T) + sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = 
FALSE)
+ bcmvn_kidney <- find.pK(sweep.stats_kidney)
+ ## pK Identification (ground-truth) ------------------------------------------------------------------------------------------
+# sweep.res.list_kidney <- paramSweep_v3(dfso, PCs = 1:10, sct = FALSE)
+# gt.calls <- dfso@meta.data[rownames(sweep.res.list_kidney[[1]]), "GT"]
+# sweep.stats_kidney <- summarizeSweep(sweep.res.list_kidney, GT = TRUE, GT.calls = gt.calls)
+# bcmvn_kidney <- find.pK(sweep.stats_kidney)
+
+
+ ## Homotypic Doublet Proportion Estimate -------------------------------------------------------------------------------------
+ homotypic.prop <- modelHomotypic(dfso$annot)
+ perc = 0.008 * (length(colnames(dfso))/1000)
+ nExp_poi <- round(perc*length(colnames(dfso)))#dfso@cell.names
+ nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
+
+ ## Run DoubletFinder with varying classification stringencies ----------------------------------------------------------------
+ dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi, reuse.pANN = FALSE, PCs = 1:10, sct = T)
+ pAAN = tail(names(dfso@meta.data),2)[1]
+ dfso <- doubletFinder_v3(dfso, pN = 0.25, pK = 0.09, nExp = nExp_poi.adj, reuse.pANN = pAAN, PCs = 1:10, sct = T)
+
+ ## Plot results --------------------------------------------------------------------------------------------------------------
+ #DF1=tail(names(dfso@meta.data),2)[1]
+ # DF2=tail(names(dfso@meta.data),2)[2]
+# dfso@meta.data[,"DF_hi.lo"] <- dfso[[DF2]]
+# dfso@meta.data$DF_hi.lo[which(dfso@meta.data$DF_hi.lo == "Doublet" & dfso@meta.data[[DF2]] == "Singlet")] <- "Doublet_lo"
+ # dfso@meta.data$DF_hi.lo[which(dfso@meta.data$DF_hi.lo == "Doublet")] <- "Doublet_hi"
+ return(dfso)
+}
+
+
+
+convertHumanGeneList <- function(x){
+
+ require("biomaRt")
+ human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
+ mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
+
+ genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
+
+ humanx <- unique(genesV2[, 2])
+
+ return(humanx)
+}
+
+so_BC = Read10X_h5(matrix)
+so_BC = CreateSeuratObject(so_BC)
+
+if(species=="human"){so_BC[["percent.mt"]] <- PercentageFeatureSet(so_BC, pattern = "^MT-")}
+if(species=="mouse"){so_BC[["percent.mt"]] <- PercentageFeatureSet(so_BC, pattern = "^mt-")}
+
+
+
+nCount_out = outliers_mad(so_BC$nCount_RNA,threshold = 3)$LL_CI_MAD
+nFeature_out = outliers_mad(so_BC$nFeature_RNA,threshold = 3)$LL_CI_MAD
+mt_out = outliers_mad(so_BC$percent.mt,threshold = 3)$UL_CI_MAD
+
+so_filt <- subset(so_BC, subset = nFeature_RNA > nFeature_out & nCount_RNA > nCount_out & percent.mt < mt_out)
+
+
+load("/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/immgenMod.RData")
+
+#rownames(sce_norm) = gsub("_","-",rownames(sce_norm))
+#rownames(sce_norm) = make.names(sce_norm@rowRanges@elementMetadata$Symbol,unique = T)
+
+
+so = seuratClustering(so_filt)
+
+if(species=="human"){
+s.genes <- cc.genes$s.genes
+g2m.genes <- cc.genes$g2m.genes
+}
+
+if(species=="mouse"){
+s.genes <- convertHumanGeneList(cc.genes$s.genes)
+g2m.genes <- convertHumanGeneList(cc.genes$g2m.genes)
+}
+
+
+so = CellCycleScoring(so,s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
+
+runSingleR = function(obj,refFile,fineORmain){
+sce = as.SingleCellExperiment(obj,assay = "SCT")
+ref = refFile
+s = SingleR(test = sce, ref = ref,labels = ref[[fineORmain]])
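+# Descriptive note (added comment): SingleR assigns a reference label to each cell; pruned.labels returns those per-cell calls with low-confidence assignments set to NA.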
+return(s$pruned.labels) +print(head(s$pruned.labels)) +} + +if(species == "human"){ +so$HPCA_main <- runSingleR(so,HumanPrimaryCellAtlasData(),"label.main") +so$HPCA <- runSingleR(so,HumanPrimaryCellAtlasData(),"label.fine") +so$BP_encode_main <- runSingleR(so,BlueprintEncodeData(),"label.main") +so$BP_encode <- runSingleR(so,BlueprintEncodeData(),"label.fine") +so$monaco_main <- runSingleR(so,MonacoImmuneData(),"label.main") +so$monaco <- runSingleR(so,MonacoImmuneData(),"label.fine") +so$immu_cell_exp_main <- runSingleR(so,DatabaseImmuneCellExpressionData(),"label.main") +so$immu_cell_exp <- runSingleR(so,DatabaseImmuneCellExpressionData(),"label.fine") + +} + +if(species == "mouse"){ + +so$immgen_main <- runSingleR(so,ImmGenData(),"label.main") +so$immgen <- runSingleR(so,ImmGenData(),"label.fine") +so$mouseRNAseq_main <- runSingleR(so,MouseRNAseqData(),"label.main") +so$mouseRNAseq <- runSingleR(so,MouseRNAseqData(),"label.fine") + +} + + + + +so$annot = so[[paste0(annotDB,"_main")]] + +so = doublets(so) +so$DF_hi.lo = so[[tail(names(so@meta.data),1)]] +so_noDoublet=subset(so,cells=names(so$DF_hi.lo)[so$DF_hi.lo =="Singlet"]) + + +saveRDS(so_noDoublet,outRDS) +save.image(outRdata) + + + diff --git a/Results-template/Scripts/scrnaQC.Rmd b/Results-template/Scripts/scrnaQC.Rmd new file mode 100755 index 0000000..aa5883f --- /dev/null +++ b/Results-template/Scripts/scrnaQC.Rmd @@ -0,0 +1,44 @@ +--- +params: + dir: "path/to/count/matrix" +--- + +```{r headers, include=FALSE, warning=FALSE, message=FALSE} +dateandtime<-format(Sys.time(), "%a %b %d %Y - %X") +dir<-params$dir +``` + + + +```{r setup, echo=FALSE, warning=FALSE,message=FALSE,fig.keep='all'} + +library("Seurat",lib.loc="/data/CCBR_Pipeliner/db/PipeDB/scrna_lib/") +#opts_chunk$set(root.dir = dir) +``` + + +```{r plots, echo=FALSE , warning=FALSE,fig.keep='all',results='asis'} + +setwd(dir) +file.names <- dir(path = dir ,pattern =".RData") + +for (file in file.names){ +fullPath=paste0(dir,"/",file) +load(file) + +Name=strsplit(file,".RData")[[1]][1] + +print(Name) + +print(VlnPlot(so_BC, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)) #before + +print(VlnPlot(so_filt, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)) #after + +print(DimPlot(so,group.by = "annot")) #annot + +print(DimPlot(so,group.by = "DF_hi.lo")) #double + +} + + +``` diff --git a/Results-template/Scripts/scrnaQC_Reports.R b/Results-template/Scripts/scrnaQC_Reports.R new file mode 100755 index 0000000..32fe1ae --- /dev/null +++ b/Results-template/Scripts/scrnaQC_Reports.R @@ -0,0 +1,9 @@ +args <- commandArgs(trailingOnly = TRUE) + +dir <- as.character(args[1]) +setwd(dir) +#sample=strsplit(dir,".Rdat")[[1]][1] +#outDir = as.character(args[2]) +rmarkdown::render("Scripts/scrnaQC.Rmd", output_file="samples_QC.html", params = list( + dir=dir +)) diff --git a/Rules/miRSeq.snakefile b/Rules/miRSeq.snakefile new file mode 100644 index 0000000..ad31dbc --- /dev/null +++ b/Rules/miRSeq.snakefile @@ -0,0 +1,437 @@ +from snakemake.utils import R +from os.path import join +configfile: "run.json" +# configfile: "run1.json.1" +from os import listdir +import os + +#various subroutines +def check_existence(filename): + if not os.path.exists(filename): + exit("File: %s does not exists!"%(filename)) + +def check_readaccess(filename): + check_existence(filename) + if not os.access(filename,os.R_OK): + exit("File: %s exists, but cannot be read!"%(filename)) + +def check_writeaccess(filename): + check_existence(filename) + if not 
os.access(filename,os.W_OK): + exit("File: %s exists, but cannot be read!"%(filename)) + +def createConstrasts(cList): + contrastsList = [] + for i in range(0, len(cList)-1, 2): + contrastsList.append("-".join(cList[i:i+2])) + return contrastsList + +def get_cpm_cutoff(degdir,contrasts_w_cpm_cutoff_list,cpm_cutoff_list): + return cpm_cutoff_list[contrasts_w_cpm_cutoff_list.index(degdir)] + +samples=config['project']['groups']['rsamps'] + +workpath=config['project']['workpath'] + +# contrastsList = createConstrasts(config['project']['contrasts']['rcontrasts']) +# cpm_cutoff_list = config['project']['contrasts']['rcontrasts_cpm_cutoff'] +# mincounts = config['project']['contrasts']['rcontrasts_min_counts'] + +# contrasts_w_cpm_cutoff_list = [] +# for i in zip(contrastsList,cpm_cutoff_list,mincounts): +# contrasts_w_cpm_cutoff_list.append(str(i[0]) + "_" + str(i[1]) + "_" + str(i[2])) +# print(contrasts_w_cpm_cutoff_list, contrastsList) + + +#trim #cutadapt +se = "" +pe = "" +workpath = config['project']['workpath'] +# +# if config['project']['nends'] == 2 : +# se="no" +# elif config['project']['nends'] == 1 : +# se="yes" + +se = "yes" +trim_dir='trim' +kraken_dir='kraken' +log_dir="logfiles" +# bams_dir="bams" +# degall_dir="DEG_ALL" +pfamily = config['project']['pfamily'] + +for d in [trim_dir,kraken_dir,log_dir]:#trim_dir,kraken_dir,bams_dir,star_dir,log_dir,rseqc_dir,preseq_dir,degall_dir + if not os.path.exists(join(workpath,d)): + os.mkdir(join(workpath,d)) + +if not os.path.exists("slurmfiles"): + os.mkdir ("slurmfiles") + +if se=="yes": + if (config['project']['novelMirs'] == "YES"): + rule all: + params: + batch = '--time=168:00:00' + input: + join(workpath,'rawQC'), + join(workpath,'QC'), + expand(join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.taxa.txt"),name=samples), + expand(join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.krona.html"),name=samples), + expand(join(workpath,trim_dir,"{name}.R1.trim.fastq.gz"),name=samples), + join(workpath,'Reports/multiqc_report.html'), + expand(join(workpath,"FQscreen","{name}.R1.trim_screen.txt"),name=samples), + expand(join(workpath,"FQscreen2","{name}.R1.trim_screen.txt"),name=samples), + expand(join(workpath,"FQscreen","{name}.R1.trim_screen.png"),name=samples), + expand(join(workpath,"FQscreen2","{name}.R1.trim_screen.png"),name=samples), + # # # # expand(join(workpath,"fasta/trim/{name}.trim.fasta.gz"),name=samples), + # # # # expand(join(workpath,"fasta/trim/{name}.trim.fasta"),name=samples), + expand(join(workpath,"fasta","{name}.trimfasta.done"),name=samples), + join(workpath,"fasta","collapsed_trim.fa"), + join(workpath,"mirdeep2_1p/miRNAs_expressed_all_samples_1p.txt"), + join(workpath,"fasta/novel_mature.fa"), + join(workpath,"fasta/novel_hairpin.fa"), + join(workpath,"mirdeep2_2p/novel_miRNAs_expressed_all_samples_2p.txt"), + join(workpath,"mirdeep2_results/annotatedCounts.txt"), + join(workpath,"mirdeep2_results/novelCounts.txt") + elif(config['project']['novelMirs'] == "NO"): + rule all: + params: + batch = '--time=72:00:00' + input: + join(workpath,'rawQC'), + join(workpath,'QC'), + expand(join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.taxa.txt"),name=samples), + expand(join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.krona.html"),name=samples), + expand(join(workpath,trim_dir,"{name}.R1.trim.fastq.gz"),name=samples), + join(workpath,'Reports/multiqc_report.html'), + expand(join(workpath,"FQscreen","{name}.R1.trim_screen.txt"),name=samples), + 
expand(join(workpath,"FQscreen2","{name}.R1.trim_screen.txt"),name=samples), + expand(join(workpath,"FQscreen","{name}.R1.trim_screen.png"),name=samples), + expand(join(workpath,"FQscreen2","{name}.R1.trim_screen.png"),name=samples), + # # # # expand(join(workpath,"fasta/trim/{name}.trim.fasta.gz"),name=samples), + # # # # expand(join(workpath,"fasta/trim/{name}.trim.fasta"),name=samples), + expand(join(workpath,"fasta","{name}.trimfasta.done"),name=samples), + join(workpath,"fasta","collapsed_trim.fa"), + join(workpath,"mirdeep2_out","miRNAs_expressed_all_samples.txt"), + join(workpath,"mirdeep2_results/annotated_counts.txt") + + + rule raw_fastqc: + input: + expand("{name}.R1.fastq.gz", name = samples) + output: + join(workpath,"rawQC") + priority: 2 + params: + rname = 'pl:rawfastqc', + batch = '--cpus-per-task=32 --mem=110g --time=48:00:00', + fastqcver = config['bin'][pfamily]['tool_versions']['FASTQCVER'], + threads: 32 + shell: """ + mkdir -p {output}; + module load {params.fastqcver}; + fastqc {input} -t {threads} -o {output}; + """ + rule trim_se: + input: + infq=join(workpath,"{name}.R1.fastq.gz") #+config['project']['filetype'] used if needed for file flexibility + output: + out11=join(workpath,trim_dir,"{name}.R1.trim.fastq.gz") + #err=join(workpath,"QC","{name}_run_cutadapt.err") + params: + rname='pl:trim_se', + batch='--cpus-per-task=32 --mem=110g --time=48:00:00', + cutadaptver=config['bin'][pfamily]['tool_versions']['CUTADAPTVER'], + fastawithadaptersetd=config['bin'][pfamily]['tool_parameters']['FASTAWITHADAPTERSETD'], + leadingquality=config['bin'][pfamily]['tool_parameters']['LEADINGQUALITY'], + trailingquality=config['bin'][pfamily]['tool_parameters']['TRAILINGQUALITY'], + minlen=config['bin'][pfamily]['tool_parameters']['MINLEN'] + threads: 32 + shell:""" + module load {params.cutadaptver}; + cutadapt --nextseq-trim=2 --trim-n -n 5 -O 5 -q {params.leadingquality},{params.trailingquality} -m {params.minlen} -b file:{params.fastawithadaptersetd} -j {threads} -o {output.out11} {input.infq} + """ + + rule kraken: + input: + fq=join(workpath,trim_dir,"{name}.R1.trim.fastq.gz") + output: + krakentaxa=join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.taxa.txt"), + kronahtml=join(workpath,kraken_dir,"{name}.trim.fastq.kraken_bacteria.krona.html") + params: + rname='pl:kraken', + prefix="{name}", + outdir=join(workpath,kraken_dir), + bacdb=config['bin'][pfamily]['tool_parameters']['KRAKENBACDB'], + krakenver=config['bin'][pfamily]['tool_versions']['KRAKENVER'], + kronatoolsver=config['bin'][pfamily]['tool_versions']['KRONATOOLSVER'], + threads: 24 + shell: """ + module load {params.krakenver}; + module load {params.kronatoolsver}; + if [ ! -d {params.outdir} ]; then mkdir {params.outdir}; fi + if [ ! 
-e /lscratch/$SLURM_JOBID ]; then mkdir /lscratch/$SLURM_JOBID ; fi + cd /lscratch/$SLURM_JOBID; + cp -rv {params.bacdb} /lscratch/$SLURM_JOBID/; + dbName=`echo {params.bacdb}|awk -F "/" '{{print \$NF}}'`; + kraken --db /lscratch/$SLURM_JOBID/$dbName --fastq-input --gzip-compressed --threads {threads} --output /lscratch/$SLURM_JOBID/{params.prefix}.krakenout --preload {input.fq} + kraken-translate --mpa-format --db /lscratch/$SLURM_JOBID/`echo {params.bacdb}|awk -F "/" '{{print \$NF}}'` /lscratch/$SLURM_JOBID/{params.prefix}.krakenout |cut -f2|sort|uniq -c|sort -k1,1nr > /lscratch/$SLURM_JOBID/{params.prefix}.krakentaxa + cut -f2,3 /lscratch/$SLURM_JOBID/{params.prefix}.krakenout | ktImportTaxonomy - -o /lscratch/$SLURM_JOBID/{params.prefix}.kronahtml + mv /lscratch/$SLURM_JOBID/{params.prefix}.krakentaxa {output.krakentaxa} + mv /lscratch/$SLURM_JOBID/{params.prefix}.kronahtml {output.kronahtml} + """ + + rule fastqc: + input: + expand(join(workpath,trim_dir,"{name}.R1.trim.fastq.gz"),name=samples), + output: + join(workpath,"QC") + priority: 2 + params: + rname='pl:fastqc', + batch='--cpus-per-task=32 --mem=110g --time=48:00:00', + fastqcver=config['bin'][pfamily]['tool_versions']['FASTQCVER'], + threads: 32 + shell: """ + if [ ! -d {output} ]; then mkdir -p {output}; fi + module load {params.fastqcver}; + fastqc {input} -t {threads} -o {output}; + module load python/3.5; + python Scripts/get_read_length.py {output} > {output}/readlength.txt 2> {output}/readlength.err + """ + + rule fastq_screen: + input: + file1=join(workpath,trim_dir,"{name}.R1.trim.fastq.gz") + output: + out1=join(workpath,"FQscreen","{name}.R1.trim_screen.txt"), + out2=join(workpath,"FQscreen","{name}.R1.trim_screen.png"), + out3=join(workpath,"FQscreen2","{name}.R1.trim_screen.txt"), + out4=join(workpath,"FQscreen2","{name}.R1.trim_screen.png"), + params: + rname='pl:fqscreen', + batch='--cpus-per-task=24 --mem=64g --time=10:00:00', + fastq_screen=config['bin'][pfamily]['tool_versions']['FASTQ_SCREEN'], + outdir = join(workpath,"FQscreen"), + outdir2 = join(workpath,"FQscreen2"), + fastq_screen_config=config['bin'][pfamily]['tool_parameters']['FASTQ_SCREEN_CONFIG'], + fastq_screen_config2=config['bin'][pfamily]['tool_parameters']['FASTQ_SCREEN_CONFIG2'], + perlver=config['bin'][pfamily]['tool_versions']['PERLVER'], + bowtie2ver=config['bin'][pfamily]['tool_versions']['BOWTIE2VER'], + threads: 24 + shell: """ + module load {params.bowtie2ver}; + module load {params.perlver}; + {params.fastq_screen} --conf {params.fastq_screen_config} --outdir {params.outdir} --threads {threads} --subset 1000000 --aligner bowtie2 --force {input.file1} + {params.fastq_screen} --conf {params.fastq_screen_config2} --outdir {params.outdir2} --threads {threads} --subset 1000000 --aligner bowtie2 --force {input.file1} + """ + rule multiqc: + input: + expand(join(workpath,"FQscreen","{name}.R1.trim_screen.png"),name=samples), + output: + join(workpath,"Reports","multiqc_report.html") + params: + rname="pl:multiqc", + logsdir=join(workpath,log_dir), + outdir=join(workpath,"Reports"), + multiqcver=config['bin'][pfamily]['tool_versions']['MULTIQCVER'], + qcconfig=config['bin'][pfamily]['tool_parameters']['CONFMULTIQC'] + threads: 1 + shell: """ + module load {params.multiqcver} + cd {params.outdir} + multiqc -f -c {params.qcconfig} --interactive -e cutadapt -d ../ + cd {workpath}/slurmfiles + multiqc -f --interactive . 
+ """ + + rule fastaConversion: + input: + join(workpath,trim_dir,"{name}.R1.trim.fastq.gz"), + output: + join(workpath,"fasta","{name}.trimfasta.done") + params: + rname='pl:fastq2fasta', + batch='--cpus-per-task=32 --mem=64g --time=24:00:00', + fastxtoolkitver = config['bin'][pfamily]['tool_versions']['FASTXTOOLKITVER'], + outfile = join(workpath,"fasta/trim/{name}.trim.fasta"), + outdir = join(workpath,"fasta","trim") + + threads: 32 + shell: """ + module load {params.fastxtoolkitver}; + mkdir -p {params.outdir} + zcat {input} | fastq_to_fasta -n -v -o {params.outfile} + sed -i -e 's/\s/|/g' {params.outfile} + touch {output} + """ + + rule mirdeep2_mapper: + input: + configFile=join(workpath,"mirdeep_config.txt"), + # file1=expand(join(workpath,"fasta/trim/{name}.trim.fasta.gz"),name=samples) + fileCatch=expand(join(workpath,"fasta","{name}.trimfasta.done"),name=samples) + # fileCatch=join(workpath,"fasta/trim/{name}.trimfasta.done") + output: + #join(workpath,"fasta/trim/{name}.trim.fasta.gz"), + collapsedFile=join(workpath,"fasta","collapsed_trim.fa"), + arfFile=join(workpath,"trimmed.arf") + params: + rname='pl:mirdeep2_mapper', + #outdir = join(workpath,"fasta/collapsed"), + mirdeepver=config['bin'][pfamily]['tool_versions']['MIRDEEP2VER'], + bowtieindex = config['references'][pfamily]['BOWTIE_REF'], + trimmedFolder = join(workpath,"fasta","trim"), + out_tar = join(workpath,"fasta","trim.tar.gz"), + # file1=expand(join(workpath,"fasta/trim/{name}.trim.fasta"),name=samples) + threads: 8 + shell: """ + module load {params.mirdeepver} + mapper.pl {input.configFile} -d -c -j -l 18 -m -q -p {params.bowtieindex} -s {output.collapsedFile} -t {output.arfFile} -v -o 8 2>> logfiles/mapper.log + tar -czvf {params.out_tar} {params.trimmedFolder} --remove-files + """ + + rule mirdeep2_1p: + input: + collapsedFile = join(workpath,"fasta","collapsed_trim.fa"), + arfFile = join(workpath,"trimmed.arf") + output: + annotAlign=join(workpath,"mirdeep2_1p","miRNAs_expressed_all_samples_1p.txt"), + novelmaturefasta = join(workpath,"fasta/novel_mature.fa"), + novelhairpinfasta = join(workpath,"fasta/novel_hairpin.fa") + params: + rname="pl:mirdeep2_1p", + mirdeepver=config['bin'][pfamily]['tool_versions']['MIRDEEP2VER'], + genomefasta = config['references'][pfamily]['FASTA_REF'], + maturefasta = config['references'][pfamily]['MIRBASE_MATURE'], + hairpinfasta = config['references'][pfamily]['MIRBASE_HAIRPIN'], + organism=config['references'][pfamily]['ORGANISM'], + resultdir=join(workpath,"mirdeep2_1p"), + + threads: 2 + shell: """ + module load {params.mirdeepver} + mkdir -p /lscratch/$SLURM_JOBID + mkdir -p {params.resultdir} + cd {params.resultdir} + sed -e 's/\s.*$//g' {params.genomefasta} > /lscratch/$SLURM_JOBID/genome.fa #remove whitespace, per miRDeep2 + sed -e 's/Homo sapiens/Homo_sapiens/g' {params.maturefasta}| sed -e 's/\s/|/g' > /lscratch/$SLURM_JOBID/mature.fa + sed -e 's/Homo sapiens/Homo_sapiens/g' {params.hairpinfasta}| sed -e 's/\s/|/g' > /lscratch/$SLURM_JOBID/hairpin.fa + + miRDeep2.pl {input.collapsedFile} /lscratch/$SLURM_JOBID/genome.fa {input.arfFile} /lscratch/$SLURM_JOBID/mature.fa none /lscratch/$SLURM_JOBID/hairpin.fa -t {params.organism} -P + cat mirna_results*/novel_mature_*.fa <(sed '/^>.*/s/$/_star/' mirdeep2_1p/mirna_results*/novel_star*.fa) > {output.novelmaturefasta} + cp mirna_results*/novel_pres_*.fa {output.novelhairpinfasta} + mv miRNAs_expressed_all*.csv {output.annotAlign} + """ + rule mirdeep2_2p: + input: + collapsedFile = 
join(workpath,"fasta","collapsed_trim.fa"), + arfFile = join(workpath,"trimmed.arf"), + novelmaturefasta = join(workpath,"fasta","novel_mature.fa"), + novelhairpinfasta = join(workpath,"fasta","novel_hairpin.fa") + output: + join(workpath,"mirdeep2_2p","novel_miRNAs_expressed_all_samples_2p.txt") + params: + rname="pl:mirdeep2_2p", + mirdeepver=config['bin'][pfamily]['tool_versions']['MIRDEEP2VER'], + genomefasta = config['references'][pfamily]['FASTA_REF'], + organism=config['references'][pfamily]['ORGANISM'], + resultdir=join(workpath,"mirdeep2_2p") + threads: 16 + shell: """ + module load {params.mirdeepver} + mkdir -p /lscratch/$SLURM_JOBID + mkdir -p {params.resultdir} + cd {params.resultdir} + sed -e 's/\s.*$//g' {params.genomefasta} > /lscratch/$SLURM_JOBID/genome.fa #remove whitespace, per miRDeep2 + quantifier.pl -r {input.collapsedFile} -m {input.novelmaturefasta} -p {input.novelhairpinfasta} -T 8 + mv miRNAs_expressed_all*.csv {output} + """ + rule capture_counts_2p: + input: + mirbaseResults=join(workpath,"mirdeep2_1p/miRNAs_expressed_all_samples_1p.txt"), + novelResults=join(workpath,"mirdeep2_2p/novel_miRNAs_expressed_all_samples_2p.txt") + output: + mirbaseCts=join(workpath,"mirdeep2_results/annotatedCounts.txt"), + novelCts=join(workpath,"mirdeep2_results/novelCounts.txt") + params: + rname="pl:capture_counts_2p", + configFile=join(workpath,"mirdeep_config.txt"), + resultdir = join(workpath,"mirdeep2_results"), + sampleCounts = len(samples)+4 + threads: 4 + shell: """ + mkdir -p {params.resultdir} + cut -f 1-{params.sampleCounts} {input.mirbaseResults} > {output.mirbaseCts} + cut -f 1-{params.sampleCounts} {input.novelResults} > {output.novelCts} + """ + # + rule mirdeep2_single: + input: + collapsedFile = join(workpath,"fasta/collapsed_trim.fa"), + configFile=join(workpath,"mirdeep_config.txt"), + output: + join(workpath,"mirdeep2_out","miRNAs_expressed_all_samples.txt") + params: + rname="pl:mirdeep2_single", + maturefasta = config['references'][pfamily]['MIRBASE_MATURE'], + hairpinfasta = config['references'][pfamily]['MIRBASE_HAIRPIN'], + mirdeepver=config['bin'][pfamily]['tool_versions']['MIRDEEP2VER'], + genomefasta = config['references'][pfamily]['FASTA_REF'], + organism=config['references'][pfamily]['ORGANISM'], + resultdir=join(workpath,"mirdeep2_out") + threads:16 + shell: """ + module load {params.mirdeepver} + mkdir -p /lscratch/$SLURM_JOBID + mkdir -p {params.resultdir} + cd {params.resultdir} + #sed -e 's/\s.*$//g' {params.genomefasta} > /lscratch/$SLURM_JOBID/genome.fa #remove whitespace, per miRDeep2 + sed -e 's/Homo sapiens/Homo_sapiens/g' {params.maturefasta}| sed -e 's/\s/|/g' > /lscratch/$SLURM_JOBID/mature.fa + sed -e 's/Homo sapiens/Homo_sapiens/g' {params.hairpinfasta}| sed -e 's/\s/|/g' > /lscratch/$SLURM_JOBID/hairpin.fa + quantifier.pl -c {input.configFile} -r {input.collapsedFile} -m /lscratch/$SLURM_JOBID/mature.fa -p /lscratch/$SLURM_JOBID/hairpin.fa -T 8 -t {params.organism} -P + mv miRNAs_expressed_all*.csv {output} + """ + + rule capture_counts_single: + input: + mirbaseResults=join(workpath,"mirdeep2_out/miRNAs_expressed_all_samples.txt"), + output: + mirbaseCts=join(workpath,"mirdeep2_results/annotated_counts.txt"), + params: + rname="pl:capture_counts_single", + configFile=join(workpath,"mirdeep_config.txt"), + resultdir = join(workpath,"mirdeep2_results"), + sampleCounts = len(samples)+4, + groupsFile = join(workpath,"groups.tab") + threads: 4 + shell: """ + mkdir -p {params.resultdir} + cut -f 1-{params.sampleCounts} 
{input.mirbaseResults} > {output.mirbaseCts} + while read FILE ABB; do FILE=${{FILE%%.trim*}};FILE=${{FILE##*trim/}}; sed -i "s/$ABB/$FILE/" {output.mirbaseCts}; done < {params.configFile} + while read FILE GROUP NAME; do sed -i "s/$FILE/$NAME/" {output.mirbaseCts}; done < {params.groupsFile} + sed -i 's/^#//' {output.mirbaseCts} + """ + + # + + +#needed rules: +#QC Side +#trim: cutadapt - done +#kraken: -done +# +#post-trim fastqc- done +#multiqc - done +#Alignment +#fastq to fasta:fastxtoolkit - done + +#collapse (miRDeep2) - done +#align to mature miRs (Bowtie1) -done +#align to precursor miRs (Bowtie1) - done +#align to genome (Bowtie1/miRDeep2 pass1) - done +#align to novel (Bowtie1/miRDeep2 pass2) - done +#expand counts (miRDeep2) +#ShortStack integration +#Differential expression analysis (edgeR) diff --git a/Rules/scrnaQC.snakefile b/Rules/scrnaQC.snakefile new file mode 100644 index 0000000..e59bf6c --- /dev/null +++ b/Rules/scrnaQC.snakefile @@ -0,0 +1,103 @@ +from snakemake.utils import R +from os.path import join +from os import listdir +import os +import re + +configfile: "run.json" +samples=config['project']['groups']['rsamps'] +workpath=config['project']['workpath'] + +# workpath="/data/abdelmaksoudaa/klion" +# samples = [f for f in os.listdir('.') if re.search('4H', f)] + +specie = config['project']['SPECIES'] #human +resolution = config['project']['CLUSTRESOLUTION']#"0.8" +clustAlg = config['project']['CLUSTALG'] #1-3 +annotDB = config['project']['ANNOTDB']#"HPCA" #mouse +nAnchors = "2000" +groups = "YES" #YES/NO + + +contList = [] +contrasts = open("contrasts.tab") +for line in contrasts: + line = line.strip("\n") +# print(re.sub('\t', '-', line)) + contList.append(re.sub('\t', '-', line)) + + +intType = ["seurat_batch_corrected","merged"] + +rule all: + params: + batch='--time=168:00:00', + input: + expand(join(workpath,"QC","{name}.RData"),name=samples), + expand(join(workpath,"filtered","{name}.rds"),name=samples), + expand(join(workpath,"flags","{name}.txt"),name=samples), + join(workpath,"QC","samples_QC.html"), + expand(join(workpath,"integration","seurat_batch_corrected","{myGroups}","{myGroups}.rds"),myGroups=contList), + expand(join(workpath,"integration","merged","{myGroups}","{myGroups}.rds"),myGroups=contList), + +rule qc_scrna: + input: + join(workpath,"{name}") + output: + rds=join(workpath,"filtered","{name}.rds"), + rdata=join(workpath,"QC","{name}.RData"), + qc = join(workpath,"flags","{name}.txt"), + params: + rname='pl:qc_scrna', + specie = specie, + resolution = resolution, + clustAlg = clustAlg, + annotDB = annotDB, + outDir= join(workpath,"QC") + shell: """ + +module load R/3.6.0; +Rscript Scripts/scrnaQC.R {input} {output.rds} {output.rdata} {params.specie} {params.resolution} {params.clustAlg} {params.annotDB}; +touch {output.qc} + """ + +rule qcReport_scrna: + input: + files = expand(join(workpath,"QC","{name}.RData"),name=samples), + path=join (workpath, "QC") + output: + join(workpath,"QC","samples_QC.html") + params: + rname='pl:qcReport_scrna', + shell: """ +module load R/3.6.0 +Rscript Scripts/scrnaQC_Reports.R {input.path} + """ + +rule integratedBatch: + input: + rds=expand(join(workpath,"filtered","{name}.rds"),name=samples), + flag=expand(join(workpath,"flags","{name}.txt"),name=samples) + output: + rdsBatch=join(workpath,"integration","seurat_batch_corrected","{myGroups}","{myGroups}.rds"), + mergeRDS=join(workpath,"integration","merged","{myGroups}","{myGroups}.rds"), + params: + rname='pl:integratedBatch', + batch='--cpus-per-task=8 
--mem=48g --time=24:00:00', + specie = specie, + resolution = resolution, + clustAlg = clustAlg, + annotDB = annotDB, +# outDirSeurat= rdsBatch#join(workpath,"integration","seurat_batch_corrected_RDS"), +# outDirMerged= mergeRDS#join(workpath,"integration","mergedRDS"), + nAnchors = nAnchors, + groups = groups, + dir = join(workpath,"filtered"), + contrasts = "{myGroups}" + shell: """ +module load R/3.6.0; +Rscript Scripts/integrateBatches.R {params.dir} {output.rdsBatch} {output.mergeRDS} {params.specie} {params.resolution} {params.clustAlg} {params.annotDB} {params.nAnchors} {params.groups} {params.contrasts}; + """ + + + diff --git a/cluster.json b/cluster.json index 636b99d..b73e093 100644 --- a/cluster.json +++ b/cluster.json @@ -453,6 +453,11 @@ "HOMER_motif": { "threads": "48" }, + "integratedBatch":{ + "threads": "8", + "mem": "64g", + "time": "1-00:00:00" + }, "kraken_pe": { "cpus-per-task": "24", "gres": "lscratch:256", @@ -497,11 +502,26 @@ "threads": "1", "time": "2-00:00:00" }, + "mirdeep2_1p": { + "mem": "16g", + "threads": "4", + "time": "1-00:00:00" + }, + "mirdeep2_2p": { + "mem": "16g", + "threads": "16", + "time": "1-00:00:00" + }, "mirdeep2_mapper": { "mem": "16g", "threads": "1", "time": "4-00:00:00" }, + "mirdeep2_single":{ + "mem": "16g", + "threads": "16", + "time": "1-00:00:00" + }, "mirseq_gencode_classification": { "mem": "16g", "threads": "1", @@ -632,6 +652,16 @@ "preseq": { "mem": "24g" }, + "qc_scrna":{ + "threads": "1", + "mem": "48g", + "time": "8:00:00" + }, + "qcReport_scrna":{ + "threads": "1", + "mem": "48g", + "time": "1:00:00" + }, "qualibam": { "threads": "8", "mem": "12g", @@ -873,3 +903,4 @@ "time": "2-00:00:00" } } + diff --git a/gui/frame.py b/gui/frame.py index d967a08..ba89ed8 100755 --- a/gui/frame.py +++ b/gui/frame.py @@ -664,6 +664,13 @@ def make_symlinks(self): print("Found 'peakcall.tab': Symlinking file!") cmd="cp {0}peakcall.tab {1}".format(data, self.workpath.get()) p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True) + elif pl == 'scRNAseq': + print (os.path.join(data,"h5")) + if os.path.isdir(os.path.join(data,"h5")): + print ("Found 'h5/' directory: Symlinking h5 files") + h5Path = os.path.join(data,"h5") + cmd = "for f in `ls {0}/*[._]h5`;do ln -s $f {1};done".format(h5Path,self.workpath.get()) + p = Popen(cmd, shell=True, stdin = PIPE, stdout=PIPE, stderr = STDOUT, close_fds=True) except Exception as e: showerror("Error",str(e)) diff --git a/gui/mirseq.py b/gui/mirseq.py index 70941f1..6e5e72e 100755 --- a/gui/mirseq.py +++ b/gui/mirseq.py @@ -40,18 +40,35 @@ def __init__(self, pipepanel, pipeline_name, *args, **kwargs) : label = Label(eframe,text="Pipeline")#,fg=textLightColor,bg=baseColor) label.grid(row=3,column=0,sticky=W,padx=10,pady=5) - Pipelines=["CAPmirseq-plus"] + PipelineLabels=["miRSeq_v2","CAPmirseq-plus"] + Pipelines=["miRSeq_v2","CAPmirseq-plus"] + + self.label2pipeline = { k:v for k,v in zip(PipelineLabels, Pipelines)} + PipelineLabel = self.PipelineLabel = StringVar() Pipeline = self.Pipeline = StringVar() - Pipeline.set(Pipelines[0]) + PipelineLabel.set(PipelineLabels[0]) - om = OptionMenu(eframe, Pipeline, *Pipelines, command=self.option_controller) + om = OptionMenu(eframe, PipelineLabel, *PipelineLabels, command=self.option_controller) om.config()#bg = widgetBgColor,fg=widgetFgColor) om["menu"].config()#bg = widgetBgColor,fg=widgetFgColor) #om.pack(side=LEFT,padx=20,pady=5) om.grid(row=3,column=1,sticky=W,padx=10,pady=5) + + # self.mirseqOpts = mirseqOpts = 
LabelFrame(eframe,text="miRSeq Settings") + self.novelMirsFrame = novelMirsFrame = Frame(eframe) + self.novelMirs = novelMirs = StringVar() + novelMirLabel = Label(novelMirsFrame, text= "Identify Novel miRNAs: ") + novelMirsDropdown = ["YES","NO"] + novelMirs.set(novelMirsDropdown[1]) + novelMirs_om = OptionMenu(novelMirsFrame,novelMirs,*novelMirsDropdown) + novelMirLabel.grid(row=5,column=1,sticky=W,padx=10,pady=5) + novelMirs_om.grid(row=5,column=2,sticky=W,padx=10,pady=0) + # novelMirsFrame.grid(row=5,column=0, columnspan=4, sticky=W, padx=20, pady=10 ) + + self.add_info( eframe ) - + self.option_controller() def add_info( self, parent ) : if not self.info : @@ -70,7 +87,7 @@ def add_info( self, parent ) : self.contrasts_button.grid( row=5, column=6, padx=10, pady=5 ) - self.info.grid(row=10,column=0, columnspan=6, sticky=W, padx=20, pady=10 ) + self.info.grid(row=11,column=0, columnspan=6, sticky=W, padx=20, pady=10 ) def popup_groups( self ) : @@ -115,7 +132,19 @@ def loadfunc() : info.grid(row=7,column=0, columnspan=6, sticky=W, padx=20, pady=10 ) top.focus_force() - + def option_controller( self, *args, **kwargs ) : + + PipelineFrame.option_controller( self ) + + self.Pipeline.set( self.label2pipeline[self.PipelineLabel.get()] ) + print( self.Pipeline.get() ) + + if self.Pipeline.get() == 'miRSeq_v2' : + self.novelMirsFrame.grid(row=5,column=0, columnspan=4, sticky=W, padx=20, pady=10 ) + elif self.Pipeline.get() == "CAPmirseq-plus": + self.novelMirsFrame.grid_forget() + + def makejson(self, *args): #print(args[0]) caller=args[0] @@ -302,6 +331,7 @@ def makejson(self, *args): "TRIM": "yes", "groups": groups, "contrasts": contrasts, + "novelMirs": self.novelMirs.get(), } } diff --git a/gui/scrnaseq.py b/gui/scrnaseq.py index 340a566..d18615d 100644 --- a/gui/scrnaseq.py +++ b/gui/scrnaseq.py @@ -42,8 +42,8 @@ def __init__(self, pipepanel, pipeline_name, *args, **kwargs) : label = Label(eframe,text="Pipeline")#,fg=textLightColor,bg=baseColor) label.grid(row=3,column=0,sticky=W,padx=10,pady=5) - PipelineLabels=["CellRanger","Initial/QC","Clustering","Multi-Sample Clustering" ] - Pipelines=["cellranger","scrnaseqinit","scrnaseqcluster", "scrnaseqmulticluster"] + PipelineLabels=["Initial QC","Differential Expression"] + Pipelines=["scrnaQC","scrnaDE"] self.label2pipeline = { k:v for k,v in zip(PipelineLabels, Pipelines)} @@ -56,124 +56,104 @@ def __init__(self, pipepanel, pipeline_name, *args, **kwargs) : om["menu"].config()#bg = widgetBgColor,fg=widgetFgColor) #om.pack(side=LEFT,padx=20,pady=5) om.grid(row=3,column=1,sticky=W,padx=10,pady=5) - - self.crOpts = crOpts = LabelFrame( eframe, - text="CellRanger Settings" ) - self.scrCRID = scrCRID = StringVar() - scrCRID.set("SPECIFY_PREFIX_HERE") - self.scrExpected = scrExpected = StringVar() - scrExpected.set("3000") - - scrcridL = Label(crOpts, text="CellRanger Sample ID: ") - scrcridE = Entry(crOpts, bd =2, width=25, textvariable=scrCRID) - scrcridL.grid(row=9,column=1,sticky=W,padx=10,pady=5) - scrcridE.grid(row=9,column=2,sticky=W,padx=0,pady=5) - - screxpectedL = Label(crOpts, text="Expected number of cells: ") - screxpectedE = Entry(crOpts, bd =2, width=8, textvariable=scrExpected) - screxpectedL.grid(row=10,column=1,sticky=W,padx=10,pady=5) - screxpectedE.grid(row=10,column=2,sticky=W,padx=0,pady=5) - - self.clusterOpts = clusterOpts = LabelFrame( eframe, - text="Clustering and tSNE Options" ) - - self.scrPCs = scrPCs = StringVar() - scrPCs.set("12") - self.scrRes = scrRes = StringVar() - scrRes.set("0.6") - - #scrPCs.trace('w', 
lambda a,b,c,x="scrPCs": makejson(x)) - #Filter out genes < [5] read counts in < [2] samples - scrpcsL = Label(clusterOpts, text="Include principal components 1 through ") - scrpcsE = Entry(clusterOpts, bd =2, width=3, textvariable=scrPCs) - scrresL = Label(clusterOpts, text="with clustering resolution: ") - scrresE = Entry(clusterOpts, bd =2, width=3, textvariable=scrRes) + +######## QUALITY CONTROL/FILTERING/CLUSTERING ############### + self.qcOptions = qcOptions = LabelFrame( eframe, text = "QC, Filtering, and Initial Clustering") + + #algorithm selection + self.clustAlg = clustAlg = StringVar() + clustAlgLabel = Label(qcOptions, text="Clustering Algorithm: ") + clustAlgDropdown = ["SLM (Smart Local Moving)", "Louvain (Original)","Louvain (with Multilevel Refinement)"] + clustAlg.set(clustAlgDropdown[0]) + clustAlgOptMenu = OptionMenu(qcOptions, clustAlg, *clustAlgDropdown) + clustAlgLabel.grid(row=8,column=1,sticky=W,padx=10,pady=5) + clustAlgOptMenu.grid(row=8,column=2,sticky=W,padx=0,pady=5) - scrpcsL.grid(row=9,column=1,sticky=W,padx=10,pady=5) - scrpcsE.grid(row=9,column=2,sticky=W,padx=0,pady=5) - scrresL.grid(row=9,column=3,sticky=W,padx=5,pady=5) - scrresE.grid(row=9,column=4,sticky=W,padx=0,pady=5) - #scrRes.trace('w', lambda a,b,c,x="scrPCs": makejson(x)) + #clustering resolutions + self.resolution = resolution = StringVar() + resolution.set("0.4,0.6,0.8,1.0,1.2") + resolutionLabel = Label(qcOptions,text="Clustering Resolution(s): \nSeparate with commas") + resolutionEntry = Entry(qcOptions,bd=2,width=25,textvariable=resolution) + resolutionLabel.grid(row=9,column=1,sticky=W,padx=10,pady=5) + resolutionEntry.grid(row=9,column=2,sticky=W,padx=0,pady=5) - clusterOpts.grid( row=8, column=0, columnspan=4, sticky=W, padx=20, pady=10 ) + #annotation db: human: HPCA/BP Encode; mouse: immgen/mouseRNAseq + self.annotDB = annotDB = StringVar() + annotLabel = Label(qcOptions,text="Annotation database: ") + annotHuman = ["HPCA","BP Encode"] + annotMouse = ["immgen","mouseRNAseq"] + annotDropdown = [] + genome = self.global_info.annotation.get() + if (genome == "GRCh38"): + annotDropdown = annotHuman.copy() + elif (genome == "mm10"): + annotDropdown = annotMouse.copy() + else: + annotDropdown.append("No genome selected") + annotDB.set(annotDropdown[0]) + annotDB_om = OptionMenu(qcOptions,annotDB,*annotDropdown) + annotLabel.grid(row=10,column=1,sticky=W,padx =10, pady =5) + annotDB_om.grid(row=10,column=2,sticky=W,padx=0,pady=5) - - self.multiclusterOpts = multiclusterOpts = LabelFrame( eframe, - text = "Multi-Sample Clustering and tSNE Options") - - scrccsL = Label(multiclusterOpts, text="Include canonical components 1 through ") - scrccsE = Entry(multiclusterOpts, bd =2, width=3, textvariable=scrPCs) - scrmcresL = Label(multiclusterOpts, text="with clustering resolution: ") - scrmcresE = Entry(multiclusterOpts, bd =2, width=3, textvariable=scrRes) - - scrccsL.grid(row=9,column=1,sticky=W,padx=10,pady=5) - scrccsE.grid(row=9,column=2,sticky=W,padx=0,pady=5) - scrmcresL.grid(row=9,column=3,sticky=W,padx=5,pady=5) - scrmcresE.grid(row=9,column=4,sticky=W,padx=0,pady=5) - - - self.qcOpts = qcOpts = LabelFrame( eframe, - text="Initial Settings" ) - countL = Label( qcOpts, text="Counts/Matrix Dir:" ) - countL.grid(row=9, column=1, sticky=W, padx=10, pady=5 ) - countpath=StringVar() - self.countpath = countpath - count_entry = Entry(qcOpts, - bd =2, - width = 50, - #bg = entryBgColor, - #fg = entryFgColor, - textvariable = countpath, state='normal' - ) - count_entry.grid( row=9, column=2, 
columnspan=3 ) - self.count_button = count_button = Button( qcOpts, - text="Open Directory", - command=self.set_count_directory ) - count_button.grid( row=9, column=5 ) - - self.mattype = mattype = StringVar() - mattypeL = Label(qcOpts, text="Count matrix format: ") - scrMatTypeDropdown = ["cellranger", "cellranger_raw", "zumi", "biorad"] - mattype.set(scrMatTypeDropdown[0]) - mattype_om = OptionMenu(qcOpts, mattype, *scrMatTypeDropdown) - mattypeL.grid(row=10,column=1,sticky=W,padx=10,pady=5) - mattype_om.grid(row=10,column=2,sticky=W,padx=0,pady=5) + #set groups, mostly for relabeling purposes + self.add_info( qcOptions ) #Position 11 + # self.option_controller() + + # groups_buttonL = Label(qcOptions, text="Sample Information: ") + # groups_button = Button(qcOptions, + # text="Set Groups", + # command = self.popup_groups ) + # groups_buttonL.grid(row=12,column=1,sticky=W,padx=10,pady=5) + # groups_button.grid(row=12,column=2,sticky=W,padx=0,pady=5) + # + # +######### DIFFERENTIAL EXPRESSION ######### + self.deOptions = deOptions = LabelFrame( eframe, text = "Differential Gene Expression") - self.docycleregress = docycleregress = StringVar() - docycleregressL = Label(qcOpts, text="Do cell cycle regression? ") - scrCycleDropdown = ["TRUE", "FALSE"] - docycleregress.set(scrCycleDropdown[0]) - cycle_om = OptionMenu(qcOpts, docycleregress, *scrCycleDropdown) - docycleregressL.grid(row=11,column=1,sticky=W,padx=10,pady=5) - cycle_om.grid(row=11,column=2,sticky=W,padx=0,pady=5) - - usecycleregressL_c = Label(clusterOpts, text="Use cell cycle regressed data? ") - docycleregress.set(scrCycleDropdown[0]) - cycle_om_c = OptionMenu(clusterOpts, docycleregress, *scrCycleDropdown) - usecycleregressL_c.grid(row=10,column=1,sticky=W,padx=10,pady=5) - cycle_om_c.grid(row=10,column=2,sticky=W,padx=0,pady=5) - - usecycleregressL_mc = Label(multiclusterOpts, text="Use cell cycle regressed data? 
") - docycleregress.set(scrCycleDropdown[0]) - cycle_om_mc = OptionMenu(multiclusterOpts, docycleregress, *scrCycleDropdown) - usecycleregressL_mc.grid(row=10,column=1,sticky=W,padx=10,pady=5) - cycle_om_mc.grid(row=10,column=2,sticky=W,padx=0,pady=5) - - - groups_buttonL = Label(qcOpts, text="SAMPLE INFORMATION: ") - groups_button = Button(qcOpts, - text="Set Groups", - command = self.popup_groups ) - groups_buttonL.grid(row=12,column=1,sticky=W,padx=10,pady=5) - groups_button.grid(row=12,column=2,sticky=W,padx=0,pady=5) - ##################### + #option for cluster resolution for DE + self.resolutionDE = resolutionDE = StringVar() + resolutionDE.set("0.8") + resolutionDELabel = Label(deOptions,text="Clustering Resolution: \nChoose a previous resolution or \nselect a new resolution to run") + resolutionDEEntry = Entry(deOptions,bd=2,width=25,textvariable=resolutionDE) + resolutionDELabel.grid(row=10,column=1,sticky=W,padx=10,pady=5) + resolutionDEEntry.grid(row=10,column=2,sticky=W,padx=0,pady=5) + #option for merged/integrated object + # self.integrateOption = integrateOption = StringVar() + # integrateLabel = Label(deOptions, text="Merged or Integrated (batch corrected): ") + # integrateDropdown = ["Merged","Integrated"] + # integrateOption.set(integrateDropdown[0]) + # integrateOptMenu = OptionMenu(deOptions, integrateOption, *integrateDropdown) + # integrateLabel.grid(row=9,column=1,sticky=W,padx=10,pady=5) + # integrateOptMenu.grid(row=9,column=2,sticky=W,padx=0,pady=5) + + self.add_info( deOptions ) #Position 11 self.option_controller() +### SUBFUNCTION FOR CREATING GROUPS.TAB AND CLUSTERS.TAB WINDOW ###### + def add_info( self, parent ) : + if not self.info : + self.info = LabelFrame(parent, text="Sample Information") + self.groups_button = Button(self.info, + text="Set Groups", + command = self.popup_groups ) + self.contrasts_button = Button(self.info, + text="Set Contrasts", + command = self.popup_contrasts ) + + #self.pairs_load_button.pack( side=BOTTOM, padx=5, pady=5 ) + #self.pairs_save_button.pack( side=BOTTOM, padx=5, pady=5 ) + + self.groups_button.grid( row=5, column=5, padx=10, pady=5 ) + self.contrasts_button.grid( row=5, column=6, padx=10, pady=5 ) + + self.info.grid(row=11,column=0, columnspan=6, sticky=W, padx=20, pady=10) + def popup_groups( self ) : self.popup_window( "Groups Information", "groups.tab" ) - + def popup_contrasts( self ) : + self.popup_window( "Contrasts Information", "contrasts.tab" ) def popup_window( self, text, filename ) : top = Toplevel() @@ -223,27 +203,13 @@ def option_controller( self, *args, **kwargs ) : self.Pipeline.set( self.label2pipeline[self.PipelineLabel.get()] ) print( self.Pipeline.get() ) - if self.Pipeline.get() == 'cellranger' : - self.clusterOpts.grid_forget() - self.crOpts.grid(row=8,column=0, columnspan=6, sticky=W, padx=20, pady=10 ) - self.qcOpts.grid_forget() - self.multiclusterOpts.grid_forget() - elif self.Pipeline.get() == 'scrnaseqcluster' : - self.clusterOpts.grid( row=8, column=0, columnspan=4, sticky=W, padx=20, pady=10 ) - self.crOpts.grid_forget() - self.qcOpts.grid_forget() - self.multiclusterOpts.grid_forget() - elif self.Pipeline.get() == 'scrnaseqinit' : - self.clusterOpts.grid_forget() - self.crOpts.grid_forget() - self.qcOpts.grid( row=8, column=0, columnspan=4, sticky=W, padx=20, pady=10 ) - self.multiclusterOpts.grid_forget() - elif self.Pipeline.get() == 'scrnaseqmulticluster' : - self.clusterOpts.grid_forget() - self.crOpts.grid_forget() - self.qcOpts.grid_forget() - self.multiclusterOpts.grid( row=8, 
column=0, columnspan=4, sticky=W, padx=20, pady=10 ) + if self.Pipeline.get() == 'scrnaQC' : + self.deOptions.grid_forget() + self.qcOptions.grid(row=8,column=0, columnspan=6, sticky=W, padx=20, pady=10 ) + elif self.Pipeline.get() == 'scrnaDE' : + self.deOptions.grid( row=8, column=0, columnspan=4, sticky=W, padx=20, pady=10 ) + self.qcOptions.grid_forget() def makejson_wrapper( self, *args, **kwargs ) : @@ -390,6 +356,15 @@ def makejson(self, *args): PD=dict() smparams=[] + algorithmDict = {"Louvain (Original)": 1, "Louvain (with Multilevel Refinement)": 2,"SLM (Smart Local Moving)":3} + + algorithm = algorithmDict[self.clustAlg.get()] + resolutionStr = self.resolution.get() + resolutionStr = re.sub("\s",",",resolutionStr) #remove whitespaces + resolutionStr = re.sub("[,]+",",",resolutionStr) #remove multiple commas + + + for i in range(len(self.parameters)): @@ -401,7 +376,12 @@ def makejson(self, *args): ), "r" ).read() ) - gi = self.global_info + gi = self.global_info + species = "" + if gi.annotation.get() == "GRCh38": + species = "human" + elif gi.annotation.get() == "mm10": + species = "mouse" PD={ 'project': { 'pfamily': gi.pfamily.get(), @@ -432,13 +412,21 @@ def makejson(self, *args): "cluster": "cluster_medium.json", "description": gi.description.get('1.0',END), "technique" : gi.technique.get(), - "CRID": self.scrCRID.get(), - "EXPECTED": self.scrExpected.get(), - "COUNTSPATH": self.countpath.get(), - "MATTYPE": self.mattype.get(), - "DOCYCLEREGRESS": self.docycleregress.get(), - "RESOLUTION": self.scrRes.get(), - "PCS": self.scrPCs.get(), + + "CLUSTALG" : algorithm, + "ANNOTDB" : self.annotDB.get(), + "CLUSTRESOLUTION": resolutionStr, + # "INTEGRATEDE": self.integrateOption.get(), + "CLUSTERDE": self.resolutionDE.get(), + "SPECIES": species, + # "CRID": self.scrCRID.get(), + # "EXPECTED": self.scrExpected.get(), + # "COUNTSPATH": self.countpath.get(), + # "MATTYPE": self.mattype.get(), + # "DOCYCLEREGRESS": self.docycleregress.get(), + # "RESOLUTION": self.scrRes.get(), + # "PCS": self.scrPCs.get(), + "contrasts" : contrasts, "groups": groups } } diff --git a/hg38.json b/hg38.json index 82424dc..90d01bf 100755 --- a/hg38.json +++ b/hg38.json @@ -289,7 +289,12 @@ "MUTECTGENOME": "/fdb/muTect/human_g1k_v37.fasta", "MUTECTVARIANTS": "--dbsnp /fdb/muTect/dbsnp_137.b37.vcf --cosmic /data/CCBR_Pipeliner/db/PipeDB/lib/COSMIC_82_hg38.vcf.gz" }, -"mirseq": { + "mirseq": { + "BOWTIE_REF": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/bowtie_indices/hg38", + "FASTA_REF": "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38_basic.fa", + "MIRBASE_MATURE": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/miRBase_v22/hsa_mature.fa", + "MIRBASE_HAIRPIN": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/miRBase_v22/hsa_hairpin.fa", + "ORGANISM": "Human", "run_info":{ "TOOL": "miRNA", "VERSION": "1.1", diff --git a/mm10.json b/mm10.json index aade463..0961b35 100755 --- a/mm10.json +++ b/mm10.json @@ -1,4 +1,4 @@ - {"references": { +{"references": { "exomeseq": { "VARDICTBED": "/data/CCBR_Pipeliner/db/PipeDB/lib/mm10.chroms.bed", "BWAGENOME": "/fdb/igenomes/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa", @@ -256,6 +256,13 @@ "DB29": "/data/CCBR_Pipeliner/db/PipeDB/lib/snpEff_db/dbNSFP2.9.txt.gz", "MUTECTCOSMIC": "", "MUTECTSNP": "" +}, +"mirseq":{ + "BOWTIE_REF": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/bowtie_indices/mm10", + "FASTA_REF": "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10_basic.fa", + "MIRBASE_MATURE": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/miRBase_v22/mmu_mature.fa", + "MIRBASE_HAIRPIN": 
"/data/CCBR_Pipeliner/db/PipeDB/miRseq/miRBase_v22/mmu_hairpin.fa", + "ORGANISM": "Mouse" }, "ChIPseq":{ "BLACKLISTBWAINDEX":"/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/indexes/mm10.blacklist.chrM.chr_rDNA", diff --git a/rules.json b/rules.json index a57723f..a6a0dae 100755 --- a/rules.json +++ b/rules.json @@ -183,7 +183,10 @@ "scrnaseqinit": "scrnaseq.snakefile", "scrnaseqcluster": "scrnaseq.snakefile", "CAPmirseq-plus": "CAPmirseq-plus.snakefile", - + "miRSeq_v2": "miRSeq.snakefile", + "scRNA_v2": "scrna.snakefile", + "scrnaQC": "scrnaQC.snakefile", + "scrnaDE": "scrnaDE.snakefile", "InitialChIPseqQC":"InitialChIPseqQC.snakefile", "ChIPseq":"ChIPseq.snakefile", } diff --git a/standard-bin.json b/standard-bin.json index ef042cd..4fbc029 100644 --- a/standard-bin.json +++ b/standard-bin.json @@ -339,6 +339,18 @@ }, "mirseq":{ + "tool_versions": { + "FASTQCVER": "fastqc/0.11.5", + "CUTADAPTVER": "cutadapt/1.18", + "KRAKENVER": "kraken/1.1", + "KRONATOOLSVER": "kronatools/2.7", + "FASTXTOOLKITVER": "fastxtoolkit/0.0.14", + "FASTQ_SCREEN": "/data/CCBR_Pipeliner/db/PipeDB/bin/fastq_screen_v0.9.3/fastq_screen", + "BOWTIE2VER": "bowtie/2-2.3.4", + "PERLVER": "perl/5.24.3", + "MULTIQCVER": "multiqc/1.4", + "MIRDEEP2VER": "mirdeep/2.0.0.8" + }, "tool_paths":{ @@ -370,6 +382,15 @@ # "CUTADAPT_PARAMS": "-b AATCTCGTATGCCGTCTTCTGCTTGC -O 3 -m 17 -f fastq", # "CUTADAPT_PARAMS": "-a CAAGCAGAAGACGGCATACGAGAT -O 3 -m 17 -f fastq", # "CUTADAPT_PARAMS": "-b CAAGCAGAAGACGGCATACGAGATNNNNNNGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA -b TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG -O 3 -m 17 -f fastq", + "FASTAWITHADAPTERSETD": "/data/CCBR_Pipeliner/db/PipeDB/dev/TruSeq_and_nextera_adapters.consolidated.fa", + "LEADINGQUALITY": 10, + "TRAILINGQUALITY": 10, + "MINLEN": 17, + "KRAKENBACDB": "/fdb/kraken/20170202_bacteria", + "FASTQ_SCREEN_CONFIG": "/data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen.conf", + "FASTQ_SCREEN_CONFIG2": "/data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_2.conf", + "CONFMULTIQC": "/data/CCBR_Pipeliner/db/PipeDB/Rnaseq/multiqc_config.yaml", + "CUTADAPT_PARAMS": "-b CAAGCAGAAGACGGCATACGAGAT -b TGGAATTCTCGGGTGCCAAGGAACTCCAGTCAC -O 3 -m 17 -f fastq", "TRIMMOMATIC": "java -Xmx8g -Djava.io.tmpdir=/scratch -jar /usr/local/apps/trimmomatic/Trimmomatic-0.33/trimmomatic-0.33.jar", "TRIMMOMATIC.ADAPTERS": "/data/CCBR_Pipeliner/db/PipeDB/miRseq/References/adapters2.fa",