Skip to content

Commit

Permalink
get basic priors working, but requires two rounds
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Jun 28, 2024
1 parent 43134b9 commit 4702bf0
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 22 deletions.
8 changes: 6 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,8 @@ if (params.reads != false || params.input != false ) { // TODO maybe we should c
input:
tuple val(meta), file(reads) from readsToPerSample
file(errs) from errorModelsPerSample.collect()
path(fp, stageAs: "priors_R1") from fwd_priors
path(rp, stageAs: "priors_R2") from rev_priors

output:
file("${meta.id}.{R1,merged}.RDS") into combinedReads
Expand All @@ -758,8 +760,8 @@ if (params.reads != false || params.input != false ) { // TODO maybe we should c
script:
dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA'
readmode = errs.size() == 2 ? 'merged' : 'R1'
fpriors = params.fwd_priors ? ", priors=${fwd_priors}" : ""
rpriors = params.rev_priors ? ", priors=${rev_priors}" : ""
run_fpriors = params.fwd_priors ? "TRUE" : "FALSE"
run_rpriors = params.rev_priors ? "TRUE" : "FALSE"
template "PerSampleDadaInfer.R"
}

Expand All @@ -781,6 +783,7 @@ if (params.reads != false || params.input != false ) { // TODO maybe we should c
params.precheck == false

script:
dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA'
template "MergePerSampleDada.R"
}

Expand All @@ -796,6 +799,7 @@ if (params.reads != false || params.input != false ) { // TODO maybe we should c
tuple val(readmode), file("seqtab.${readmode}.RDS") into seqTable,rawSeqTableToRename
file "all.merged.RDS" optional true into mergerTracking,mergerQC
file "seqtab.original.${readmode}.RDS" into seqtabQC // we keep this for comparison and possible QC
file "priors.${params.idType}.R{1,2}.fna" optional true

when:
params.precheck == false
Expand Down
53 changes: 40 additions & 13 deletions templates/MergePerSampleDada.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,62 @@ suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(ShortRead))
suppressPackageStartupMessages(library(openssl))

dadaOpt <- ${dadaOpt}

if (!is.na(dadaOpt)) {
setDadaOpt(dadaOpt)
cat("dada Options:\\n",dadaOpt,"\\n")
}

dadaopts <- getDadaOpt()

# we want to standardize these for later changes, so let's generate a
# simple FASTA file of the priors, using
generate_priors <- function(x, idtype="md5") {
generate_priors <- function(x, opts = dadaopts, idtype="md5") {
st <- makeSequenceTable(x)
priors <- st[,colSums(st>1) > 1] |> colnames()
ids <- switch(idtype, simple=paste("priorF_", 1:length(priors), sep = ""),
md5=md5(priors))
seqs.dna <- ShortRead(sread = DNAStringSet(priors), id = BStringSet(ids))
return(seqs.dna)
# Moving to using the pseudo priors code from Ben here:
# https://github.com/benjjneb/dada2/blame/278f5f3ec03a846fe157b283cc08f2dd30430ae0/R/dada.R#L400
priors <- colnames(st)[colSums(st>0) >= opts\$PSEUDO_PREVALENCE | colSums(st) >= opts\$PSEUDO_ABUNDANCE]
if (length(priors) > 0) {
ids <- switch(idtype, simple=paste("priorF_", 1:length(priors), sep = ""),
md5=md5(priors))
seqs.dna <- ShortRead(sread = DNAStringSet(priors), id = BStringSet(ids))
return(seqs.dna)
} else {
return(NA)
}
}

# this is necessary for QC, but we also want to do this if we want priors from the run
dadaFs <- lapply(list.files(path = '.', pattern = '.dd.R1.RDS'), function (x) readRDS(x))
dadaRs <- lapply(list.files(path = '.', pattern = '.dd.R2.RDS'), function (x) readRDS(x))
names(dadaFs) <- sub('.dd.R1.RDS', '', list.files('.', pattern = '.dd.R1.RDS'))
saveRDS(dadaFs, "all.dd.R1.RDS")

if (as.logical("${params.generate_priors}")) {
priorsF <- generate_priors(dadaFs, "${params.idType}")
writeFasta(priorsF, file = 'priors.${params.idType}.R1.fna')
priorsF <- generate_priors(dadaFs, idtype="${params.idType}")
if (is.na(priorsF)) {
message("No priors found for R1!")
file.create("priors.${params.idType}.R1.fna")
} else {
writeFasta(priorsF, file = 'priors.${params.idType}.R1.fna')
}
if (length(dadaRs) > 0) {
priorsR <- generate_priors(dadaRs, idtype="${params.idType}")
if (is.na(priorsR)) {
message("No priors found for R2!")
file.create("priors.${params.idType}.R2.fna")
} else {
writeFasta(priorsR, file = "priors.${params.idType}.R2.fna")
}
}
}

dadaRs <- lapply(list.files(path = '.', pattern = '.dd.R2.RDS'), function (x) readRDS(x))

if (length(dadaRs) > 0) {
names(dadaRs) <- sub('.dd.R2.RDS', '', list.files('.', pattern = '.dd.R2.RDS'))
saveRDS(dadaRs, "all.dd.R2.RDS")
if (as.logical("${params.generate_priors}")) {
priorsR <- generate_priors(dadaRs, "${params.idType}")
priorsR <- generate_priors(dadaRs, idtype="${params.idType}")
writeFasta(priorsR, file = 'priors.${params.idType}.R2.fna')
}
}

}
16 changes: 9 additions & 7 deletions templates/PerSampleDadaInfer.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,33 @@ derepF <- derepFastq("${reads[0]}", n=100000)
# TODO: there is probably a better way of doing this
# when using optparse
paramsF <- list(
x=derepF,
derep=derepF,
err=errF,
multithread=${task.cpus},
pool=as.logical("${params.pool}")
)
if ("${params.fwd_priors}") {
paramsF\$priors <- getPriors("${fwd_priors}")
if (as.logical("$run_fpriors")) {
paramsF\$priors <- getPriors("${fp}")
}

ddF <- do.call(dada, paramsF)
saveRDS(ddF, "${meta.id}.dd.R1.RDS")

if (file.exists ("errors.R2.RDS")) {
errR <- readRDS("errors.R2.RDS")
derepR <- derepFastq("${reads[1]}", n=100000)
paramsR <- list(
x=derepR,
derep=derepR,
err=errR,
multithread=${task.cpus},
pool=as.logical("${params.pool}")
)
if ("${params.rev_priors}") {
paramsR\$priors <- getPriors("${rev_priors}")
if (as.logical("$run_rpriors")) {
paramsR\$priors <- getPriors("${rp}")
}

message("DADA2 params, R2:", paramsR, "\\n")
ddR <- do.call(dada, paramsR)
# dada(derepR, err=errR, multithread=${task.cpus}, pool=as.logical("${params.pool}") ${rpriors})
saveRDS(ddR, "${meta.id}.dd.R2.RDS")

merger <- mergePairs(ddF, derepF, ddR, derepR,
Expand Down

0 comments on commit 4702bf0

Please sign in to comment.