Skip to content

Commit

Permalink
Get error estimation and pooled inference working
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Mar 31, 2024
1 parent e02d81c commit c475678
Show file tree
Hide file tree
Showing 10 changed files with 553 additions and 288 deletions.
26 changes: 24 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ process {
withName: PLOTQUALITYPROFILE {
publishDir = [
[
path: { "${params.outdir}/Plots/" },
path: { "${params.outdir}/Plots/QualityProfile" },
mode: params.publish_dir_mode,
pattern: "*.pdf"
],
[
path: { "${params.outdir}/R/Plots/" },
path: { "${params.outdir}/R/QualityProfile" },
mode: params.publish_dir_mode,
pattern: "*.RDS"
]
Expand All @@ -44,6 +44,28 @@ process {
]
}

withName: MERGETRIMTABLES {
publishDir = [
path: { "${params.outdir}/TrimmedReads/" },
mode: params.publish_dir_mode
]
}

withName: LEARNERRORS {
publishDir = [
[
path: { "${params.outdir}/Plots/ErrorModel/" },
mode: params.publish_dir_mode,
pattern: "*.pdf"
],
[
path: { "${params.outdir}/R/" },
mode: params.publish_dir_mode,
pattern: "*.RDS"
]
]
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
101 changes: 37 additions & 64 deletions modules/local/dadainfer.nf
Original file line number Diff line number Diff line change
@@ -1,91 +1,64 @@
// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/modules/nf-core/
// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided using the "task.ext" directive, see here:
// https://www.nextflow.io/docs/latest/process.html#ext
// where "task.ext" is a string.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
// unless there is a run-time, storage advantage in implementing in this way
// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
// bwa mem | samtools view -B -T ref.fasta
// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
// list (`[]`) instead of a file can be used to work around this issue.

process DADAINFER {
tag "$meta.id"
tag "$readmode"
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
// TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
'biocontainers/YOUR-TOOL-HERE' }"
container "ghcr.io/h3abionet/tada:dev"

input:
// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
// MUST be provided as an input via a Groovy Map called "meta".
// This information may not be required in some instances e.g. indexing reference genome files:
// https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
// TODO nf-core: Where applicable please provide/convert compressed files as input/output
// e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
tuple val(meta), path(bam)
tuple val(readmode), file(err), file(reads)

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
tuple val(meta), path("*.bam"), emit: bam
// TODO nf-core: List additional required output channels/values here
path "versions.yml" , emit: versions
// path "versions.yml" , emit: versions
path("all.dd.${readmode}.RDS"), emit: inferred

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
// If the software is unable to output a version number on the command-line then it can be manually specified
// e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
// Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
// TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
// TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
// using the Nextflow "task" variable e.g. "--threads $task.cpus"
// TODO nf-core: Please replace the example samtools command below with your module's command
// TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
def dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA'
"""
samtools \\
sort \\
$args \\
-@ $task.cpus \\
-o ${prefix}.bam \\
-T $prefix \\
$bam
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
dadaOpt <- ${dadaOpt}
if (!is.na(dadaOpt)) {
setDadaOpt(dadaOpt)
cat("dada Options:\\n",dadaOpt,"\\n")
}
filts <- list.files('.', pattern="${readmode}.filtered.fastq.gz", full.names = TRUE)
err <- readRDS("${err}")
cat("Processing all samples\\n")
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dadainfer: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
#Variable selection from CLI input flag --pool
pool <- "${params.pool}"
# 'pool' is a weird flag, either 'pseudo' (string), or T/F (bool)
if(pool != "pseudo"){
pool <- as.logical(pool)
}
dereps <- derepFastq(filts, n=100000, verbose=TRUE)
cat(paste0("Denoising ${readmode} reads: pool:", pool, "\\n"))
dds <- dada(dereps, err=err, multithread=${task.cpus}, pool=pool)
saveRDS(dds, "all.dd.${readmode}.RDS")
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
dadainfer: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
# add some real stuff here
touch all.dd.${readmode}.RDS
"""
}
11 changes: 8 additions & 3 deletions modules/local/filterandtrim.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

process FILTERANDTRIM {
tag "$meta.id"
label 'FILTERANDTRIM'
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
Expand All @@ -37,8 +37,13 @@ process FILTERANDTRIM {

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
tuple val(meta), path("*.fastq.gz"), emit: trimmed_reads
tuple val(meta), path("*.txt"), emit: trimmed_report
// tuple val(meta), path("*.fastq.gz"), emit: trimmed_reads
// tuple val(meta), path("*.txt"), emit: trimmed_report
tuple val(meta), path("${meta.id}.R1.filtered.fastq.gz"), optional: true, emit: trimmed_R1
tuple val(meta), path("${meta.id}.R2.filtered.fastq.gz"), optional: true, emit: trimmed_R2
tuple val(meta), path("${meta.id}.R[12].filtered.fastq.gz"), optional: true, emit: trimmed
path("*.trimmed.txt"), emit: trimmed_report

// TODO nf-core: List additional required output channels/values here
// path "versions.yml" , emit: versions

Expand Down
114 changes: 49 additions & 65 deletions modules/local/learnerrors.nf
Original file line number Diff line number Diff line change
@@ -1,91 +1,75 @@
// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
// https://github.com/nf-core/modules/tree/master/modules/nf-core/
// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
// https://nf-co.re/join
// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided using the "task.ext" directive, see here:
// https://www.nextflow.io/docs/latest/process.html#ext
// where "task.ext" is a string.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
// unless there is a run-time, storage advantage in implementing in this way
// e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
// bwa mem | samtools view -B -T ref.fasta
// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
// list (`[]`) instead of a file can be used to work around this issue.

process LEARNERRORS {
tag "$meta.id"
label 'process_me'
tag "$readmode"
label 'process_medium'

// TODO nf-core: List required Conda package(s).
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
// TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
'biocontainers/YOUR-TOOL-HERE' }"
container "ghcr.io/h3abionet/tada:dev"

input:
// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
// MUST be provided as an input via a Groovy Map called "meta".
// This information may not be required in some instances e.g. indexing reference genome files:
// https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
// TODO nf-core: Where applicable please provide/convert compressed files as input/output
// e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
tuple val(meta), path(bam)
tuple val(readmode), file(reads)

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
tuple val(meta), path("*.bam"), emit: bam
// TODO nf-core: List additional required output channels/values here
path "versions.yml" , emit: versions
// path "versions.yml" , emit: versions
tuple val(readmode), file("errors.${readmode}.RDS"), emit: error_models
// path("errors.R[12].RDS"), emit: errorModelsPerSample
path("${readmode}.err.pdf"), emit: pdf

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
// If the software is unable to output a version number on the command-line then it can be manually specified
// e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
// Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
// TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
// TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
// using the Nextflow "task" variable e.g. "--threads $task.cpus"
// TODO nf-core: Please replace the example samtools command below with your module's command
// TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
def prefix = task.ext.prefix
def derepreads = 100000
// TODO: not even sure the below will work w/ nf-core style parameters,
// may need to go to a string or args-type modifications
def dadaOpt = !params.dadaOpt.isEmpty() ? "'${params.dadaOpt.collect{k,v->"$k=$v"}.join(", ")}'" : 'NA'
// TODO: will need to if-else this when implementing alternative error models
"""
samtools \\
sort \\
$args \\
-@ $task.cpus \\
-o ${prefix}.bam \\
-T $prefix \\
$bam
#!/usr/bin/env Rscript
suppressPackageStartupMessages(library(dada2))
dadaOpt <- ${dadaOpt}
if (!is.na(dadaOpt)) {
setDadaOpt(dadaOpt)
cat("dada Options:\n",dadaOpt,"\n")
}
# File parsing (these come from the process input channel)
filts <- list.files('.', pattern=paste0("${readmode}",".filtered.fastq.gz"), full.names = TRUE)
sample.names <- sapply(strsplit(basename(filts), "_"), `[`, 1) # Assumes filename = samplename_XXX.fastq.gz
set.seed(100)
# Learn forward error rates
err <- learnErrors(filts, multithread=${task.cpus}, verbose=1)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
learnerrors: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
# This is a rough correction for NovaSeq binning issues
# See https://github.com/h3abionet/TADA/issues/31, we'll likely
# add alternatives here soon
if (as.logical("${params.quality_binning}") == TRUE ) {
# TODO: add alternatives for binning
print("Running binning correction")
errs <- t(apply(getErrors(err), 1, function(x) { x[x < x[40]] = x[40]; return(x)} ))
err\$err_out <- errs
}
pdf(paste0("${readmode}",".err.pdf"))
plotErrors(err, nominalQ=TRUE)
dev.off()
saveRDS(err, paste0("errors.","${readmode}",".RDS"))
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def prefix = task.ext.prefix
// TODO nf-core: A stub section should mimic the execution of the original module as best as possible
// Have a look at the following examples:
// Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
// Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
"""
touch ${prefix}.bam
cat <<-END_VERSIONS > versions.yml
"${task.process}":
learnerrors: \$(samtools --version |& sed '1!d ; s/samtools //')
END_VERSIONS
# TODO: make a proper stub
"""
}
Loading

0 comments on commit c475678

Please sign in to comment.