From 86845fc0de064e1add51d96cbadf20487ca78a3a Mon Sep 17 00:00:00 2001
From: Chris Fields
Date: Tue, 2 Apr 2024 16:33:04 -0500
Subject: [PATCH] move filtering and dada2 denoising to subworkflows

---
 modules/local/dadainfer.nf            |  2 +-
 modules/local/filterandtrim.nf        |  2 +-
 modules/local/learnerrors.nf          |  2 +-
 modules/local/mergetrimtables.nf      |  2 +-
 modules/local/pooledseqtable.nf       |  2 +-
 modules/local/removechimeras.nf       |  2 +-
 modules/local/renameasvs.nf           |  2 +-
 subworkflows/local/dada2_denoise.nf   | 60 +++++++++++++++++++++++++++
 subworkflows/local/filter_and_trim.nf | 52 +++++++++++++++++++++++
 subworkflows/local/filterandtrim.nf   | 30 --------------
 10 files changed, 119 insertions(+), 37 deletions(-)
 create mode 100644 subworkflows/local/dada2_denoise.nf
 create mode 100644 subworkflows/local/filter_and_trim.nf
 delete mode 100644 subworkflows/local/filterandtrim.nf

diff --git a/modules/local/dadainfer.nf b/modules/local/dadainfer.nf
index 79ae2c9..3ae5d0c 100644
--- a/modules/local/dadainfer.nf
+++ b/modules/local/dadainfer.nf
@@ -1,4 +1,4 @@
-process DADAINFER {
+process DADA_INFER {
     tag "$readmode"
     label 'process_medium'
 
diff --git a/modules/local/filterandtrim.nf b/modules/local/filterandtrim.nf
index c626a78..2a7b2a3 100644
--- a/modules/local/filterandtrim.nf
+++ b/modules/local/filterandtrim.nf
@@ -1,4 +1,4 @@
-process FILTERANDTRIM {
+process ILLUMINA_FILTER_AND_TRIM {
     tag "$meta.id"
     label 'process_medium'
 
diff --git a/modules/local/learnerrors.nf b/modules/local/learnerrors.nf
index 45089e9..3806dc9 100644
--- a/modules/local/learnerrors.nf
+++ b/modules/local/learnerrors.nf
@@ -1,4 +1,4 @@
-process LEARNERRORS {
+process LEARN_ERRORS {
     tag "$readmode"
     label 'process_medium'
 
diff --git a/modules/local/mergetrimtables.nf b/modules/local/mergetrimtables.nf
index 0f34f92..525dd09 100644
--- a/modules/local/mergetrimtables.nf
+++ b/modules/local/mergetrimtables.nf
@@ -1,4 +1,4 @@
-process MERGETRIMTABLES {
+process MERGE_TRIM_TABLES {
     label 'process_low'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/pooledseqtable.nf b/modules/local/pooledseqtable.nf
index d8ec902..8f748a5 100644
--- a/modules/local/pooledseqtable.nf
+++ b/modules/local/pooledseqtable.nf
@@ -1,4 +1,4 @@
-process POOLEDSEQTABLE {
+process POOLED_SEQTABLE {
     label 'process_medium'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/removechimeras.nf b/modules/local/removechimeras.nf
index bd6b2c5..defc82e 100644
--- a/modules/local/removechimeras.nf
+++ b/modules/local/removechimeras.nf
@@ -1,4 +1,4 @@
-process REMOVECHIMERAS {
+process REMOVE_CHIMERAS {
     label 'process_medium'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/renameasvs.nf b/modules/local/renameasvs.nf
index 16574b6..42b16bd 100644
--- a/modules/local/renameasvs.nf
+++ b/modules/local/renameasvs.nf
@@ -1,4 +1,4 @@
-process RENAMEASVS {
+process RENAME_ASVS {
     label 'process_low'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/subworkflows/local/dada2_denoise.nf b/subworkflows/local/dada2_denoise.nf
new file mode 100644
index 0000000..84adf16
--- /dev/null
+++ b/subworkflows/local/dada2_denoise.nf
@@ -0,0 +1,60 @@
+// TODO: implement pooled vs per-sample denoising + optional priors
+include { LEARN_ERRORS    } from '../../modules/local/learnerrors'
+include { DADA_INFER      } from '../../modules/local/dadainfer'
+include { POOLED_SEQTABLE } from '../../modules/local/pooledseqtable'
+include { REMOVE_CHIMERAS } from '../../modules/local/removechimeras'
+include { RENAME_ASVS     } from '../../modules/local/renameasvs'
+
+workflow DADA2_DENOISE {
+
+    take:
+    // TODO nf-core: edit input (take) channels
+    ch_trimmed_infer // channel: [ val(readmode), [ path(reads) ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // TODO nf-core: substitute modules here for the modules of your subworkflow
+
+    LEARN_ERRORS (
+        ch_trimmed_infer
+    )
+
+    ch_infer = LEARN_ERRORS.out.error_models.join(ch_trimmed_infer)
+
+    // TODO: add single-sample ('big data') run
+    // this is always in pooled mode at the moment, should be adjusted
+    // if (params.pool == "T" || params.pool == 'pseudo') {
+    DADA_INFER(
+        ch_infer
+    )
+
+    ch_trimmed = ch_trimmed_infer
+        .map { it[1] }
+        .flatten()
+        .collect()
+
+    POOLED_SEQTABLE(
+        DADA_INFER.out.inferred.collect(),
+        ch_trimmed
+    )
+
+    REMOVE_CHIMERAS(
+        POOLED_SEQTABLE.out.filtered_seqtable
+    )
+
+    RENAME_ASVS(
+        REMOVE_CHIMERAS.out.nonchim_seqtable,
+        POOLED_SEQTABLE.out.filtered_seqtable
+    )
+
+    emit:
+    nonchimeric_asvs  = RENAME_ASVS.out.nonchimeric_asvs
+    seqtable_renamed  = RENAME_ASVS.out.seqtable_renamed
+    readmap           = RENAME_ASVS.out.readmap
+    inferred          = DADA_INFER.out.inferred
+    merged_seqs       = POOLED_SEQTABLE.out.merged_seqs
+    filtered_seqtable = POOLED_SEQTABLE.out.filtered_seqtable
+}
+
diff --git a/subworkflows/local/filter_and_trim.nf b/subworkflows/local/filter_and_trim.nf
new file mode 100644
index 0000000..39156c7
--- /dev/null
+++ b/subworkflows/local/filter_and_trim.nf
@@ -0,0 +1,52 @@
+// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/subworkflows
+//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A subworkflow SHOULD import at least two modules
+
+include { ILLUMINA_FILTER_AND_TRIM } from '../../modules/local/filterandtrim'
+include { MERGE_TRIM_TABLES        } from '../../modules/local/mergetrimtables'
+
+workflow FILTER_AND_TRIM {
+
+    take:
+    input // channel: [ val(meta), path(reads) ]
+
+    main:
+    ILLUMINA_FILTER_AND_TRIM(
+        input
+    )
+
+    ch_reports = ILLUMINA_FILTER_AND_TRIM.out.trimmed_report.collect()
+
+    // TODO: add variable-length and PacBio
+    MERGE_TRIM_TABLES(
+        ch_reports
+    )
+
+    // Channel setup
+    // We need to group data depending on which downstream steps are needed. There
+    // are two possible combinations:
+
+    // 1. The immediate downstream QC steps can use the meta info and the read pairs.
+    //    Instead of doing handstands reusing the two channels above, we emit channels
+    //    with the reads paired if needed.
+
+    // 2. LearnErrors and the pooled denoising branch require all R1 and all R2, but
+    //    the two groups can be processed in parallel. So we set up the channels with
+    //    this in mind. No sample ID info is really needed.
+
+    // ch_trimmed_infer = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
+    //         .map { [ 'R1', it[1] ] }
+    //         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
+    //         .groupTuple(sort: true)
+
+    emit:
+    trimmed        = ILLUMINA_FILTER_AND_TRIM.out.trimmed
+    trimmed_report = MERGE_TRIM_TABLES.out.trimmed_report // channel: [ RDS ]
+    trimmed_infer  = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
+                         .map { [ 'R1', it[1] ] }
+                         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
+                         .groupTuple(sort: true)
+    // versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/filterandtrim.nf b/subworkflows/local/filterandtrim.nf
deleted file mode 100644
index b9ac327..0000000
--- a/subworkflows/local/filterandtrim.nf
+++ /dev/null
@@ -1,30 +0,0 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { ILLUMINAFILTERANDTRIM } from '../../../modules/local/filterandtrim'
-
-workflow FILTERANDTRIM {
-
-    take:
-    input //channel: [val(meta), path(reads)
-
-    main:
-    ILLUMINAFILTERANDTRIM()
-    // ch_versions = Channel.empty()
-
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
-
-
-
-    emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam  // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]
-
-    versions = ch_versions            // channel: [ versions.yml ]
-}
-
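
Usage note: a minimal sketch of how the two new subworkflows might be chained from a
top-level workflow. The TADA workflow name and the ch_reads input channel below are
hypothetical stand-ins (the actual caller is not part of this patch); the channel
shapes follow the take:/emit: declarations above.

    include { FILTER_AND_TRIM } from './subworkflows/local/filter_and_trim'
    include { DADA2_DENOISE   } from './subworkflows/local/dada2_denoise'

    workflow TADA {
        take:
        ch_reads // channel: [ val(meta), path(reads) ], hypothetical samplesheet input

        main:
        // per-sample filtering/trimming, plus a merged trimming report
        FILTER_AND_TRIM(
            ch_reads
        )

        // trimmed_infer groups reads by direction, [ 'R1', [ reads... ] ] and
        // [ 'R2', [ reads... ] ], so error learning and the pooled denoising
        // branch each run once per read direction
        DADA2_DENOISE(
            FILTER_AND_TRIM.out.trimmed_infer
        )

        emit:
        seqtable = DADA2_DENOISE.out.seqtable_renamed
        asvs     = DADA2_DENOISE.out.nonchimeric_asvs
    }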