Skip to content

Commit

Permalink
move filtering and dada2 denoising to subworkflows
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Apr 2, 2024
1 parent 78c341c commit 86845fc
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 37 deletions.
2 changes: 1 addition & 1 deletion modules/local/dadainfer.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process DADAINFER {
process DADA_INFER {
tag "$readmode"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/filterandtrim.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process FILTERANDTRIM {
process ILLUMINA_FILTER_AND_TRIM {
tag "$meta.id"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/learnerrors.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process LEARNERRORS {
process LEARN_ERRORS {
tag "$readmode"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/mergetrimtables.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process MERGETRIMTABLES {
process MERGE_TRIM_TABLES {
label 'process_low'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/pooledseqtable.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process POOLEDSEQTABLE {
process POOLED_SEQTABLE {
label 'process_medium'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/removechimeras.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process REMOVECHIMERAS {
process REMOVE_CHIMERAS {
label 'process_medium'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/renameasvs.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process RENAMEASVS {
process RENAME_ASVS {
label 'process_low'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
60 changes: 60 additions & 0 deletions subworkflows/local/dada2_denoise.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// TODO: implement pooled vs per-sample denoising modes + optional priors
include { LEARN_ERRORS } from '../../modules/local/learnerrors'
include { DADA_INFER } from '../../modules/local/dadainfer'
include { POOLED_SEQTABLE } from '../../modules/local/pooledseqtable'
include { REMOVE_CHIMERAS } from '../../modules/local/removechimeras'
include { RENAME_ASVS } from '../../modules/local/renameasvs'

// Subworkflow: DADA2 denoising of trimmed reads.
// Learns error models, infers ASVs (currently pooled mode only), builds a
// pooled sequence table, removes chimeras, and renames the resulting ASVs.
workflow DADA2_DENOISE {

    take:
    ch_trimmed_infer // channel: [ val(readmode), [ path(reads) ] ] - trimmed reads grouped by 'R1'/'R2'

    main:

    ch_versions = Channel.empty()

    // Learn a per-read-direction error model from the trimmed reads
    LEARN_ERRORS (
        ch_trimmed_infer
    )

    // Pair each error model with the reads it was trained on, keyed by readmode
    ch_infer = LEARN_ERRORS.out.error_models.join(ch_trimmed_infer)

    // TODO: add single-sample ('big data') run
    // this is always in pooled mode at the moment, should be adjusted
    // if (params.pool == "T" || params.pool == 'pseudo') {
    DADA_INFER(
        ch_infer
    )

    // Flatten all trimmed reads into one list for pooled sequence-table construction
    ch_trimmed = ch_trimmed_infer
        .map { it[1] }
        .flatten()
        .collect()

    // Merge per-direction inference results with all trimmed reads into one table
    POOLED_SEQTABLE(
        DADA_INFER.out.inferred.collect(),
        ch_trimmed
    )

    REMOVE_CHIMERAS(
        POOLED_SEQTABLE.out.filtered_seqtable
    )

    // Rename ASVs consistently across the chimera-free and filtered tables
    RENAME_ASVS(
        REMOVE_CHIMERAS.out.nonchim_seqtable,
        POOLED_SEQTABLE.out.filtered_seqtable
    )

    emit:
    nonchimeric_asvs  = RENAME_ASVS.out.nonchimeric_asvs
    seqtable_renamed  = RENAME_ASVS.out.seqtable_renamed
    readmap           = RENAME_ASVS.out.readmap
    inferred          = DADA_INFER.out.inferred
    merged_seqs       = POOLED_SEQTABLE.out.merged_seqs
    filtered_seqtable = POOLED_SEQTABLE.out.filtered_seqtable
    versions          = ch_versions // channel: [ versions.yml ]
}

52 changes: 52 additions & 0 deletions subworkflows/local/filter_and_trim.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Subworkflow: per-sample read filtering/trimming (Illumina) followed by a
// merge of the per-sample trimming reports into a single table.

include { ILLUMINA_FILTER_AND_TRIM } from '../../modules/local/filterandtrim'
include { MERGE_TRIM_TABLES } from '../../modules/local/mergetrimtables'

workflow FILTER_AND_TRIM {

    take:
    input // channel: [ val(meta), [ path(reads) ] ]

    main:
    ch_versions = Channel.empty()

    // Per-sample quality filtering and trimming of Illumina reads
    ILLUMINA_FILTER_AND_TRIM(
        input
    )

    // Gather all per-sample trimming reports so they can be merged into one table
    ch_reports = ILLUMINA_FILTER_AND_TRIM.out.trimmed_report.collect()

    // TODO: add variable-length and PacBio
    MERGE_TRIM_TABLES(
        ch_reports
    )

    // Channel setup

    // We need to group data depending on which downstream steps are needed. There
    // are two combinations possible

    // 1. The immediate downstream QC steps can use the meta info and the read pairs.
    //    Instead of doing handstands reusing the two channels above, we emit channels
    //    with the reads paired if needed.

    // 2. LearnErrors and the pooled denoising branch requires all R1 and all R2, but
    //    the two groups can be processed in parallel. So we set up the channels with
    //    this in mind. No sample ID info is really needed.
    emit:
    trimmed        = ILLUMINA_FILTER_AND_TRIM.out.trimmed
    trimmed_report = MERGE_TRIM_TABLES.out.trimmed_report // channel: [ RDS ]
    trimmed_infer  = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
                         .map { [ 'R1', it[1] ] }
                         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
                         .groupTuple(sort: true)
    versions       = ch_versions // channel: [ versions.yml ]
}

30 changes: 0 additions & 30 deletions subworkflows/local/filterandtrim.nf

This file was deleted.

0 comments on commit 86845fc

Please sign in to comment.