From 86845fc0de064e1add51d96cbadf20487ca78a3a Mon Sep 17 00:00:00 2001
From: Chris Fields
Date: Tue, 2 Apr 2024 16:33:04 -0500
Subject: [PATCH] move filtering and dada2 denoising to subworkflows

---
 modules/local/dadainfer.nf            |  2 +-
 modules/local/filterandtrim.nf        |  2 +-
 modules/local/learnerrors.nf          |  2 +-
 modules/local/mergetrimtables.nf      |  2 +-
 modules/local/pooledseqtable.nf       |  2 +-
 modules/local/removechimeras.nf       |  2 +-
 modules/local/renameasvs.nf           |  2 +-
 subworkflows/local/dada2_denoise.nf   | 60 +++++++++++++++++++++++++++
 subworkflows/local/filter_and_trim.nf | 52 +++++++++++++++++++++++
 subworkflows/local/filterandtrim.nf   | 30 --------------
 10 files changed, 119 insertions(+), 37 deletions(-)
 create mode 100644 subworkflows/local/dada2_denoise.nf
 create mode 100644 subworkflows/local/filter_and_trim.nf
 delete mode 100644 subworkflows/local/filterandtrim.nf

diff --git a/modules/local/dadainfer.nf b/modules/local/dadainfer.nf
index 79ae2c9..3ae5d0c 100644
--- a/modules/local/dadainfer.nf
+++ b/modules/local/dadainfer.nf
@@ -1,4 +1,4 @@
-process DADAINFER {
+process DADA_INFER {
     tag "$readmode"
     label 'process_medium'
 
diff --git a/modules/local/filterandtrim.nf b/modules/local/filterandtrim.nf
index c626a78..2a7b2a3 100644
--- a/modules/local/filterandtrim.nf
+++ b/modules/local/filterandtrim.nf
@@ -1,4 +1,4 @@
-process FILTERANDTRIM {
+process ILLUMINA_FILTER_AND_TRIM {
     tag "$meta.id"
     label 'process_medium'
 
diff --git a/modules/local/learnerrors.nf b/modules/local/learnerrors.nf
index 45089e9..3806dc9 100644
--- a/modules/local/learnerrors.nf
+++ b/modules/local/learnerrors.nf
@@ -1,4 +1,4 @@
-process LEARNERRORS {
+process LEARN_ERRORS {
     tag "$readmode"
     label 'process_medium'
 
diff --git a/modules/local/mergetrimtables.nf b/modules/local/mergetrimtables.nf
index 0f34f92..525dd09 100644
--- a/modules/local/mergetrimtables.nf
+++ b/modules/local/mergetrimtables.nf
@@ -1,4 +1,4 @@
-process MERGETRIMTABLES {
+process MERGE_TRIM_TABLES {
     label 'process_low'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/pooledseqtable.nf b/modules/local/pooledseqtable.nf
index d8ec902..8f748a5 100644
--- a/modules/local/pooledseqtable.nf
+++ b/modules/local/pooledseqtable.nf
@@ -1,4 +1,4 @@
-process POOLEDSEQTABLE {
+process POOLED_SEQTABLE {
     label 'process_medium'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/removechimeras.nf b/modules/local/removechimeras.nf
index bd6b2c5..defc82e 100644
--- a/modules/local/removechimeras.nf
+++ b/modules/local/removechimeras.nf
@@ -1,4 +1,4 @@
-process REMOVECHIMERAS {
+process REMOVE_CHIMERAS {
     label 'process_medium'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/modules/local/renameasvs.nf b/modules/local/renameasvs.nf
index 16574b6..42b16bd 100644
--- a/modules/local/renameasvs.nf
+++ b/modules/local/renameasvs.nf
@@ -1,4 +1,4 @@
-process RENAMEASVS {
+process RENAME_ASVS {
     label 'process_low'
 
     container "ghcr.io/h3abionet/tada:dev"
diff --git a/subworkflows/local/dada2_denoise.nf b/subworkflows/local/dada2_denoise.nf
new file mode 100644
index 0000000..84adf16
--- /dev/null
+++ b/subworkflows/local/dada2_denoise.nf
@@ -0,0 +1,60 @@
+// TODO: implement pooled vs per-sample denoising + optional priors
+include { LEARN_ERRORS    } from '../../modules/local/learnerrors'
+include { DADA_INFER      } from '../../modules/local/dadainfer'
+include { POOLED_SEQTABLE } from '../../modules/local/pooledseqtable'
+include { REMOVE_CHIMERAS } from '../../modules/local/removechimeras'
+include { RENAME_ASVS     } from '../../modules/local/renameasvs'
+
+workflow DADA2_DENOISE {
+
+    take:
+    // TODO nf-core: edit input (take) channels
+    ch_trimmed_infer // channel: [ val(readmode), [ path(reads) ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // TODO nf-core: substitute modules here for the modules of your subworkflow
+
+    LEARN_ERRORS (
+        ch_trimmed_infer
+    )
+
+    ch_infer = LEARN_ERRORS.out.error_models.join(ch_trimmed_infer)
+
+    // TODO: add single-sample ('big data') run
+    // this is always in pooled mode at the moment, should be adjusted
+    // if (params.pool == "T" || params.pool == 'pseudo') {
+    DADA_INFER(
+        ch_infer
+    )
+
+    ch_trimmed = ch_trimmed_infer
+        .map { it[1] }
+        .flatten()
+        .collect()
+
+    POOLED_SEQTABLE(
+        DADA_INFER.out.inferred.collect(),
+        ch_trimmed
+    )
+
+    REMOVE_CHIMERAS(
+        POOLED_SEQTABLE.out.filtered_seqtable
+    )
+
+    RENAME_ASVS(
+        REMOVE_CHIMERAS.out.nonchim_seqtable,
+        POOLED_SEQTABLE.out.filtered_seqtable
+    )
+
+    emit:
+    nonchimeric_asvs  = RENAME_ASVS.out.nonchimeric_asvs
+    seqtable_renamed  = RENAME_ASVS.out.seqtable_renamed
+    readmap           = RENAME_ASVS.out.readmap
+    inferred          = DADA_INFER.out.inferred
+    merged_seqs       = POOLED_SEQTABLE.out.merged_seqs
+    filtered_seqtable = POOLED_SEQTABLE.out.filtered_seqtable
+}
+
diff --git a/subworkflows/local/filter_and_trim.nf b/subworkflows/local/filter_and_trim.nf
new file mode 100644
index 0000000..39156c7
--- /dev/null
+++ b/subworkflows/local/filter_and_trim.nf
@@ -0,0 +1,52 @@
+// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/subworkflows
+//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A subworkflow SHOULD import at least two modules
+
+include { ILLUMINA_FILTER_AND_TRIM } from '../../modules/local/filterandtrim'
+include { MERGE_TRIM_TABLES        } from '../../modules/local/mergetrimtables'
+
+workflow FILTER_AND_TRIM {
+
+    take:
+    input // channel: [ val(meta), path(reads) ]
+
+    main:
+    ILLUMINA_FILTER_AND_TRIM(
+        input
+    )
+
+    ch_reports = ILLUMINA_FILTER_AND_TRIM.out.trimmed_report.collect()
+
+    // TODO: add variable-length and PacBio
+    MERGE_TRIM_TABLES(
+        ch_reports
+    )
+
+    // Channel setup
+    // We need to group data depending on which downstream steps are needed. There
+    // are two possible combinations:
+
+    // 1. The immediate downstream QC steps can use the meta info and the read pairs.
+    //    Instead of doing handstands reusing the two channels above, we emit channels
+    //    with the reads paired if needed.
+
+    // 2. LearnErrors and the pooled denoising branch require all R1 and all R2, but
+    //    the two groups can be processed in parallel. So we set up the channels with
+    //    this in mind. No sample ID info is really needed.
+
+    // ch_trimmed_infer = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
+    //         .map { [ 'R1', it[1] ] }
+    //         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
+    //         .groupTuple(sort: true)
+
+    emit:
+    trimmed        = ILLUMINA_FILTER_AND_TRIM.out.trimmed
+    trimmed_report = MERGE_TRIM_TABLES.out.trimmed_report // channel: [ RDS ]
+    trimmed_infer  = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
+                         .map { [ 'R1', it[1] ] }
+                         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
+                         .groupTuple(sort: true)
+    // versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/filterandtrim.nf b/subworkflows/local/filterandtrim.nf
deleted file mode 100644
index b9ac327..0000000
--- a/subworkflows/local/filterandtrim.nf
+++ /dev/null
@@ -1,30 +0,0 @@
-// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/subworkflows
-//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A subworkflow SHOULD import at least two modules
-
-include { ILLUMINAFILTERANDTRIM } from '../../../modules/local/filterandtrim'
-
-workflow FILTERANDTRIM {
-
-    take:
-    input //channel: [val(meta), path(reads)
-
-    main:
-    ILLUMINAFILTERANDTRIM()
-    // ch_versions = Channel.empty()
-
-    // TODO nf-core: substitute modules here for the modules of your subworkflow
-
-
-
-    emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam  // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]
-
-    versions = ch_versions            // channel: [ versions.yml ]
-}
-
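
Usage note: a minimal sketch of how the two new subworkflows might be chained from a
top-level workflow. The TADA workflow name and the ch_reads input channel below are
hypothetical stand-ins (the actual caller is not part of this patch); the channel
shapes follow the take:/emit: declarations above.

    include { FILTER_AND_TRIM } from './subworkflows/local/filter_and_trim'
    include { DADA2_DENOISE   } from './subworkflows/local/dada2_denoise'

    workflow TADA {
        take:
        ch_reads // channel: [ val(meta), path(reads) ], hypothetical samplesheet input

        main:
        // per-sample filtering/trimming, plus a merged trimming report
        FILTER_AND_TRIM(
            ch_reads
        )

        // trimmed_infer groups reads by direction, [ 'R1', [ reads... ] ] and
        // [ 'R2', [ reads... ] ], so error learning and the pooled denoising
        // branch each run once per read direction
        DADA2_DENOISE(
            FILTER_AND_TRIM.out.trimmed_infer
        )

        emit:
        seqtable = DADA2_DENOISE.out.seqtable_renamed
        asvs     = DADA2_DENOISE.out.nonchimeric_asvs
    }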