Skip to content

Commit

Permalink
move filtering and dada2 denoising to subworkflows
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Apr 2, 2024
1 parent 78c341c commit 86845fc
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 37 deletions.
2 changes: 1 addition & 1 deletion modules/local/dadainfer.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process DADAINFER {
process DADA_INFER {
tag "$readmode"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/filterandtrim.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process FILTERANDTRIM {
process ILLUMINA_FILTER_AND_TRIM {
tag "$meta.id"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/learnerrors.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process LEARNERRORS {
process LEARN_ERRORS {
tag "$readmode"
label 'process_medium'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/mergetrimtables.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process MERGETRIMTABLES {
process MERGE_TRIM_TABLES {
label 'process_low'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/pooledseqtable.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process POOLEDSEQTABLE {
process POOLED_SEQTABLE {
label 'process_medium'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/removechimeras.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process REMOVECHIMERAS {
process REMOVE_CHIMERAS {
label 'process_medium'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/renameasvs.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process RENAMEASVS {
process RENAME_ASVS {
label 'process_low'

container "ghcr.io/h3abionet/tada:dev"
Expand Down
60 changes: 60 additions & 0 deletions subworkflows/local/dada2_denoise.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// TODO: implement pooled vs per-sample denoising modes + optional priors
include { LEARN_ERRORS } from '../../modules/local/learnerrors'
include { DADA_INFER } from '../../modules/local/dadainfer'
include { POOLED_SEQTABLE } from '../../modules/local/pooledseqtable'
include { REMOVE_CHIMERAS } from '../../modules/local/removechimeras'
include { RENAME_ASVS } from '../../modules/local/renameasvs'

// Subworkflow: DADA2 denoising of trimmed reads.
// Learns error models, infers ASVs (currently pooled mode only), builds a
// pooled sequence table, removes chimeras, and renames the resulting ASVs.
workflow DADA2_DENOISE {

    take:
    ch_trimmed_infer // channel: [ val(readmode), [ path(reads) ] ] - trimmed reads grouped by 'R1'/'R2'

    main:

    ch_versions = Channel.empty()

    // Learn a per-read-direction error model from the trimmed reads
    LEARN_ERRORS (
        ch_trimmed_infer
    )

    // Pair each error model with the reads it was trained on, keyed by readmode
    ch_infer = LEARN_ERRORS.out.error_models.join(ch_trimmed_infer)

    // TODO: add single-sample ('big data') run
    // this is always in pooled mode at the moment, should be adjusted
    // if (params.pool == "T" || params.pool == 'pseudo') {
    DADA_INFER(
        ch_infer
    )

    // Flatten all trimmed reads into one list for pooled sequence-table construction
    ch_trimmed = ch_trimmed_infer
        .map { it[1] }
        .flatten()
        .collect()

    // Merge per-direction inference results with all trimmed reads into one table
    POOLED_SEQTABLE(
        DADA_INFER.out.inferred.collect(),
        ch_trimmed
    )

    REMOVE_CHIMERAS(
        POOLED_SEQTABLE.out.filtered_seqtable
    )

    // Rename ASVs consistently across the chimera-free and filtered tables
    RENAME_ASVS(
        REMOVE_CHIMERAS.out.nonchim_seqtable,
        POOLED_SEQTABLE.out.filtered_seqtable
    )

    emit:
    nonchimeric_asvs  = RENAME_ASVS.out.nonchimeric_asvs
    seqtable_renamed  = RENAME_ASVS.out.seqtable_renamed
    readmap           = RENAME_ASVS.out.readmap
    inferred          = DADA_INFER.out.inferred
    merged_seqs       = POOLED_SEQTABLE.out.merged_seqs
    filtered_seqtable = POOLED_SEQTABLE.out.filtered_seqtable
    versions          = ch_versions // channel: [ versions.yml ]
}

52 changes: 52 additions & 0 deletions subworkflows/local/filter_and_trim.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Subworkflow: per-sample read filtering/trimming (Illumina) followed by a
// merge of the per-sample trimming reports into a single table.

include { ILLUMINA_FILTER_AND_TRIM } from '../../modules/local/filterandtrim'
include { MERGE_TRIM_TABLES } from '../../modules/local/mergetrimtables'

workflow FILTER_AND_TRIM {

    take:
    input // channel: [ val(meta), [ path(reads) ] ]

    main:
    ch_versions = Channel.empty()

    // Per-sample quality filtering and trimming of Illumina reads
    ILLUMINA_FILTER_AND_TRIM(
        input
    )

    // Gather all per-sample trimming reports so they can be merged into one table
    ch_reports = ILLUMINA_FILTER_AND_TRIM.out.trimmed_report.collect()

    // TODO: add variable-length and PacBio
    MERGE_TRIM_TABLES(
        ch_reports
    )

    // Channel setup

    // We need to group data depending on which downstream steps are needed. There
    // are two combinations possible

    // 1. The immediate downstream QC steps can use the meta info and the read pairs.
    //    Instead of doing handstands reusing the two channels above, we emit channels
    //    with the reads paired if needed.

    // 2. LearnErrors and the pooled denoising branch requires all R1 and all R2, but
    //    the two groups can be processed in parallel. So we set up the channels with
    //    this in mind. No sample ID info is really needed.
    emit:
    trimmed        = ILLUMINA_FILTER_AND_TRIM.out.trimmed
    trimmed_report = MERGE_TRIM_TABLES.out.trimmed_report // channel: [ RDS ]
    trimmed_infer  = ILLUMINA_FILTER_AND_TRIM.out.trimmed_R1
                         .map { [ 'R1', it[1] ] }
                         .concat( ILLUMINA_FILTER_AND_TRIM.out.trimmed_R2.map { [ 'R2', it[1] ] } )
                         .groupTuple(sort: true)
    versions       = ch_versions // channel: [ versions.yml ]
}

30 changes: 0 additions & 30 deletions subworkflows/local/filterandtrim.nf

This file was deleted.

0 comments on commit 86845fc

Please sign in to comment.