From 8f8e0d3d33280b7f7d91b7d5372999020bf9dfa9 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 22:06:14 +0000 Subject: [PATCH 01/21] Remove bcftools isec --- README.md | 1 - docs/output.md | 18 ------------------ modules/local/bcftools_isec.nf | 31 ------------------------------- 3 files changed, 50 deletions(-) delete mode 100644 modules/local/bcftools_isec.nf diff --git a/README.md b/README.md index c4fa3a7e..950fa1a0 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,6 @@ The SRA download functionality has been removed from the pipeline (`>=2.1`) and * Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) * Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) - 8. Intersect variants across callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) 6. _De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); *amplicon data only*) 2. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) *||* [`Unicycler`](https://github.com/rrwick/Unicycler) *||* [`minia`](https://github.com/GATB/minia)) diff --git a/docs/output.md b/docs/output.md index 4ae2e65f..a2bfa18a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -301,7 +301,6 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t * [Pangolin](#pangolin) - Lineage analysis * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers * [De novo assembly](#illumina-de-novo-assembly) * [Cutadapt](#cutadapt) - Primer trimming for amplicon data * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly @@ -647,23 +646,6 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs

ASCIIGenome screenshot

-### BCFTools isec - -
-Output files - -* `variants/intersect//` - * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. - * `*.vcf.gz.tbi`: Index for VCF file. - * `README.txt`: File containing command used and file name mappings. - * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. - -**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. - -
- -[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. - ## Illumina: De novo assembly A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. diff --git a/modules/local/bcftools_isec.nf b/modules/local/bcftools_isec.nf deleted file mode 100644 index a9ce5ea8..00000000 --- a/modules/local/bcftools_isec.nf +++ /dev/null @@ -1,31 +0,0 @@ -process BCFTOOLS_ISEC { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" - - input: - tuple val(meta), path('ivar/*'), path('ivar/*'), path('bcftools/*'), path('bcftools/*') - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools isec \\ - $args \\ - -p $prefix \\ - */*.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} From 6179db00acec555aa6203298509ad8cdc7c9084e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:17:46 +0000 Subject: [PATCH 02/21] Add new params --- CHANGELOG.md | 3 +++ conf/modules_illumina.config | 13 ------------- nextflow.config | 1 + 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2c93952..53028e10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--nextclade_dataset_name` | | | `--nextclade_dataset_reference` | | | `--nextclade_dataset_tag` | +| | `--skip_consensus_plots` | +| `--callers` | `--variant_caller` | +| | `--consensus_caller` | > **NB:** Parameter has been __updated__ if both old and new parameter information is present. > **NB:** Parameter has been __added__ if just the new parameter information is present. 
diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index db244a7c..af334abf 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -748,19 +748,6 @@ if (!params.skip_variants) { } } } - - if (callers.size() > 1) { - process { - withName: 'BCFTOOLS_ISEC' { - ext.args = '--nfiles +2 --output-type z' - publishDir = [ - path: { "${params.outdir}/variants/intersect" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } } if (!params.skip_assembly) { diff --git a/nextflow.config b/nextflow.config index 1f0470a3..9f3027db 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,6 +71,7 @@ params { skip_ivar_trim = false skip_markduplicates = true skip_picard_metrics = false + skip_consensus_plots = false skip_consensus = false skip_variants = false From 317bda1ea11fe5bbb166a12501f364202564d441 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:18:12 +0000 Subject: [PATCH 03/21] Add subworkflow for consensus calling with BCFTools --- subworkflows/local/consensus_bcftools.nf | 84 ++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 subworkflows/local/consensus_bcftools.nf diff --git a/subworkflows/local/consensus_bcftools.nf b/subworkflows/local/consensus_bcftools.nf new file mode 100644 index 00000000..14cefb2e --- /dev/null +++ b/subworkflows/local/consensus_bcftools.nf @@ -0,0 +1,84 @@ +// +// Consensus calling with BCFTools and downstream processing QC +// + +include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/modules/bedtools/maskfasta/main' +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/modules/bcftools/consensus/main' +include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf 
// channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Create BED file with consensus regions to mask + // + MAKE_BED_MASK ( + bam.join(vcf, by: [0]), + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(MAKE_BED_MASK.out.versions.first()) + + // + // Merge intervals with BEDTools + // + BEDTOOLS_MERGE ( + MAKE_BED_MASK.out.bed + ) + ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first()) + + // + // Mask regions in consensus with BEDTools + // + BEDTOOLS_MASKFASTA ( + BEDTOOLS_MERGE.out.bed, + fasta + ) + ch_versions = ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) + + // + // Call consensus sequence with BCFTools + // + BCFTOOLS_CONSENSUS ( + vcf.join(tbi, by: [0]).join(BEDTOOLS_MASKFASTA.out.fasta, by: [0]) + ) + ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + BCFTOOLS_CONSENSUS.out.fasta, + fasta, + gff, + nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions) + + emit: + consensus = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} From 332e12d40d4291b51f0d188aa9ad05617d415b6b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 
23:18:25 +0000 Subject: [PATCH 04/21] Add subworkflow for consensus calling with iVar --- subworkflows/local/consensus_ivar.nf | 55 ++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 subworkflows/local/consensus_ivar.nf diff --git a/subworkflows/local/consensus_ivar.nf b/subworkflows/local/consensus_ivar.nf new file mode 100644 index 00000000..b3de4d94 --- /dev/null +++ b/subworkflows/local/consensus_ivar.nf @@ -0,0 +1,55 @@ +// +// Consensus calling with iVar and downstream processing QC +// + +include { IVAR_CONSENSUS } from '../../modules/nf-core/modules/ivar/consensus/main' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Call consensus sequence with iVar + // + IVAR_CONSENSUS ( + bam, + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(IVAR_CONSENSUS.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + IVAR_CONSENSUS.out.fasta, + fasta, + gff, + nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions) + + emit: + consensus = IVAR_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + consensus_qual = IVAR_CONSENSUS.out.qual // channel: [ val(meta), [ qual.txt ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ 
versions.yml ] +} \ No newline at end of file From 1fba0cc52244c9468f4b3f3e377f1afd1e3b1b6f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:18:40 +0000 Subject: [PATCH 05/21] Add Consensus QC sub-workflow --- subworkflows/local/consensus_qc.nf | 90 ++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 subworkflows/local/consensus_qc.nf diff --git a/subworkflows/local/consensus_qc.nf b/subworkflows/local/consensus_qc.nf new file mode 100644 index 00000000..d370f68b --- /dev/null +++ b/subworkflows/local/consensus_qc.nf @@ -0,0 +1,90 @@ +// +// Consensus calling QC +// + +include { QUAST } from '../../modules/nf-core/modules/quast/main' +include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' +include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' + +workflow CONSENSUS_QC { + take: + consensus // channel: [ val(meta), [ consensus ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Consensus QC report across samples with QUAST + // + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + if (!params.skip_variants_quast) { + QUAST ( + consensus.collect{ it[1] }, + fasta, + gff, + true, + params.gff + ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_versions = ch_versions.mix(QUAST.out.versions) + } + + // + // Lineage analysis with Pangolin + // + ch_pangolin_report = Channel.empty() + if (!params.skip_pangolin) { + PANGOLIN ( + consensus + ) + ch_pangolin_report = PANGOLIN.out.report + ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) + } + + // + // Lineage analysis with Nextclade + // + ch_nextclade_report = Channel.empty() + if (!params.skip_nextclade) { + NEXTCLADE_RUN ( + consensus, + nextclade_db + ) + 
ch_nextclade_report = NEXTCLADE_RUN.out.csv + ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) + } + + // + // Plot consensus base density + // + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + if (!params.skip_consensus_plots) { + PLOT_BASE_DENSITY ( + consensus + ) + ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv + ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf + ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) + } + + emit: + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} \ No newline at end of file From 6f410c0ff6043bcb5d8d2749a077489b1cd81154 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:18:58 +0000 Subject: [PATCH 06/21] Add variants QC sub-workflow --- subworkflows/local/variants_qc.nf | 91 +++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 subworkflows/local/variants_qc.nf diff --git a/subworkflows/local/variants_qc.nf b/subworkflows/local/variants_qc.nf new file mode 100644 index 00000000..cded165c --- /dev/null +++ b/subworkflows/local/variants_qc.nf @@ -0,0 +1,91 @@ +// +// Variant calling QC +// + +include { ASCIIGENOME } from '../../modules/local/asciigenome' +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' + +workflow VARIANTS_QC { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf // channel: [ val(meta), [ vcf ] ] + stats // channel: [ val(meta), [ bcftools_stats ] ] + fasta // channel: /path/to/genome.fasta + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed 
+ snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + + ch_versions = Channel.empty() + + // + // Annotate variants + // + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( + vcf, + snpeff_db, + snpeff_config, + fasta + ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) + } + + // + // Variant screenshots with ASCIIGenome + // + ch_asciigenome_pdf = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(vcf, by: [0]) + .join(stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + sizes, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) + } + + emit: + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = ch_asciigenome_pdf 
// channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} From 201d43efa80d814196537fdd240c2b2d4ea1d70d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:19:29 +0000 Subject: [PATCH 07/21] Add subworkflow for variant calling with BCFTools --- subworkflows/local/make_consensus.nf | 54 -------- subworkflows/local/variants_bcftools.nf | 162 ++++-------------------- 2 files changed, 27 insertions(+), 189 deletions(-) delete mode 100644 subworkflows/local/make_consensus.nf diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf deleted file mode 100644 index 95481bd1..00000000 --- a/subworkflows/local/make_consensus.nf +++ /dev/null @@ -1,54 +0,0 @@ -// -// Run various tools to generate a masked genome consensus sequence -// - -include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/modules/bedtools/maskfasta/main' -include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/modules/bcftools/consensus/main' -include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' - -workflow MAKE_CONSENSUS { - take: - bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] - fasta - - main: - - ch_versions = Channel.empty() - - MAKE_BED_MASK ( - bam_vcf.map { meta, bam, vcf, tbi -> [ meta, bam, vcf ] }, - fasta, - params.save_mpileup - ) - ch_versions = ch_versions.mix(MAKE_BED_MASK.out.versions.first()) - - BEDTOOLS_MERGE ( - MAKE_BED_MASK.out.bed - ) - ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first()) - - BEDTOOLS_MASKFASTA ( - BEDTOOLS_MERGE.out.bed, - fasta - ) - ch_versions = ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) - - BCFTOOLS_CONSENSUS ( - bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) - ) - ch_versions = 
ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) - - PLOT_BASE_DENSITY ( - BCFTOOLS_CONSENSUS.out.fasta - ) - ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) - - emit: - fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] - tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] - pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 501ecd53..100d25dc 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -1,15 +1,9 @@ // -// Variant calling and downstream processing for BCFTools +// Variant calling with BCFTools, downstream processing and QC // include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/modules/bcftools/mpileup/main' -include { QUAST } from '../../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' -include { ASCIIGENOME } from '../../modules/local/asciigenome' - -include { MAKE_CONSENSUS } from './make_consensus' -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' +include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_BCFTOOLS { take: @@ -18,7 +12,6 @@ workflow VARIANTS_BCFTOOLS { sizes // channel: /path/to/genome.sizes gff // channel: /path/to/genome.gff bed // channel: /path/to/primers.bed - nextclade_db // channel: /path/to/nextclade_db/ snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config @@ -37,136 +30,35 @@ workflow VARIANTS_BCFTOOLS { ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) // - // Create genome consensus using variants in VCF, run QUAST and pangolin - // - ch_consensus = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() 
- ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_nextclade_report = Channel.empty() - if (!params.skip_consensus) { - MAKE_CONSENSUS ( - bam.join(BCFTOOLS_MPILEUP.out.vcf, by: [0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), - fasta - ) - ch_consensus = MAKE_CONSENSUS.out.fasta - ch_bases_tsv = MAKE_CONSENSUS.out.tsv - ch_bases_pdf = MAKE_CONSENSUS.out.pdf - ch_versions = ch_versions.mix(MAKE_CONSENSUS.out.versions) - - if (!params.skip_variants_quast) { - QUAST ( - ch_consensus.collect{ it[1] }, - fasta, - gff, - true, - params.gff - ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_versions = ch_versions.mix(QUAST.out.versions) - } - - if (!params.skip_pangolin) { - PANGOLIN ( - ch_consensus - ) - ch_pangolin_report = PANGOLIN.out.report - ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) - } - - if (!params.skip_nextclade) { - NEXTCLADE_RUN ( - ch_consensus, - nextclade_db - ) - ch_nextclade_report = NEXTCLADE_RUN.out.csv - ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) - } - } - - // - // Annotate variants - // - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - BCFTOOLS_MPILEUP.out.vcf, - snpeff_db, - snpeff_config, - fasta - ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) - } - - // - // Variant screenshots with ASCIIGenome + // Run downstream tools for variants QC // - 
ch_asciigenome_pdf = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) - .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - sizes, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) - } + VARIANTS_QC ( + bam, + BCFTOOLS_MPILEUP.out.vcf, + BCFTOOLS_MPILEUP.out.stats, + fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) emit: - vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] 
+ stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } From b7ad0c4578b280c18c303ff133f978eff22b5117 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:19:37 +0000 Subject: [PATCH 08/21] Add subworkflow for variant calling with iVar --- subworkflows/local/variants_ivar.nf | 184 +++++----------------------- 1 file changed, 33 insertions(+), 151 deletions(-) diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index a21a2825..15a10e4e 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -1,18 +1,11 @@ // -// Variant calling and downstream processing for IVar +// Variant calling with IVar, downstream processing and QC // -include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' -include { IVAR_CONSENSUS } from '../../modules/nf-core/modules/ivar/consensus/main' -include { QUAST } from 
'../../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' -include { ASCIIGENOME } from '../../modules/local/asciigenome' - +include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' +include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_IVAR { take: @@ -21,7 +14,6 @@ workflow VARIANTS_IVAR { sizes // channel: /path/to/genome.sizes gff // channel: /path/to/genome.gff bed // channel: /path/to/primers.bed - nextclade_db // channel: /path/to/nextclade_db/ snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants @@ -56,151 +48,41 @@ workflow VARIANTS_IVAR { ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) // - // Create genome consensus - // - ch_consensus = Channel.empty() - ch_consensus_qual = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_nextclade_report = Channel.empty() - if (!params.skip_consensus) { - IVAR_CONSENSUS ( - bam, - fasta, - params.save_mpileup - ) - ch_consensus = IVAR_CONSENSUS.out.fasta - ch_consensus_qual = IVAR_CONSENSUS.out.qual - ch_versions = ch_versions.mix(IVAR_CONSENSUS.out.versions.first()) - - PLOT_BASE_DENSITY ( - ch_consensus - ) - ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv - ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf - ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) - - if (!params.skip_variants_quast) { - QUAST ( - ch_consensus.collect{ it[1] }, - fasta, - gff, - true, - params.gff - ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv 
- ch_versions = ch_versions.mix(QUAST.out.versions) - } - - if (!params.skip_pangolin) { - PANGOLIN ( - ch_consensus - ) - ch_pangolin_report = PANGOLIN.out.report - ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) - } - - if (!params.skip_nextclade) { - NEXTCLADE_RUN ( - ch_consensus, - nextclade_db - ) - ch_nextclade_report = NEXTCLADE_RUN.out.csv - ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) - } - } - - // - // Annotate variants - // - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - VCF_BGZIP_TABIX_STATS.out.vcf, - snpeff_db, - snpeff_config, - fasta - ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) - } - - // - // Variant screenshots with ASCIIGenome + // Run downstream tools for variants QC // - ch_asciigenome_pdf = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) - .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - sizes, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) - } + VARIANTS_QC ( + bam, + VCF_BGZIP_TABIX_STATS.out.vcf, + VCF_BGZIP_TABIX_STATS.out.stats, + 
fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) emit: - tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] - log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] - multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] + log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] + multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // 
channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - versions = ch_versions // channel: [ versions.yml ] -} + versions = ch_versions // channel: [ versions.yml ] +} \ No newline at end of file From 76f9730d319e38cfcd051775f61902063425d1e1 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 24 Jan 2022 23:20:00 +0000 Subject: [PATCH 09/21] Re-work main workflow for new variant and consensus calling --- workflows/illumina.nf | 125 +++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 56 deletions(-) diff --git a/workflows/illumina.nf b/workflows/illumina.nf index c2807c3b..4998601e 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -53,9 +53,8 @@ ch_ivar_variants_header_mqc = file("$projectDir/assets/headers/ivar_variants_hea // // MODULE: Loaded from modules/local/ // -include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' -include { CUTADAPT } from '../modules/local/cutadapt' -include { MULTIQC } from '../modules/local/multiqc_illumina' +include { CUTADAPT } from '../modules/local/cutadapt' +include { MULTIQC } from '../modules/local/multiqc_illumina' include { PLOT_MOSDEPTH_REGIONS as 
PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_READS } from '../modules/local/multiqc_tsv_from_list' @@ -70,6 +69,8 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' +include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' +include { CONSENSUS_BCFTOOLS } from '../subworkflows/local/consensus_bcftools' include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades' include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler' include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' @@ -351,7 +352,6 @@ workflow ILLUMINA { ch_mosdepth_multiqc = Channel.empty() ch_amplicon_heatmap_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_mosdepth) { - MOSDEPTH_GENOME ( ch_bam.join(ch_bai, by: [0]), [], @@ -384,14 +384,11 @@ workflow ILLUMINA { // // SUBWORKFLOW: Call variants with IVar // - ch_ivar_vcf = Channel.empty() - ch_ivar_tbi = Channel.empty() - ch_ivar_counts_multiqc = Channel.empty() - ch_ivar_stats_multiqc = Channel.empty() - ch_ivar_snpeff_multiqc = Channel.empty() - ch_ivar_quast_multiqc = Channel.empty() - ch_ivar_pangolin_multiqc = Channel.empty() - ch_ivar_nextclade_multiqc = Channel.empty() + ch_vcf = Channel.empty() + ch_tbi = Channel.empty() + ch_ivar_counts_multiqc = Channel.empty() + ch_ivar_stats_multiqc = Channel.empty() + ch_ivar_snpeff_multiqc = Channel.empty() if (!params.skip_variants && 'ivar' in callers) { VARIANTS_IVAR ( ch_bam, @@ -399,21 +396,60 @@ workflow ILLUMINA { PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.gff, (params.protocol == 
'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], - PREPARE_GENOME.out.nextclade_db, PREPARE_GENOME.out.snpeff_db, PREPARE_GENOME.out.snpeff_config, ch_ivar_variants_header_mqc ) - ch_ivar_vcf = VARIANTS_IVAR.out.vcf - ch_ivar_tbi = VARIANTS_IVAR.out.tbi - ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv - ch_ivar_stats_multiqc = VARIANTS_IVAR.out.stats - ch_ivar_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv - ch_ivar_quast_multiqc = VARIANTS_IVAR.out.quast_tsv - ch_ivar_pangolin_multiqc = VARIANTS_IVAR.out.pangolin_report - ch_ivar_nextclade_report = VARIANTS_IVAR.out.nextclade_report - ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + ch_vcf = VARIANTS_IVAR.out.vcf + ch_tbi = VARIANTS_IVAR.out.tbi + ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv + ch_ivar_stats_multiqc = VARIANTS_IVAR.out.stats + ch_ivar_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv + ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + } + // + // SUBWORKFLOW: Call variants with BCFTools + // + ch_vcf = Channel.empty() + ch_tbi = Channel.empty() + ch_bcftools_stats_multiqc = Channel.empty() + ch_bcftools_snpeff_multiqc = Channel.empty() + if (!params.skip_variants && 'bcftools' in callers) { + VARIANTS_BCFTOOLS ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.gff, + (params.protocol == 'amplicon' && params.primer_bed) ? 
PREPARE_GENOME.out.primer_bed : [], + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config + ) + ch_vcf = VARIANTS_BCFTOOLS.out.vcf + ch_tbi = VARIANTS_BCFTOOLS.out.tbi + ch_bcftools_stats_multiqc = VARIANTS_BCFTOOLS.out.stats + ch_bcftools_snpeff_multiqc = VARIANTS_BCFTOOLS.out.snpeff_csv + ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) + } + + // + // SUBWORKFLOW: Call consensus with iVar and downstream QC + // + ch_ivar_quast_multiqc = Channel.empty() + ch_ivar_pangolin_multiqc = Channel.empty() + ch_ivar_nextclade_multiqc = Channel.empty() + if (!params.skip_consensus && 'ivar' in callers) { + CONSENSUS_IVAR ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gff, + PREPARE_GENOME.out.nextclade_db + ) + ch_ivar_quast_multiqc = CONSENSUS_IVAR.out.quast_tsv + ch_ivar_pangolin_multiqc = CONSENSUS_IVAR.out.pangolin_report + ch_ivar_nextclade_report = CONSENSUS_IVAR.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_IVAR.out.versions) + // // MODULE: Get Nextclade clade information for MultiQC report // @@ -432,35 +468,25 @@ workflow ILLUMINA { .set { ch_ivar_nextclade_multiqc } } + // + // SUBWORKFLOW: Call consensus with BCFTools // - // SUBWORKFLOW: Call variants with BCFTools - // - ch_bcftools_vcf = Channel.empty() - ch_bcftools_tbi = Channel.empty() - ch_bcftools_stats_multiqc = Channel.empty() - ch_bcftools_snpeff_multiqc = Channel.empty() ch_bcftools_quast_multiqc = Channel.empty() ch_bcftools_pangolin_multiqc = Channel.empty() ch_bcftools_nextclade_multiqc = Channel.empty() - if (!params.skip_variants && 'bcftools' in callers) { - VARIANTS_BCFTOOLS ( + if (!params.skip_consensus && 'bcftools' in callers && callers) { + CONSENSUS_BCFTOOLS ( ch_bam, + ch_vcf, + ch_tbi, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.gff, - (params.protocol == 'amplicon' && params.primer_bed) ? 
PREPARE_GENOME.out.primer_bed : [], - PREPARE_GENOME.out.nextclade_db, - PREPARE_GENOME.out.snpeff_db, - PREPARE_GENOME.out.snpeff_config + PREPARE_GENOME.out.nextclade_db ) - ch_bcftools_vcf = VARIANTS_BCFTOOLS.out.vcf - ch_bcftools_tbi = VARIANTS_BCFTOOLS.out.tbi - ch_bcftools_stats_multiqc = VARIANTS_BCFTOOLS.out.stats - ch_bcftools_snpeff_multiqc = VARIANTS_BCFTOOLS.out.snpeff_csv - ch_bcftools_quast_multiqc = VARIANTS_BCFTOOLS.out.quast_tsv - ch_bcftools_pangolin_multiqc = VARIANTS_BCFTOOLS.out.pangolin_report - ch_bcftools_nextclade_report = VARIANTS_BCFTOOLS.out.nextclade_report - ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) + ch_bcftools_quast_multiqc = CONSENSUS_BCFTOOLS.out.quast_tsv + ch_bcftools_pangolin_multiqc = CONSENSUS_BCFTOOLS.out.pangolin_report + ch_bcftools_nextclade_report = CONSENSUS_BCFTOOLS.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_BCFTOOLS.out.versions) // // MODULE: Get Nextclade clade information for MultiQC report @@ -480,19 +506,6 @@ workflow ILLUMINA { .set { ch_bcftools_nextclade_multiqc } } - // - // MODULE: Intersect variants across callers - // - if (!params.skip_variants && callers.size() > 1) { - BCFTOOLS_ISEC ( - ch_ivar_vcf - .join(ch_ivar_tbi, by: [0]) - .join(ch_bcftools_vcf, by: [0]) - .join(ch_bcftools_tbi, by: [0]) - ) - ch_versions = ch_versions.mix(BCFTOOLS_ISEC.out.versions) - } - // // MODULE: Primer trimming with Cutadapt // From bbe4c29feec5e53a713a96cf1c4e613466cbd807 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 10:47:56 +0000 Subject: [PATCH 10/21] Add --variant_caller and --consensus_caller params --- nextflow.config | 3 ++- nextflow_schema.json | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9f3027db..1e58c976 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,7 +61,8 @@ params { skip_cutadapt = false // Illumina variant calling options - callers = null + variant_caller 
= null + consensus_caller = null min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_offset = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 18214bdf..906b3f97 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -383,10 +383,15 @@ "description": "Various options for the variant calling branch of the Illumina workflow.", "default": "", "properties": { - "callers": { + "variant_caller": { "type": "string", - "description": "Specify which variant calling algorithms you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", - "fa_icon": "fas fa-phone-volume" + "fa_icon": "fas fa-phone-volume", + "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic')." + }, + "consensus_caller": { + "type": "string", + "fa_icon": "fas fa-phone-volume", + "description": "Specify which consensus calling algorithm you would like to use. Available options are 'bcftools' and 'ivar' (default: 'bcftools')." }, "min_mapped_reads": { "type": "integer", @@ -441,6 +446,11 @@ "fa_icon": "fas fa-fast-forward", "description": "Skip SnpEff and SnpSift annotation of variants." }, + "skip_consensus_plots": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip creation of consensus base density plots." 
+ }, "skip_consensus": { "type": "boolean", "fa_icon": "fas fa-fast-forward", From 765dda58ab8b917c6e4c3e341be2c317aeb7c32f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 12:21:30 +0000 Subject: [PATCH 11/21] Add implementation for --variant_caller and --consensus_caller everywhere --- README.md | 5 +++-- conf/modules_illumina.config | 8 +++++--- conf/test.config | 12 ++++++------ conf/test_full.config | 11 +++++++---- conf/test_full_sispa.config | 12 ++++++++++-- conf/test_sispa.config | 12 ++++++------ docs/output.md | 10 +++++----- lib/WorkflowIllumina.groovy | 17 +++++++++++++---- nextflow.config | 2 +- nextflow_schema.json | 13 +++++++++++-- workflows/illumina.nf | 24 ++++++++++++++---------- 11 files changed, 81 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 950fa1a0..47b8e5d4 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,13 @@ The SRA download functionality has been removed from the pipeline (`>=2.1`) and 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); *optional*) 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) - 7. Choice of multiple variant calling and consensus sequence generation routes ([`iVar variants and consensus`](https://github.com/andersen-lab/ivar); *default for amplicon data* *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); *default for metagenomics data*) + 7. 
Choice of multiple variant callers ([`iVar variants`](https://github.com/andersen-lab/ivar); *default for amplicon data* *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html); *default for metagenomics data*) * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + 8. Choice of multiple consensus callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); *default for both amplicon and metagenomics data* *||* [`iVar consensus`](https://github.com/andersen-lab/ivar)) * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) * Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) * Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) 6. _De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); *amplicon data only*) 2. Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) *||* [`Unicycler`](https://github.com/rrwick/Unicycler) *||* [`minia`](https://github.com/GATB/minia)) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index af334abf..24fb8607 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -10,8 +10,10 @@ ---------------------------------------------------------------------------------------- */ +def variant_caller = params.variant_caller +if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' } + def assemblers = params.assemblers ? 
params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] -def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] // // Pre-processing and general configuration options @@ -378,7 +380,7 @@ if (!params.skip_variants) { } } - if ('ivar' in callers) { + if (variant_caller == 'ivar') { process { withName: 'IVAR_VARIANTS' { ext.args = '-t 0.25 -q 20 -m 10' @@ -569,7 +571,7 @@ if (!params.skip_variants) { } } - if ('bcftools' in callers) { + if (variant_caller == 'bcftools') { process { withName: 'BCFTOOLS_MPILEUP' { ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' diff --git a/conf/test.config b/conf/test.config index 44eca7bb..c3109b6a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,10 +30,10 @@ params { genome = 'MN908947.3' kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' - - // Skip this by default to bypass Github Actions disk quota errors - skip_plasmidid = true + // Variant calling options + variant_caller = 'ivar' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } diff --git a/conf/test_full.config b/conf/test_full.config index 10041d28..e8a6d2cc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -24,13 +24,16 @@ params { // Genome references genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Variant calling options + variant_caller = 'ivar' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } process { - withName:PLASMIDID { 
+ withName: 'PLASMIDID' { errorStrategy = 'ignore' } } diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index 49b295fc..2e8b04d2 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -22,7 +22,15 @@ params { // Genome references genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' + // Variant calling options + variant_caller = 'bcftools' + + // Assembly options assemblers = 'spades,unicycler,minia' } + +process { + withName: 'PLASMIDID' { + errorStrategy = 'ignore' + } +} \ No newline at end of file diff --git a/conf/test_sispa.config b/conf/test_sispa.config index 526bf56d..9aaff136 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -28,10 +28,10 @@ params { genome = 'MN908947.3' kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' - - // Skip this by default to bypass Github Actions disk quota errors - skip_plasmidid = true + // Variant calling options + variant_caller = 'bcftools' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } diff --git a/docs/output.md b/docs/output.md index a2bfa18a..4be3d50d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -578,7 +578,7 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants//snpeff/bcftools_stats/` * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). 
+**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -596,7 +596,7 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants//quast/` * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -610,7 +610,7 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants//pangolin/` * `*.pangolin.csv`: Lineage analysis results from Pangolin. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -624,7 +624,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi * `variants//nextclade/` * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). 
+**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -638,7 +638,7 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi * `variants//asciigenome//` * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index f9531376..56b20612 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -31,10 +31,19 @@ class WorkflowIllumina { } // Variant calling parameter validation - def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] - if ((valid_params['callers'] + callers).unique().size() != valid_params['callers'].size()) { - log.error "Invalid option: ${params.callers}. Valid options for '--callers': ${valid_params['callers'].join(', ')}." - System.exit(1) + if (params.variant_caller) { + if (!valid_params['variant_callers'].contains(params.variant_caller)) { + log.error "Invalid option: ${params.variant_caller}. Valid options for '--variant_caller': ${valid_params['variant_callers'].join(', ')}." + System.exit(1) + } + } + + // Consensus calling parameter validation + if (params.consensus_caller) { + if (!valid_params['consensus_callers'].contains(params.consensus_caller)) { + log.error "Invalid option: ${params.consensus_caller}. Valid options for '--consensus_caller': ${valid_params['consensus_callers'].join(', ')}." 
+ System.exit(1) + } } if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) { diff --git a/nextflow.config b/nextflow.config index 1e58c976..c28cc0a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,7 +62,7 @@ params { // Illumina variant calling options variant_caller = null - consensus_caller = null + consensus_caller = 'bcftools' min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_offset = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 906b3f97..34154e20 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -386,12 +386,21 @@ "variant_caller": { "type": "string", "fa_icon": "fas fa-phone-volume", - "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic')." + "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", + "enum": [ + "ivar", + "bcftools" + ] }, "consensus_caller": { "type": "string", + "default": "bcftools", "fa_icon": "fas fa-phone-volume", - "description": "Specify which consensus calling algorithm you would like to use. Available options are 'bcftools' and 'ivar' (default: 'bcftools')." + "description": "Specify which consensus calling algorithm you would like to use. 
Available options are 'bcftools' and 'ivar' (default: 'bcftools').", + "enum": [ + "ivar", + "bcftools" + ] }, "min_mapped_reads": { "type": "integer", diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 4998601e..935f3280 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -5,10 +5,11 @@ */ def valid_params = [ - protocols : ['metagenomic', 'amplicon'], - callers : ['ivar', 'bcftools'], - assemblers : ['spades', 'unicycler', 'minia'], - spades_modes: ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] + protocols : ['metagenomic', 'amplicon'], + variant_callers : ['ivar', 'bcftools'], + consensus_callers : ['ivar', 'bcftools'], + assemblers : ['spades', 'unicycler', 'minia'], + spades_modes : ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] ] def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) @@ -28,8 +29,11 @@ if (params.input) { ch_input = file(params.input) } else { exit 1 if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = [] } def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] -def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] -if (!callers) { callers = params.protocol == 'amplicon' ? ['ivar'] : ['bcftools'] } + +def variant_caller = params.variant_caller +if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 
'ivar' : 'bcftools' } + +def consensus_caller = params.consensus_caller ?: 'bcftools' /* ======================================================================================== @@ -389,7 +393,7 @@ workflow ILLUMINA { ch_ivar_counts_multiqc = Channel.empty() ch_ivar_stats_multiqc = Channel.empty() ch_ivar_snpeff_multiqc = Channel.empty() - if (!params.skip_variants && 'ivar' in callers) { + if (!params.skip_variants && variant_caller == 'ivar') { VARIANTS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, @@ -415,7 +419,7 @@ workflow ILLUMINA { ch_tbi = Channel.empty() ch_bcftools_stats_multiqc = Channel.empty() ch_bcftools_snpeff_multiqc = Channel.empty() - if (!params.skip_variants && 'bcftools' in callers) { + if (!params.skip_variants && variant_caller == 'bcftools') { VARIANTS_BCFTOOLS ( ch_bam, PREPARE_GENOME.out.fasta, @@ -438,7 +442,7 @@ workflow ILLUMINA { ch_ivar_quast_multiqc = Channel.empty() ch_ivar_pangolin_multiqc = Channel.empty() ch_ivar_nextclade_multiqc = Channel.empty() - if (!params.skip_consensus && 'ivar' in callers) { + if (!params.skip_consensus && consensus_caller == 'ivar') { CONSENSUS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, @@ -474,7 +478,7 @@ workflow ILLUMINA { ch_bcftools_quast_multiqc = Channel.empty() ch_bcftools_pangolin_multiqc = Channel.empty() ch_bcftools_nextclade_multiqc = Channel.empty() - if (!params.skip_consensus && 'bcftools' in callers && callers) { + if (!params.skip_consensus && consensus_caller == 'bcftools' && variant_caller) { CONSENSUS_BCFTOOLS ( ch_bam, ch_vcf, From 4e8b93ac04541afa90d3704a68471bd2781fa696 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 12:22:14 +0000 Subject: [PATCH 12/21] Add implementation for --variant_caller and --consensus_caller to main workflow --- workflows/illumina.nf | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 935f3280..7d6af82b 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ 
-33,8 +33,6 @@ def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.tr def variant_caller = params.variant_caller if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' } -def consensus_caller = params.consensus_caller ?: 'bcftools' - /* ======================================================================================== CONFIG FILES @@ -442,7 +440,7 @@ workflow ILLUMINA { ch_ivar_quast_multiqc = Channel.empty() ch_ivar_pangolin_multiqc = Channel.empty() ch_ivar_nextclade_multiqc = Channel.empty() - if (!params.skip_consensus && consensus_caller == 'ivar') { + if (!params.skip_consensus && params.consensus_caller == 'ivar') { CONSENSUS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, @@ -478,7 +476,7 @@ workflow ILLUMINA { ch_bcftools_quast_multiqc = Channel.empty() ch_bcftools_pangolin_multiqc = Channel.empty() ch_bcftools_nextclade_multiqc = Channel.empty() - if (!params.skip_consensus && consensus_caller == 'bcftools' && variant_caller) { + if (!params.skip_consensus && params.consensus_caller == 'bcftools' && variant_caller) { CONSENSUS_BCFTOOLS ( ch_bam, ch_vcf, From 0bdb1a728694014dedc90c19937cc914e6c710fd Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 13:01:35 +0000 Subject: [PATCH 13/21] Fix config options for new caller params --- conf/modules_illumina.config | 363 +++++++++++++++++------------------ 1 file changed, 181 insertions(+), 182 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 24fb8607..c51a54e1 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -391,23 +391,14 @@ if (!params.skip_variants) { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - if (params.protocol == 'amplicon') { - withName: 'IVAR_VARIANTS_TO_VCF' { - ext.args = '--ignore_strand_bias' - publishDir = [ - path: { "${params.outdir}/variants/ivar/log" }, - mode: 'copy', - pattern: '*.log' - ] - } - }else{ - withName: 'IVAR_VARIANTS_TO_VCF' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/log" }, - mode: 'copy', - pattern: '*.log' - ] - } + + withName: 'IVAR_VARIANTS_TO_VCF' { + ext.args = params.protocol == 'amplicon' ? '--ignore_strand_bias' : '' + publishDir = [ + path: { "${params.outdir}/variants/ivar/log" }, + mode: 'copy', + pattern: '*.log' + ] } withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:TABIX_BGZIP' { @@ -434,86 +425,11 @@ if (!params.skip_variants) { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - - withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { - publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false - ] - } } - - if (!params.skip_consensus) { - process { - withName: 'IVAR_CONSENSUS' { - ext.args = '-t 0.75 -q 20 -m 10 -n N' - ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.{fa,txt}", - ], - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.mpileup", - enabled: params.save_mpileup - ] - ] - } - - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:PLOT_BASE_DENSITY' { - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/consensus/base_qc" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - - if (!params.skip_pangolin) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:PANGOLIN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/pangolin" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - - if (!params.skip_nextclade) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:NEXTCLADE_RUN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/nextclade" }, - mode: 'copy', - pattern: "*.csv" - ] - } - } - } - - if (!params.skip_variants_quast) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:QUAST' { - publishDir = [ - path: { "${params.outdir}/variants/ivar" }, - mode: 'copy', - pattern: "quast" - ] - } - } - } - } - + if (!params.skip_asciigenome) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:ASCIIGENOME' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:ASCIIGENOME' { publishDir = [ path: { "${params.outdir}/variants/ivar/asciigenome/${meta.id}" }, mode: 'copy', @@ -525,7 +441,7 @@ if (!params.skip_variants) { if (!params.skip_snpeff) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:SNPEFF_ANN' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, mode: 'copy', @@ -533,7 +449,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, @@ -542,7 +458,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { + withName: 
'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, @@ -551,7 +467,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff/bcftools_stats" }, @@ -560,7 +476,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, mode: 'copy', @@ -571,33 +487,28 @@ if (!params.skip_variants) { } } - if (variant_caller == 'bcftools') { + if (!params.skip_consensus && params.consensus_caller == 'ivar') { process { - withName: 'BCFTOOLS_MPILEUP' { - ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' - ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' - ext.args3 = "--include 'INFO/DP>=10'" + withName: 'IVAR_CONSENSUS' { + ext.args = '-t 0.75 -q 20 -m 10 -n N' + ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' + ext.prefix = { "${meta.id}.consensus" } publishDir = [ [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/ivar/consensus" }, mode: 'copy', - pattern: '*.{gz,tbi}' + pattern: "*.{fa,txt}", ], [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/ivar/consensus" }, mode: 'copy', - pattern: '*.mpileup', + pattern: 
"*.mpileup", enabled: params.save_mpileup - ], - [ - path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, - mode: 'copy', - pattern: '*stats.txt' ] ] } - withName: 'MULTIQC_TSV_BCFTOOLS_NEXTCLADE' { + withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { publishDir = [ path: { "${params.outdir}/multiqc" }, enabled: false @@ -605,95 +516,86 @@ if (!params.skip_variants) { } } - if (!params.skip_consensus) { + if (!params.skip_consensus_plots) { process { - withName: 'MAKE_BED_MASK' { - ext.args = "-a --ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0" - ext.args2 = 10 - ext.prefix = { "${meta.id}.coverage.masked" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - mode: 'copy', - pattern: "*.mpileup", - enabled: params.save_mpileup - ] - } - - withName: 'BEDTOOLS_MERGE' { - ext.prefix = { "${meta.id}.coverage.merged" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - enabled: false - ] - } - - withName: 'BEDTOOLS_MASKFASTA' { - ext.prefix = { "${meta.id}.masked" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - enabled: false - ] - } - - withName: 'BCFTOOLS_CONSENSUS' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:PLOT_BASE_DENSITY' { ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus" }, + path: { "${params.outdir}/variants/ivar/consensus/base_qc" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + } + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { - ext.prefix = { "${meta.id}.consensus" } + if (!params.skip_pangolin) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:PANGOLIN' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus/base_qc" }, + path: { "${params.outdir}/variants/ivar/pangolin" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } } + } - if (!params.skip_pangolin) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:PANGOLIN' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/pangolin" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + if (!params.skip_nextclade) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:NEXTCLADE_RUN' { + publishDir = [ + path: { "${params.outdir}/variants/ivar/nextclade" }, + mode: 'copy', + pattern: "*.csv" + ] } } + } - if (!params.skip_nextclade) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:NEXTCLADE_RUN' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/nextclade" }, - mode: 'copy', - pattern: "*.csv" - ] - } + if (!params.skip_variants_quast) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: 'copy', + pattern: "quast" + ] } } + } + } - if (!params.skip_variants_quast) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:QUAST' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - mode: 'copy', - pattern: "quast" - ] - } - } + if (variant_caller == 'bcftools') { + process { + withName: 'BCFTOOLS_MPILEUP' { + ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' + ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' + ext.args3 = "--include 'INFO/DP>=10'" + publishDir = [ + [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: '*.{gz,tbi}' + ], + [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: '*.mpileup', + enabled: params.save_mpileup + ], + [ + path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, + mode: 'copy', + pattern: 
'*stats.txt' + ] + ] } } if (!params.skip_asciigenome) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:ASCIIGENOME' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:ASCIIGENOME' { publishDir = [ path: { "${params.outdir}/variants/bcftools/asciigenome/${meta.id}" }, mode: 'copy', @@ -705,7 +607,7 @@ if (!params.skip_variants) { if (!params.skip_snpeff) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:SNPEFF_ANN' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, mode: 'copy', @@ -713,7 +615,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, @@ -722,7 +624,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, @@ -731,7 +633,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff/bcftools_stats" }, @@ -740,7 +642,7 @@ if (!params.skip_variants) { ] } - withName: 
'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, mode: 'copy', @@ -750,6 +652,103 @@ if (!params.skip_variants) { } } } + + if (!params.skip_consensus && params.consensus_caller == 'bcftools') { + process { + withName: 'MAKE_BED_MASK' { + ext.args = "-a --ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0" + ext.args2 = 10 + ext.prefix = { "${meta.id}.coverage.masked" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: "*.mpileup", + enabled: params.save_mpileup + ] + } + + withName: 'BEDTOOLS_MERGE' { + ext.prefix = { "${meta.id}.coverage.merged" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + enabled: false + ] + } + + withName: 'BEDTOOLS_MASKFASTA' { + ext.prefix = { "${meta.id}.masked" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + enabled: false + ] + } + + withName: 'BCFTOOLS_CONSENSUS' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools/consensus" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MULTIQC_TSV_BCFTOOLS_NEXTCLADE' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + enabled: false + ] + } + } + + if (!params.skip_pangolin) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:PANGOLIN' { + publishDir = [ + path: { "${params.outdir}/variants/bcftools/pangolin" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_nextclade) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:NEXTCLADE_RUN' { + publishDir = [ + path: { "${params.outdir}/variants/bcftools/nextclade" }, + mode: 'copy', + pattern: "*.csv" + ] + } + } + } + + if (!params.skip_variants_quast) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: "quast" + ] + } + } + } + + if (!params.skip_consensus_plots) { + process { + withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools/consensus/base_qc" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } } if (!params.skip_assembly) { From 653d5306ffb0f5b09f42cc0cfa6abbf05929a54d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 13:05:46 +0000 Subject: [PATCH 14/21] Update CHANGELOG --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53028e10..d2e4791a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * [[#232](https://github.com/nf-core/viralrecon/issues/232)] - Remove duplicate variants called by ARTIC ONT pipeline * [[#235](https://github.com/nf-core/viralrecon/issues/235)] - Nextclade version bump * [[#244](https://github.com/nf-core/viralrecon/issues/244)] - Fix BCFtools consensus generation and masking +* [[#245](https://github.com/nf-core/viralrecon/issues/245)] - Mpileup file as output +* [[#247](https://github.com/nf-core/viralrecon/issues/247)] - Add strand-bias filtering option and codon fix in consecutive positions in ivar tsv conversion to vcf ### Parameters @@ -24,8 +26,8 @@ and this project 
adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--nextclade_dataset_reference` | | | `--nextclade_dataset_tag` | | | `--skip_consensus_plots` | -| `--callers` | `--variant_caller` | | | `--consensus_caller` | +| `--callers` | `--variant_caller` | > **NB:** Parameter has been __updated__ if both old and new parameter information is present. > **NB:** Parameter has been __added__ if just the new parameter information is present. From e937a45c08325d2aff5acd9d13855406022f1a21 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 16:52:50 +0000 Subject: [PATCH 15/21] Replace initial process path with glob in modules configs --- conf/modules_illumina.config | 110 +++++++++++++++++------------------ conf/modules_nanopore.config | 16 ++--- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index c51a54e1..1736e38c 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -20,7 +20,7 @@ def assemblers = params.assemblers ? 
params.assemblers.split(',').collect{ it.tr // process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:PREPARE_GENOME:GUNZIP_.*' { + withName: '.*:.*:PREPARE_GENOME:GUNZIP_.*' { publishDir = [ path: { "${params.outdir}/genome" }, mode: 'copy', @@ -29,7 +29,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PREPARE_GENOME:UNTAR_.*' { + withName: '.*:.*:PREPARE_GENOME:UNTAR_.*' { ext.args2 = '--no-same-owner' publishDir = [ path: { "${params.outdir}/genome" }, @@ -49,7 +49,7 @@ process { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC_FASTP:FASTQC_RAW' { + withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/raw" }, @@ -94,7 +94,7 @@ if (!params.skip_fastp) { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC_FASTP:FASTQC_TRIM' { + withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/trim" }, @@ -162,7 +162,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -171,7 +171,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -179,7 +179,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -237,7 +237,7 @@ if (!params.skip_variants) { ] } - withName: 
'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.ivar_trim.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -247,7 +247,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -256,7 +256,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -401,7 +401,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_BGZIP' { publishDir = [ path: { "${params.outdir}/variants/ivar" }, mode: 'copy', @@ -409,7 +409,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar" }, @@ -418,7 +418,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/variants/ivar/bcftools_stats" }, mode: 'copy', @@ -429,7 +429,7 @@ if (!params.skip_variants) { if (!params.skip_asciigenome) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:ASCIIGENOME' { + withName: '.*:.*:VARIANTS_IVAR:.*:ASCIIGENOME' { publishDir = [ path: { "${params.outdir}/variants/ivar/asciigenome/${meta.id}" }, mode: 'copy', @@ -441,7 +441,7 @@ if 
(!params.skip_variants) { if (!params.skip_snpeff) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { + withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, mode: 'copy', @@ -449,7 +449,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, @@ -458,7 +458,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, @@ -467,7 +467,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff/bcftools_stats" }, @@ -476,7 +476,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { publishDir = [ path: { "${params.outdir}/variants/ivar/snpeff" }, mode: 'copy', @@ -518,7 +518,7 @@ if (!params.skip_variants) { if (!params.skip_consensus_plots) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:PLOT_BASE_DENSITY' { + withName: '.*:.*:CONSENSUS_IVAR:.*:PLOT_BASE_DENSITY' { ext.prefix = { "${meta.id}.consensus" 
} publishDir = [ path: { "${params.outdir}/variants/ivar/consensus/base_qc" }, @@ -531,7 +531,7 @@ if (!params.skip_variants) { if (!params.skip_pangolin) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:PANGOLIN' { + withName: '.*:.*:CONSENSUS_IVAR:.*:PANGOLIN' { publishDir = [ path: { "${params.outdir}/variants/ivar/pangolin" }, mode: 'copy', @@ -543,7 +543,7 @@ if (!params.skip_variants) { if (!params.skip_nextclade) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:NEXTCLADE_RUN' { + withName: '.*:.*:CONSENSUS_IVAR:.*:NEXTCLADE_RUN' { publishDir = [ path: { "${params.outdir}/variants/ivar/nextclade" }, mode: 'copy', @@ -555,7 +555,7 @@ if (!params.skip_variants) { if (!params.skip_variants_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_IVAR:.*:QUAST' { + withName: '.*:.*:CONSENSUS_IVAR:.*:QUAST' { publishDir = [ path: { "${params.outdir}/variants/ivar" }, mode: 'copy', @@ -595,7 +595,7 @@ if (!params.skip_variants) { if (!params.skip_asciigenome) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:ASCIIGENOME' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:ASCIIGENOME' { publishDir = [ path: { "${params.outdir}/variants/bcftools/asciigenome/${meta.id}" }, mode: 'copy', @@ -607,7 +607,7 @@ if (!params.skip_variants) { if (!params.skip_snpeff) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, mode: 'copy', @@ -615,7 +615,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, @@ -624,7 +624,7 @@ if 
(!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, @@ -633,7 +633,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff/bcftools_stats" }, @@ -642,7 +642,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { publishDir = [ path: { "${params.outdir}/variants/bcftools/snpeff" }, mode: 'copy', @@ -702,7 +702,7 @@ if (!params.skip_variants) { if (!params.skip_pangolin) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:PANGOLIN' { + withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:PANGOLIN' { publishDir = [ path: { "${params.outdir}/variants/bcftools/pangolin" }, mode: 'copy', @@ -714,7 +714,7 @@ if (!params.skip_variants) { if (!params.skip_nextclade) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:NEXTCLADE_RUN' { + withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:NEXTCLADE_RUN' { publishDir = [ path: { "${params.outdir}/variants/bcftools/nextclade" }, mode: 'copy', @@ -726,7 +726,7 @@ if (!params.skip_variants) { if (!params.skip_variants_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:QUAST' { + withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:QUAST' { publishDir = [ path: { "${params.outdir}/variants/bcftools" }, mode: 
'copy', @@ -738,7 +738,7 @@ if (!params.skip_variants) { if (!params.skip_consensus_plots) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:CONSENSUS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { + withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { ext.prefix = { "${meta.id}.consensus" } publishDir = [ path: { "${params.outdir}/variants/bcftools/consensus/base_qc" }, @@ -791,7 +791,7 @@ if (!params.skip_assembly) { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC' { + withName: '.*:.*:FASTQC' { ext.args = '--quiet' ext.prefix = { "${meta.id}.primer_trim" } publishDir = [ @@ -822,14 +822,14 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:GUNZIP_SCAFFOLDS' { + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_SCAFFOLDS' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, enabled: false ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:GUNZIP_GFA' { + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_GFA' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, enabled: false @@ -839,7 +839,7 @@ if (!params.skip_assembly) { if (!params.skip_bandage) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:BANDAGE_IMAGE' { + withName: '.*:.*:ASSEMBLY_SPADES:BANDAGE_IMAGE' { ext.args = '--height 1000' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/bandage" }, @@ -852,7 +852,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, @@ -861,7 +861,7 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:FILTER_BLASTN' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:FILTER_BLASTN' { ext.prefix = 
{ "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, @@ -874,7 +874,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:QUAST' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, mode: 'copy', @@ -886,7 +886,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/abacas" }, @@ -899,7 +899,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/plasmidid" }, @@ -928,14 +928,14 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:GUNZIP_SCAFFOLDS' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_SCAFFOLDS' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, enabled: false ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:GUNZIP_GFA' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_GFA' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, enabled: false @@ -945,7 +945,7 @@ if (!params.skip_assembly) { if (!params.skip_bandage) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:BANDAGE_IMAGE' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:BANDAGE_IMAGE' { ext.args = '--height 1000' publishDir = [ path: { "${params.outdir}/assembly/unicycler/bandage" }, @@ -958,7 +958,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { 
- withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/unicycler/blastn" }, @@ -967,7 +967,7 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:FILTER_BLASTN' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:FILTER_BLASTN' { ext.prefix = { "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/unicycler/blastn" }, @@ -980,7 +980,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:QUAST' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, mode: 'copy', @@ -992,7 +992,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/unicycler/abacas" }, @@ -1005,7 +1005,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/unicycler/plasmidid" }, @@ -1031,7 +1031,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/minia/blastn" }, @@ -1040,7 +1040,7 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:FILTER_BLASTN' { + withName: 
'.*:.*:ASSEMBLY_MINIA:.*:FILTER_BLASTN' { ext.prefix = { "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/minia/blastn" }, @@ -1053,7 +1053,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:QUAST' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/minia" }, mode: 'copy', @@ -1065,7 +1065,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/minia/abacas" }, @@ -1078,7 +1078,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/minia/plasmidid" }, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 2e31cb98..c35d0f62 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -61,7 +61,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:TABIX_TABIX' { + withName: '.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, @@ -70,7 +70,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:SAMTOOLS_VIEW' { + withName: '.*:.*:.*:SAMTOOLS_VIEW' { ext.args = '-b -F 4' ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ @@ -80,7 +80,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:SAMTOOLS_INDEX' { + withName: '.*:.*:.*:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, @@ -89,7 +89,7 @@ process { ] } - withName: 
'NFCORE_VIRALRECON:NANOPORE:.*:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:.*:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/samtools_stats" }, @@ -98,7 +98,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:BCFTOOLS_STATS' { + withName: '.*:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/bcftools_stats" }, mode: 'copy', @@ -275,7 +275,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:TABIX_BGZIP' { + withName: '.*:.*:.*:.*:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, @@ -284,7 +284,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:.*:TABIX_TABIX' { + withName: '.*:.*:.*:.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, @@ -293,7 +293,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:.*:.*:.*:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff/bcftools_stats" }, From 87365b9a15ca248a365328ba827971ff89cef40e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 17:37:20 +0000 Subject: [PATCH 16/21] Add tests for new caller options --- .github/workflows/ci.yml | 2 + conf/modules_illumina.config | 330 ++++++++++++----------------------- workflows/illumina.nf | 6 +- 3 files changed, 112 insertions(+), 226 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b58b90f..249864d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,8 @@ jobs: strategy: matrix: parameters: + - "--consensus_caller ivar" + - "--variant_caller bcftools --consensus_caller ivar" - "--skip_fastp" - 
"--skip_variants" - "--skip_cutadapt" diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 1736e38c..098d7c69 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -162,7 +162,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -171,7 +171,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -179,7 +179,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -195,32 +195,6 @@ if (!params.skip_variants) { } } - if (!params.skip_asciigenome) { - process { - withName: 'CUSTOM_GETCHROMSIZES' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - } - } - - if (!params.skip_snpeff) { - process { - withName: 'SNPEFF_BUILD' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - } - } - if (!params.skip_ivar_trim && params.protocol == 'amplicon') { process { withName: 'IVAR_TRIM' { @@ -237,7 +211,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.ivar_trim.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -247,7 +221,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -256,7 +230,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -268,7 +242,7 @@ if (!params.skip_variants) { if (!params.skip_markduplicates) { process { - withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + withName: 'PICARD_MARKDUPLICATES' { ext.args = [ 'ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp', params.filter_duplicates ? 'REMOVE_DUPLICATES=true' : '' @@ -426,229 +400,139 @@ if (!params.skip_variants) { ] } } - - if (!params.skip_asciigenome) { - process { - withName: '.*:.*:VARIANTS_IVAR:.*:ASCIIGENOME' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/asciigenome/${meta.id}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } + } - if (!params.skip_snpeff) { - process { - withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, + if (variant_caller == 'bcftools') { + process { + withName: 'BCFTOOLS_MPILEUP' { + ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' + ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' + ext.args3 = "--include 'INFO/DP>=10'" + publishDir = [ + [ + path: { "${params.outdir}/variants/bcftools" }, mode: 'copy', - pattern: "*.{csv,txt,html}" - ] - } - - withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, + pattern: '*.{gz,tbi}' + ], + [ + path: { "${params.outdir}/variants/bcftools" }, mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { - ext.args = '-p vcf -f' - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, + pattern: '*.mpileup', + enabled: params.save_mpileup + ], + [ + path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: '*stats.txt' ] - } + ] + } + } + } - withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff/bcftools_stats" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + if (!params.skip_asciigenome) { + process { + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } - withName: '.*:.*:VARIANTS_IVAR:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: 'ASCIIGENOME' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/asciigenome/${meta.id}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } } - if (!params.skip_consensus && params.consensus_caller == 'ivar') { + if (!params.skip_snpeff) { process { - withName: 'IVAR_CONSENSUS' { - ext.args = '-t 0.75 -q 20 -m 10 -n N' - ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' - ext.prefix = { "${meta.id}.consensus" } + withName: 'SNPEFF_BUILD' { publishDir = [ - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.{fa,txt}", - ], - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.mpileup", - enabled: params.save_mpileup - ] + path: { "${params.outdir}/genome" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference ] } - withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { + withName: 'SNPEFF_ANN' { publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + pattern: "*.{csv,txt,html}" ] } - } - if (!params.skip_consensus_plots) { - process { - withName: '.*:.*:CONSENSUS_IVAR:.*:PLOT_BASE_DENSITY' { - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/consensus/base_qc" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:TABIX_BGZIP' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - } - if (!params.skip_pangolin) { - process { - withName: '.*:.*:CONSENSUS_IVAR:.*:PANGOLIN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/pangolin" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - } - if (!params.skip_nextclade) { - process { - withName: '.*:.*:CONSENSUS_IVAR:.*:NEXTCLADE_RUN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/nextclade" }, - mode: 'copy', - pattern: "*.csv" - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:BCFTOOLS_STATS' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff/bcftools_stats" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } - } - if (!params.skip_variants_quast) { - process { - withName: '.*:.*:CONSENSUS_IVAR:.*:QUAST' { - publishDir = [ - path: { "${params.outdir}/variants/ivar" }, - mode: 'copy', - pattern: "quast" - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } } - if (variant_caller == 'bcftools') { + if (!params.skip_consensus && params.consensus_caller == 'ivar') { process { - withName: 'BCFTOOLS_MPILEUP' { - ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' - ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' - ext.args3 = "--include 'INFO/DP>=10'" + withName: 'IVAR_CONSENSUS' { + ext.args = '-t 0.75 -q 20 -m 10 -n N' + ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' + ext.prefix = { "${meta.id}.consensus" } publishDir = [ [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/ivar/consensus" }, mode: 'copy', - pattern: '*.{gz,tbi}' + pattern: "*.{fa,txt}", ], [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/ivar/consensus" }, mode: 'copy', - pattern: '*.mpileup', + pattern: "*.mpileup", enabled: params.save_mpileup - ], - [ - path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, - mode: 'copy', - pattern: '*stats.txt' ] ] } - } - - if (!params.skip_asciigenome) { - process { - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:ASCIIGENOME' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/asciigenome/${meta.id}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } - - if (!params.skip_snpeff) { - process { - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPEFF_ANN' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - pattern: "*.{csv,txt,html}" - ] - } - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { - ext.args = '-p vcf -f' - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff/bcftools_stats" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:.*:VARIANTS_BCFTOOLS:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + enabled: false + ] } } } @@ -699,12 +583,14 @@ if (!params.skip_variants) { ] } } + } + if (!params.skip_consensus) { if (!params.skip_pangolin) { process { - withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:PANGOLIN' { + withName: 'PANGOLIN' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/pangolin" }, + path: { "${params.outdir}/variants/${params.consensus_caller}/pangolin" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -714,9 +600,9 @@ if (!params.skip_variants) { if (!params.skip_nextclade) { process { - withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:NEXTCLADE_RUN' { + withName: 'NEXTCLADE_RUN' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/nextclade" }, + path: { "${params.outdir}/variants/${params.consensus_caller}/nextclade" }, mode: 'copy', pattern: "*.csv" ] @@ -726,9 +612,9 @@ if (!params.skip_variants) { if (!params.skip_variants_quast) { process { - withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:QUAST' { + withName: '.*:.*:CONSENSUS_.*:.*:QUAST' { publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${params.consensus_caller}" }, mode: 'copy', pattern: "quast" ] @@ -738,10 +624,10 @@ if (!params.skip_variants) { if (!params.skip_consensus_plots) { process { - withName: '.*:.*:CONSENSUS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { + withName: 'PLOT_BASE_DENSITY' { ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus/base_qc" }, + path: { "${params.outdir}/variants/${params.consensus_caller}/consensus/base_qc" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 7d6af82b..c9ec299e 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -413,8 +413,6 @@ workflow ILLUMINA { // // SUBWORKFLOW: Call variants with BCFTools // - ch_vcf = Channel.empty() - ch_tbi = Channel.empty() ch_bcftools_stats_multiqc = Channel.empty() ch_bcftools_snpeff_multiqc = Channel.empty() if (!params.skip_variants && variant_caller == 'bcftools') { @@ -451,7 +449,7 @@ workflow ILLUMINA { ch_ivar_pangolin_multiqc = CONSENSUS_IVAR.out.pangolin_report ch_ivar_nextclade_report = CONSENSUS_IVAR.out.nextclade_report ch_versions = ch_versions.mix(CONSENSUS_IVAR.out.versions) - + // // MODULE: Get Nextclade clade information for MultiQC report // @@ -470,7 +468,7 @@ workflow ILLUMINA { .set { ch_ivar_nextclade_multiqc } } - // + // // SUBWORKFLOW: Call consensus with BCFTools // ch_bcftools_quast_multiqc = Channel.empty() From 9019c0dddbbf7178ed40dd271bfae863a376d8f7 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 18:09:43 +0000 Subject: [PATCH 17/21] Change publishDir by consensus caller --- conf/modules_illumina.config | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 098d7c69..4fe305f7 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -515,12 +515,12 @@ if (!params.skip_variants) { ext.prefix = { "${meta.id}.consensus" } publishDir = [ [ - path: { "${params.outdir}/variants/ivar/consensus" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, mode: 'copy', pattern: "*.{fa,txt}", ], [ - path: { "${params.outdir}/variants/ivar/consensus" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, mode: 'copy', pattern: "*.mpileup", enabled: params.save_mpileup @@ -544,7 +544,7 @@ if (!params.skip_variants) { ext.args2 = 10 ext.prefix = { "${meta.id}.coverage.masked" } 
publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, mode: 'copy', pattern: "*.mpileup", enabled: params.save_mpileup @@ -554,7 +554,7 @@ if (!params.skip_variants) { withName: 'BEDTOOLS_MERGE' { ext.prefix = { "${meta.id}.coverage.merged" } publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, enabled: false ] } @@ -562,7 +562,7 @@ if (!params.skip_variants) { withName: 'BEDTOOLS_MASKFASTA' { ext.prefix = { "${meta.id}.masked" } publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, enabled: false ] } @@ -570,7 +570,7 @@ if (!params.skip_variants) { withName: 'BCFTOOLS_CONSENSUS' { ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -590,7 +590,7 @@ if (!params.skip_variants) { process { withName: 'PANGOLIN' { publishDir = [ - path: { "${params.outdir}/variants/${params.consensus_caller}/pangolin" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/pangolin" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -602,7 +602,7 @@ if (!params.skip_variants) { process { withName: 'NEXTCLADE_RUN' { publishDir = [ - path: { "${params.outdir}/variants/${params.consensus_caller}/nextclade" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/nextclade" }, mode: 'copy', pattern: "*.csv" ] @@ -614,7 +614,7 @@ if (!params.skip_variants) { process { withName: '.*:.*:CONSENSUS_.*:.*:QUAST' { publishDir = [ - path: { "${params.outdir}/variants/${params.consensus_caller}" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}" }, mode: 'copy', pattern: "quast" ] @@ -627,7 +627,7 @@ if (!params.skip_variants) { withName: 'PLOT_BASE_DENSITY' { ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/variants/${params.consensus_caller}/consensus/base_qc" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/base_qc" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] From 0a51f5bbbe19573301dfeef5bd626fb0b9f43d3c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 18:20:07 +0000 Subject: [PATCH 18/21] Push some updates to output docs --- docs/output.md | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/output.md b/docs/output.md index 4be3d50d..699e3c7b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -512,19 +512,19 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `*.tsv`: Original iVar variants in TSV format. * `*.vcf.gz`: iVar variants in VCF format. Converted using custom `ivar_variants_to_vcf.py` python script. * `*.vcf.gz.tbi`: iVar variants in VCF index file. -* `variants/ivar/consensus/` +* `variants/ivar/log/` + * `*.variant_counts.log`: Counts for type of variants called by iVar. 
+* `variants/ivar/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. +* `variants//consensus/ivar/` * `*.consensus.fa`: Consensus Fasta file generated by iVar. * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. -* `variants/ivar/consensus/base_qc/` +* `variants//consensus/ivar/base_qc/` * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. -* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. @@ -542,16 +542,16 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants/bcftools/` * `*.vcf.gz`: Variants VCF file. * `*.vcf.gz.tbi`: Variants VCF index file. -* `variants/bcftools/consensus/` +* `variants/bcftools/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. +* `variants//consensus/bcftools/` * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. -* `variants/bcftools/consensus/base_qc/` +* `variants//consensus/bcftools/base_qc/` * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. 
* `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. @@ -588,63 +588,63 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such ![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) -### QUAST +### ASCIIGenome
Output files -* `variants//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. +* `variants//asciigenome//` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. -### Pangolin +

ASCIIGenome screenshot

+ +### QUAST
Output files -* `variants//pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. +* `variants//consensus//quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -### Nextclade +### Pangolin
Output files -* `variants//nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. - -**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +* `variants//consensus//pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin.
-[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). -### ASCIIGenome +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +### Nextclade
Output files -* `variants//asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. +* `variants//consensus//nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARIC v3 protocol. - -

ASCIIGenome screenshot

+[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. ## Illumina: De novo assembly From f38b236a8451260fbf92d6140c74215b09f448d8 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 18:22:25 +0000 Subject: [PATCH 19/21] Update output docs --- docs/output.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/output.md b/docs/output.md index 699e3c7b..afa08c3b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -553,6 +553,8 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + [BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). 
From 2970912685d08f4accf08bdda9d615ddea635b9c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 18:28:11 +0000 Subject: [PATCH 20/21] Fix ECLint --- subworkflows/local/consensus_bcftools.nf | 2 +- subworkflows/local/consensus_ivar.nf | 4 ++-- subworkflows/local/consensus_qc.nf | 4 ++-- subworkflows/local/variants_ivar.nf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/consensus_bcftools.nf b/subworkflows/local/consensus_bcftools.nf index 14cefb2e..7db20187 100644 --- a/subworkflows/local/consensus_bcftools.nf +++ b/subworkflows/local/consensus_bcftools.nf @@ -69,7 +69,7 @@ workflow CONSENSUS_BCFTOOLS { emit: consensus = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] - + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] diff --git a/subworkflows/local/consensus_ivar.nf b/subworkflows/local/consensus_ivar.nf index b3de4d94..15fc326e 100644 --- a/subworkflows/local/consensus_ivar.nf +++ b/subworkflows/local/consensus_ivar.nf @@ -40,7 +40,7 @@ workflow CONSENSUS_IVAR { emit: consensus = IVAR_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] consensus_qual = IVAR_CONSENSUS.out.qual // channel: [ val(meta), [ qual.txt ] ] - + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] @@ -52,4 +52,4 @@ workflow CONSENSUS_IVAR { bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] versions = ch_versions // channel: [ versions.yml ] -} \ No newline at end of file +} diff --git a/subworkflows/local/consensus_qc.nf b/subworkflows/local/consensus_qc.nf index d370f68b..8b24e60e 100644 --- a/subworkflows/local/consensus_qc.nf +++ b/subworkflows/local/consensus_qc.nf @@ -13,7 +13,7 @@ workflow CONSENSUS_QC { fasta // channel: /path/to/genome.fasta gff // channel: 
/path/to/genome.gff nextclade_db // channel: /path/to/nextclade_db/ - + main: ch_versions = Channel.empty() @@ -87,4 +87,4 @@ workflow CONSENSUS_QC { bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] versions = ch_versions // channel: [ versions.yml ] -} \ No newline at end of file +} diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 15a10e4e..c4bf2eeb 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -85,4 +85,4 @@ workflow VARIANTS_IVAR { asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] versions = ch_versions // channel: [ versions.yml ] -} \ No newline at end of file +} From 445b636692359e3065c34512b65f06b78e1089e7 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 25 Jan 2022 21:12:12 +0000 Subject: [PATCH 21/21] Add Anthony as contributor --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 47b8e5d4..98e5dc50 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | +| [Anthony Underwood](https://github.com/antunderwood) | [Centre for Genomic Pathogen Surveillance](https://www.pathogensurveillance.net) | | [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | | [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | | [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) |