diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b58b90f..249864d9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,8 @@ jobs: strategy: matrix: parameters: + - "--consensus_caller ivar" + - "--variant_caller bcftools --consensus_caller ivar" - "--skip_fastp" - "--skip_variants" - "--skip_cutadapt" diff --git a/CHANGELOG.md b/CHANGELOG.md index f2c93952..d2e4791a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * [[#232](https://github.com/nf-core/viralrecon/issues/232)] - Remove duplicate variants called by ARTIC ONT pipeline * [[#235](https://github.com/nf-core/viralrecon/issues/235)] - Nextclade version bump * [[#244](https://github.com/nf-core/viralrecon/issues/244)] - Fix BCFtools consensus generation and masking +* [[#245](https://github.com/nf-core/viralrecon/issues/245)] - Mpileup file as output +* [[#247](https://github.com/nf-core/viralrecon/issues/247)] - Add strand-bias filtering option and codon fix in consecutive positions in ivar tsv conversion to vcf ### Parameters @@ -23,6 +25,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | | `--nextclade_dataset_name` | | | `--nextclade_dataset_reference` | | | `--nextclade_dataset_tag` | +| | `--skip_consensus_plots` | +| | `--consensus_caller` | +| `--callers` | `--variant_caller` | > **NB:** Parameter has been __updated__ if both old and new parameter information is present. > **NB:** Parameter has been __added__ if just the new parameter information is present. diff --git a/README.md b/README.md index c4fa3a7e..98e5dc50 100644 --- a/README.md +++ b/README.md @@ -41,13 +41,13 @@ The SRA download functionality has been removed from the pipeline (`>=2.1`) and 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); *optional*) 5. 
Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) - 7. Choice of multiple variant calling and consensus sequence generation routes ([`iVar variants and consensus`](https://github.com/andersen-lab/ivar); *default for amplicon data* *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); *default for metagenomics data*) + 7. Choice of multiple variant callers ([`iVar variants`](https://github.com/andersen-lab/ivar); *default for amplicon data* *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html); *default for metagenomics data*) * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + 8. Choice of multiple consensus callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); *default for both amplicon and metagenomics data* *||* [`iVar consensus`](https://github.com/andersen-lab/ivar)) * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) * Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) * Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) - 8. Intersect variants across callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) 6. _De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); *amplicon data only*) 2. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) *||* [`Unicycler`](https://github.com/rrwick/Unicycler) *||* [`minia`](https://github.com/GATB/minia)) @@ -167,6 +167,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | +| [Anthony Underwood](https://github.com/antunderwood) | [Centre for Genomic Pathogen Surveillance](https://www.pathogensurveillance.net) | | [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | | [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | | [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index db244a7c..4fe305f7 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -10,15 +10,17 @@ ---------------------------------------------------------------------------------------- */ +def variant_caller = params.variant_caller +if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' } + def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] -def callers = params.callers ? 
params.callers.split(',').collect{ it.trim().toLowerCase() } : [] // // Pre-processing and general configuration options // process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:PREPARE_GENOME:GUNZIP_.*' { + withName: '.*:.*:PREPARE_GENOME:GUNZIP_.*' { publishDir = [ path: { "${params.outdir}/genome" }, mode: 'copy', @@ -27,7 +29,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PREPARE_GENOME:UNTAR_.*' { + withName: '.*:.*:PREPARE_GENOME:UNTAR_.*' { ext.args2 = '--no-same-owner' publishDir = [ path: { "${params.outdir}/genome" }, @@ -47,7 +49,7 @@ process { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC_FASTP:FASTQC_RAW' { + withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/raw" }, @@ -92,7 +94,7 @@ if (!params.skip_fastp) { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC_FASTP:FASTQC_TRIM' { + withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/trim" }, @@ -160,7 +162,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -169,7 +171,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -177,7 +179,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ALIGN_BOWTIE2:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -193,32 +195,6 @@ if (!params.skip_variants) { } } - if 
(!params.skip_asciigenome) { - process { - withName: 'CUSTOM_GETCHROMSIZES' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - } - } - - if (!params.skip_snpeff) { - process { - withName: 'SNPEFF_BUILD' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - enabled: params.save_reference - ] - } - } - } - if (!params.skip_ivar_trim && params.protocol == 'amplicon') { process { withName: 'IVAR_TRIM' { @@ -235,7 +211,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.ivar_trim.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -245,7 +221,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: 'copy', @@ -254,7 +230,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:PRIMER_TRIM_IVAR:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: 'copy', @@ -266,7 +242,7 @@ if (!params.skip_variants) { if (!params.skip_markduplicates) { process { - withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + withName: 'PICARD_MARKDUPLICATES' { ext.args = [ 'ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp', params.filter_duplicates ? 
'REMOVE_DUPLICATES=true' : '' @@ -378,7 +354,7 @@ if (!params.skip_variants) { } } - if ('ivar' in callers) { + if (variant_caller == 'ivar') { process { withName: 'IVAR_VARIANTS' { ext.args = '-t 0.25 -q 20 -m 10' @@ -389,26 +365,17 @@ if (!params.skip_variants) { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - if (params.protocol == 'amplicon') { - withName: 'IVAR_VARIANTS_TO_VCF' { - ext.args = '--ignore_strand_bias' - publishDir = [ - path: { "${params.outdir}/variants/ivar/log" }, - mode: 'copy', - pattern: '*.log' - ] - } - }else{ - withName: 'IVAR_VARIANTS_TO_VCF' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/log" }, - mode: 'copy', - pattern: '*.log' - ] - } + + withName: 'IVAR_VARIANTS_TO_VCF' { + ext.args = params.protocol == 'amplicon' ? '--ignore_strand_bias' : '' + publishDir = [ + path: { "${params.outdir}/variants/ivar/log" }, + mode: 'copy', + pattern: '*.log' + ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_BGZIP' { publishDir = [ path: { "${params.outdir}/variants/ivar" }, mode: 'copy', @@ -416,7 +383,7 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar" }, @@ -425,275 +392,205 @@ if (!params.skip_variants) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/variants/ivar/bcftools_stats" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + } + } - withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { + if (variant_caller == 'bcftools') { + process { + withName: 'BCFTOOLS_MPILEUP' { + ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' + ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' + ext.args3 = "--include 'INFO/DP>=10'" publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false + [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: '*.{gz,tbi}' + ], + [ + path: { "${params.outdir}/variants/bcftools" }, + mode: 'copy', + pattern: '*.mpileup', + enabled: params.save_mpileup + ], + [ + path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, + mode: 'copy', + pattern: '*stats.txt' + ] ] } } + } - if (!params.skip_consensus) { - process { - withName: 'IVAR_CONSENSUS' { - ext.args = '-t 0.75 -q 20 -m 10 -n N' - ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.{fa,txt}", - ], - [ - path: { "${params.outdir}/variants/ivar/consensus" }, - mode: 'copy', - pattern: "*.mpileup", - enabled: params.save_mpileup - ] - ] - } - - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:PLOT_BASE_DENSITY' { - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/consensus/base_qc" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + if (!params.skip_asciigenome) { + process { + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] } - if (!params.skip_pangolin) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:PANGOLIN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/pangolin" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + withName: 'ASCIIGENOME' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/asciigenome/${meta.id}" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } + } + } - if (!params.skip_nextclade) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:NEXTCLADE_RUN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/nextclade" }, - mode: 'copy', - pattern: "*.csv" - ] - } - } + if (!params.skip_snpeff) { + process { + withName: 'SNPEFF_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] } - if (!params.skip_variants_quast) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:QUAST' { - publishDir = [ - path: { "${params.outdir}/variants/ivar" }, - mode: 'copy', - pattern: "quast" - ] - } - } + withName: 'SNPEFF_ANN' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + pattern: "*.{csv,txt,html}" + ] } - } - if (!params.skip_asciigenome) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:ASCIIGENOME' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/asciigenome/${meta.id}" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:TABIX_BGZIP' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - } - - if (!params.skip_snpeff) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:SNPEFF_ANN' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, - mode: 'copy', - pattern: "*.{csv,txt,html}" - ] - } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { - ext.args = '-p vcf -f' - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff/bcftools_stats" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:BCFTOOLS_STATS' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff/bcftools_stats" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_IVAR:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { - publishDir = [ - path: { "${params.outdir}/variants/ivar/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } } - if ('bcftools' in callers) { + if (!params.skip_consensus && params.consensus_caller == 'ivar') { process { - withName: 'BCFTOOLS_MPILEUP' { - ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' - ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' - ext.args3 = "--include 'INFO/DP>=10'" + withName: 'IVAR_CONSENSUS' { + ext.args = '-t 0.75 -q 20 -m 10 -n N' + ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' + ext.prefix = { "${meta.id}.consensus" } publishDir = [ [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, mode: 'copy', - pattern: '*.{gz,tbi}' + pattern: "*.{fa,txt}", ], [ - path: { "${params.outdir}/variants/bcftools" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, mode: 'copy', - pattern: '*.mpileup', + pattern: "*.mpileup", enabled: params.save_mpileup - ], - [ - path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, - mode: 'copy', - pattern: 
'*stats.txt' ] ] } - withName: 'MULTIQC_TSV_BCFTOOLS_NEXTCLADE' { + withName: 'MULTIQC_TSV_IVAR_NEXTCLADE' { publishDir = [ path: { "${params.outdir}/multiqc" }, enabled: false ] } } + } - if (!params.skip_consensus) { - process { - withName: 'MAKE_BED_MASK' { - ext.args = "-a --ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0" - ext.args2 = 10 - ext.prefix = { "${meta.id}.coverage.masked" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - mode: 'copy', - pattern: "*.mpileup", - enabled: params.save_mpileup - ] - } - - withName: 'BEDTOOLS_MERGE' { - ext.prefix = { "${meta.id}.coverage.merged" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - enabled: false - ] - } - - withName: 'BEDTOOLS_MASKFASTA' { - ext.prefix = { "${meta.id}.masked" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - enabled: false - ] - } - - withName: 'BCFTOOLS_CONSENSUS' { - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + if (!params.skip_consensus && params.consensus_caller == 'bcftools') { + process { + withName: 'MAKE_BED_MASK' { + ext.args = "-a --ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0" + ext.args2 = 10 + ext.prefix = { "${meta.id}.coverage.masked" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: 'copy', + pattern: "*.mpileup", + enabled: params.save_mpileup + ] + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:.*:PLOT_BASE_DENSITY' { - ext.prefix = { "${meta.id}.consensus" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools/consensus/base_qc" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } + withName: 'BEDTOOLS_MERGE' { + ext.prefix = { "${meta.id}.coverage.merged" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + enabled: false + ] } - if (!params.skip_pangolin) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:PANGOLIN' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/pangolin" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } + withName: 'BEDTOOLS_MASKFASTA' { + ext.prefix = { "${meta.id}.masked" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + enabled: false + ] } - if (!params.skip_nextclade) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:NEXTCLADE_RUN' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools/nextclade" }, - mode: 'copy', - pattern: "*.csv" - ] - } - } + withName: 'BCFTOOLS_CONSENSUS' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: 'copy', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] } - if (!params.skip_variants_quast) { - process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:QUAST' { - publishDir = [ - path: { "${params.outdir}/variants/bcftools" }, - mode: 'copy', - pattern: "quast" - ] - } - } + withName: 'MULTIQC_TSV_BCFTOOLS_NEXTCLADE' { + publishDir = [ + path: { "${params.outdir}/multiqc" }, + enabled: false + ] } } + } - if (!params.skip_asciigenome) { + if (!params.skip_consensus) { + if (!params.skip_pangolin) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:ASCIIGENOME' { + withName: 'PANGOLIN' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/asciigenome/${meta.id}" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/pangolin" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -701,46 +598,36 @@ if (!params.skip_variants) { } } - if (!params.skip_snpeff) { + if (!params.skip_nextclade) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:SNPEFF_ANN' { + withName: 'NEXTCLADE_RUN' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/nextclade" }, mode: 'copy', - pattern: "*.{csv,txt,html}" - ] - } - - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:TABIX_BGZIP' { - ext.prefix = { "${meta.id}.snpeff" } - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:TABIX_TABIX' { - ext.args = '-p vcf -f' - publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + pattern: "*.csv" ] } + } + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:VCF_BGZIP_TABIX_STATS:VCF_TABIX_STATS:BCFTOOLS_STATS' { - ext.prefix = { "${meta.id}.snpeff" } + if (!params.skip_variants_quast) { + process { + withName: '.*:.*:CONSENSUS_.*:.*:QUAST' { publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff/bcftools_stats" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}" }, mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: "quast" ] } + } + } - withName: 'NFCORE_VIRALRECON:ILLUMINA:VARIANTS_BCFTOOLS:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + if (!params.skip_consensus_plots) { + process { + withName: 'PLOT_BASE_DENSITY' { + ext.prefix = { "${meta.id}.consensus" } publishDir = [ - path: { "${params.outdir}/variants/bcftools/snpeff" }, + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/base_qc" }, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -748,19 +635,6 @@ if (!params.skip_variants) { } } } - - if (callers.size() > 1) { - process { - withName: 'BCFTOOLS_ISEC' { - ext.args = '--nfiles +2 --output-type z' - publishDir = [ - path: { "${params.outdir}/variants/intersect" }, - mode: 'copy', - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - } - } } if (!params.skip_assembly) { @@ -803,7 +677,7 @@ if (!params.skip_assembly) { if (!params.skip_fastqc) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:FASTQC' { + withName: '.*:.*:FASTQC' { ext.args = '--quiet' ext.prefix = { "${meta.id}.primer_trim" } publishDir = [ @@ -834,14 +708,14 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:GUNZIP_SCAFFOLDS' { + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_SCAFFOLDS' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, enabled: false ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:GUNZIP_GFA' { + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_GFA' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, enabled: false @@ -851,7 +725,7 @@ if (!params.skip_assembly) { if (!params.skip_bandage) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:BANDAGE_IMAGE' { + withName: '.*:.*:ASSEMBLY_SPADES:BANDAGE_IMAGE' { ext.args = '--height 1000' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/bandage" }, @@ -864,7 +738,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, @@ -873,7 +747,7 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:FILTER_BLASTN' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:FILTER_BLASTN' { ext.prefix = { "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, @@ -886,7 +760,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:QUAST' { + withName: 
'.*:.*:ASSEMBLY_SPADES:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, mode: 'copy', @@ -898,7 +772,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/abacas" }, @@ -911,7 +785,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_SPADES:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_SPADES:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/spades/${params.spades_mode}/plasmidid" }, @@ -940,14 +814,14 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:GUNZIP_SCAFFOLDS' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_SCAFFOLDS' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, enabled: false ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:GUNZIP_GFA' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_GFA' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, enabled: false @@ -957,7 +831,7 @@ if (!params.skip_assembly) { if (!params.skip_bandage) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:BANDAGE_IMAGE' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:BANDAGE_IMAGE' { ext.args = '--height 1000' publishDir = [ path: { "${params.outdir}/assembly/unicycler/bandage" }, @@ -970,7 +844,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/unicycler/blastn" }, @@ -979,7 +853,7 @@ if 
(!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:FILTER_BLASTN' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:FILTER_BLASTN' { ext.prefix = { "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/unicycler/blastn" }, @@ -992,7 +866,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:QUAST' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/unicycler" }, mode: 'copy', @@ -1004,7 +878,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/unicycler/abacas" }, @@ -1017,7 +891,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_UNICYCLER:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/unicycler/plasmidid" }, @@ -1043,7 +917,7 @@ if (!params.skip_assembly) { if (!params.skip_blast) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:BLAST_BLASTN' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:BLAST_BLASTN' { ext.args = "-outfmt '6 stitle std slen qlen qcovs'" publishDir = [ path: { "${params.outdir}/assembly/minia/blastn" }, @@ -1052,7 +926,7 @@ if (!params.skip_assembly) { ] } - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:FILTER_BLASTN' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:FILTER_BLASTN' { ext.prefix = { "${meta.id}.filter.blastn" } publishDir = [ path: { "${params.outdir}/assembly/minia/blastn" }, @@ -1065,7 +939,7 @@ if (!params.skip_assembly) { if (!params.skip_assembly_quast) { process { - withName: 
'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:QUAST' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:QUAST' { publishDir = [ path: { "${params.outdir}/assembly/minia" }, mode: 'copy', @@ -1077,7 +951,7 @@ if (!params.skip_assembly) { if (!params.skip_abacas) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:ABACAS' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:ABACAS' { ext.args = '-m -p nucmer' publishDir = [ path: { "${params.outdir}/assembly/minia/abacas" }, @@ -1090,7 +964,7 @@ if (!params.skip_assembly) { if (!params.skip_plasmidid) { process { - withName: 'NFCORE_VIRALRECON:ILLUMINA:ASSEMBLY_MINIA:.*:PLASMIDID' { + withName: '.*:.*:ASSEMBLY_MINIA:.*:PLASMIDID' { ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' publishDir = [ path: { "${params.outdir}/assembly/minia/plasmidid" }, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 2e31cb98..c35d0f62 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -61,7 +61,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:TABIX_TABIX' { + withName: '.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, @@ -70,7 +70,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:SAMTOOLS_VIEW' { + withName: '.*:.*:.*:SAMTOOLS_VIEW' { ext.args = '-b -F 4' ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ @@ -80,7 +80,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:SAMTOOLS_INDEX' { + withName: '.*:.*:.*:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, @@ -89,7 +89,7 @@ process { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:.*:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/samtools_stats" }, @@ -98,7 +98,7 @@ process { ] } - 
withName: 'NFCORE_VIRALRECON:NANOPORE:BCFTOOLS_STATS' { + withName: '.*:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/bcftools_stats" }, mode: 'copy', @@ -275,7 +275,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:TABIX_BGZIP' { + withName: '.*:.*:.*:.*:TABIX_BGZIP' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, @@ -284,7 +284,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:.*:TABIX_TABIX' { + withName: '.*:.*:.*:.*:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, @@ -293,7 +293,7 @@ if (!params.skip_snpeff) { ] } - withName: 'NFCORE_VIRALRECON:NANOPORE:.*:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:.*:.*:.*:BCFTOOLS_STATS' { ext.prefix = { "${meta.id}.snpeff" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/snpeff/bcftools_stats" }, diff --git a/conf/test.config b/conf/test.config index 44eca7bb..c3109b6a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,10 +30,10 @@ params { genome = 'MN908947.3' kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' - - // Skip this by default to bypass Github Actions disk quota errors - skip_plasmidid = true + // Variant calling options + variant_caller = 'ivar' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } diff --git a/conf/test_full.config b/conf/test_full.config index 10041d28..e8a6d2cc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -24,13 +24,16 @@ params { // Genome references genome = 'MN908947.3' - // Other pipeline options - callers = 
'ivar,bcftools' - assemblers = 'spades,unicycler,minia' + // Variant calling options + variant_caller = 'ivar' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } process { - withName:PLASMIDID { + withName: 'PLASMIDID' { errorStrategy = 'ignore' } } diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index 49b295fc..2e8b04d2 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -22,7 +22,15 @@ params { // Genome references genome = 'MN908947.3' - // Other pipeline options - callers = 'ivar,bcftools' + // Variant calling options + variant_caller = 'bcftools' + + // Assembly options assemblers = 'spades,unicycler,minia' } + +process { + withName: 'PLASMIDID' { + errorStrategy = 'ignore' + } +} \ No newline at end of file diff --git a/conf/test_sispa.config b/conf/test_sispa.config index 526bf56d..9aaff136 100644 --- a/conf/test_sispa.config +++ b/conf/test_sispa.config @@ -28,10 +28,10 @@ params { genome = 'MN908947.3' kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' - // Other pipeline options - callers = 'ivar,bcftools' - assemblers = 'spades,unicycler,minia' - - // Skip this by default to bypass Github Actions disk quota errors - skip_plasmidid = true + // Variant calling options + variant_caller = 'bcftools' + + // Assembly options + assemblers = 'spades,unicycler,minia' + skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } diff --git a/docs/output.md b/docs/output.md index 4ae2e65f..afa08c3b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -301,7 +301,6 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t * [Pangolin](#pangolin) - Lineage analysis * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks * [ASCIIGenome](#asciigenome) - 
Individual variant screenshots with annotation tracks - * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers * [De novo assembly](#illumina-de-novo-assembly) * [Cutadapt](#cutadapt) - Primer trimming for amplicon data * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly @@ -513,19 +512,19 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `*.tsv`: Original iVar variants in TSV format. * `*.vcf.gz`: iVar variants in VCF format. Converted using custom `ivar_variants_to_vcf.py` python script. * `*.vcf.gz.tbi`: iVar variants in VCF index file. -* `variants/ivar/consensus/` +* `variants/ivar/log/` + * `*.variant_counts.log`: Counts for type of variants called by iVar. +* `variants/ivar/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. +* `variants//consensus/ivar/` * `*.consensus.fa`: Consensus Fasta file generated by iVar. * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. -* `variants/ivar/consensus/base_qc/` +* `variants//consensus/ivar/base_qc/` * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. -* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. @@ -543,16 +542,18 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants/bcftools/` * `*.vcf.gz`: Variants VCF file. 
* `*.vcf.gz.tbi`: Variants VCF index file. -* `variants/bcftools/consensus/` +* `variants/bcftools/bcftools_stats/` + * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. +* `variants//consensus/bcftools/` * `*.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. -* `variants/bcftools/consensus/base_qc/` +* `variants//consensus/bcftools/base_qc/` * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. -* `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -579,7 +580,7 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such * `variants//snpeff/bcftools_stats/` * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). 
@@ -589,80 +590,63 @@ iVar outputs a tsv format, which is not compatible with downstream analysis such ![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) -### QUAST +### ASCIIGenome
Output files -* `variants//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. +* `variants//asciigenome//` + * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. - -### Pangolin - -
-Output files - -* `variants//pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). - -
+As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. -Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). +

ASCIIGenome screenshot

-### Nextclade +### QUAST
Output files -* `variants//nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. +* `variants//consensus//quast/` + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -### ASCIIGenome +### Pangolin
Output files -* `variants//asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. - -**NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +* `variants//consensus//pangolin/` + * `*.pangolin.csv`: Lineage analysis results from Pangolin.
-As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARIC v3 protocol. +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). -

ASCIIGenome screenshot

+**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). -### BCFTools isec +### Nextclade
Output files -* `variants/intersect//` - * `*.vcf.gz`: VCF file containing variants common to both variant callers. There will be one file for each caller - see `README.txt` for details. - * `*.vcf.gz.tbi`: Index for VCF file. - * `README.txt`: File containing command used and file name mappings. - * `sites.txt`: List of variants common to both variant callers in textual format. The last column indicates presence (1) or absence (0) amongst the 2 different callers. +* `variants//consensus//nextclade/` + * `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. -**NB:** This process will only be executed when both variant callers are specified to be run i.e. `--callers ivar,bcftools`. +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
-[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 2 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. ## Illumina: De novo assembly diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index f9531376..56b20612 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -31,10 +31,19 @@ class WorkflowIllumina { } // Variant calling parameter validation - def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] - if ((valid_params['callers'] + callers).unique().size() != valid_params['callers'].size()) { - log.error "Invalid option: ${params.callers}. Valid options for '--callers': ${valid_params['callers'].join(', ')}." - System.exit(1) + if (params.variant_caller) { + if (!valid_params['variant_callers'].contains(params.variant_caller)) { + log.error "Invalid option: ${params.variant_caller}. Valid options for '--variant_caller': ${valid_params['variant_callers'].join(', ')}." + System.exit(1) + } + } + + // Consensus calling parameter validation + if (params.consensus_caller) { + if (!valid_params['consensus_callers'].contains(params.consensus_caller)) { + log.error "Invalid option: ${params.consensus_caller}. Valid options for '--consensus_caller': ${valid_params['consensus_callers'].join(', ')}." 
+ System.exit(1) + } } if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) { diff --git a/modules/local/bcftools_isec.nf b/modules/local/bcftools_isec.nf deleted file mode 100644 index a9ce5ea8..00000000 --- a/modules/local/bcftools_isec.nf +++ /dev/null @@ -1,31 +0,0 @@ -process BCFTOOLS_ISEC { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" - - input: - tuple val(meta), path('ivar/*'), path('ivar/*'), path('bcftools/*'), path('bcftools/*') - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools isec \\ - $args \\ - -p $prefix \\ - */*.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/nextflow.config b/nextflow.config index 1f0470a3..c28cc0a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,7 +61,8 @@ params { skip_cutadapt = false // Illumina variant calling options - callers = null + variant_caller = null + consensus_caller = 'bcftools' min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_offset = null @@ -71,6 +72,7 @@ params { skip_ivar_trim = false skip_markduplicates = true skip_picard_metrics = false + skip_consensus_plots = false skip_consensus = false skip_variants = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 18214bdf..34154e20 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -383,10 +383,24 @@ "description": "Various options for the variant calling branch of the Illumina workflow.", 
"default": "", "properties": { - "callers": { + "variant_caller": { "type": "string", - "description": "Specify which variant calling algorithms you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", - "fa_icon": "fas fa-phone-volume" + "fa_icon": "fas fa-phone-volume", + "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", + "enum": [ + "ivar", + "bcftools" + ] + }, + "consensus_caller": { + "type": "string", + "default": "bcftools", + "fa_icon": "fas fa-phone-volume", + "description": "Specify which consensus calling algorithm you would like to use. Available options are 'bcftools' and 'ivar' (default: 'bcftools').", + "enum": [ + "ivar", + "bcftools" + ] }, "min_mapped_reads": { "type": "integer", @@ -441,6 +455,11 @@ "fa_icon": "fas fa-fast-forward", "description": "Skip SnpEff and SnpSift annotation of variants." }, + "skip_consensus_plots": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip creation of consensus base density plots." 
+ }, "skip_consensus": { "type": "boolean", "fa_icon": "fas fa-fast-forward", diff --git a/subworkflows/local/consensus_bcftools.nf b/subworkflows/local/consensus_bcftools.nf new file mode 100644 index 00000000..7db20187 --- /dev/null +++ b/subworkflows/local/consensus_bcftools.nf @@ -0,0 +1,84 @@ +// +// Consensus calling with BCFTools and downstream processing QC +// + +include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/modules/bedtools/maskfasta/main' +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/modules/bcftools/consensus/main' +include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf // channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Create BED file with consensus regions to mask + // + MAKE_BED_MASK ( + bam.join(vcf, by: [0]), + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(MAKE_BED_MASK.out.versions.first()) + + // + // Merge intervals with BEDTools + // + BEDTOOLS_MERGE ( + MAKE_BED_MASK.out.bed + ) + ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first()) + + // + // Mask regions in consensus with BEDTools + // + BEDTOOLS_MASKFASTA ( + BEDTOOLS_MERGE.out.bed, + fasta + ) + ch_versions = ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) + + // + // Call consensus sequence with BCFTools + // + BCFTOOLS_CONSENSUS ( + vcf.join(tbi, by: [0]).join(BEDTOOLS_MASKFASTA.out.fasta, by: [0]) + ) + ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + BCFTOOLS_CONSENSUS.out.fasta, + fasta, + gff, + 
nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions.first()) + + emit: + consensus = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_ivar.nf b/subworkflows/local/consensus_ivar.nf new file mode 100644 index 00000000..15fc326e --- /dev/null +++ b/subworkflows/local/consensus_ivar.nf @@ -0,0 +1,55 @@ +// +// Consensus calling with iVar and downstream processing QC +// + +include { IVAR_CONSENSUS } from '../../modules/nf-core/modules/ivar/consensus/main' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Call consensus sequence with iVar + // + IVAR_CONSENSUS ( + bam, + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(IVAR_CONSENSUS.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + IVAR_CONSENSUS.out.fasta, + fasta, + gff, + nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions.first()) + + emit: + consensus = IVAR_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + consensus_qual = IVAR_CONSENSUS.out.qual // channel: [ val(meta), [ qual.txt ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: 
[ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_qc.nf b/subworkflows/local/consensus_qc.nf new file mode 100644 index 00000000..8b24e60e --- /dev/null +++ b/subworkflows/local/consensus_qc.nf @@ -0,0 +1,90 @@ +// +// Consensus calling QC +// + +include { QUAST } from '../../modules/nf-core/modules/quast/main' +include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' +include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' + +workflow CONSENSUS_QC { + take: + consensus // channel: [ val(meta), [ consensus ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Consensus QC report across samples with QUAST + // + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + if (!params.skip_variants_quast) { + QUAST ( + consensus.collect{ it[1] }, + fasta, + gff, + true, + params.gff + ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_versions = ch_versions.mix(QUAST.out.versions) + } + + // + // Lineage analysis with Pangolin + // + ch_pangolin_report = Channel.empty() + if (!params.skip_pangolin) { + PANGOLIN ( + consensus + ) + ch_pangolin_report = PANGOLIN.out.report + ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) + } + + // + // Lineage analysis with Nextclade + // + 
ch_nextclade_report = Channel.empty() + if (!params.skip_nextclade) { + NEXTCLADE_RUN ( + consensus, + nextclade_db + ) + ch_nextclade_report = NEXTCLADE_RUN.out.csv + ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) + } + + // + // Plot consensus base density + // + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + if (!params.skip_consensus_plots) { + PLOT_BASE_DENSITY ( + consensus + ) + ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv + ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf + ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) + } + + emit: + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/make_consensus.nf b/subworkflows/local/make_consensus.nf deleted file mode 100644 index 95481bd1..00000000 --- a/subworkflows/local/make_consensus.nf +++ /dev/null @@ -1,54 +0,0 @@ -// -// Run various tools to generate a masked genome consensus sequence -// - -include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/modules/bedtools/maskfasta/main' -include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/modules/bcftools/consensus/main' -include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' -include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' - -workflow MAKE_CONSENSUS { - take: - bam_vcf // channel: [ val(meta), [ bam ], [ vcf ], [ tbi ] ] - fasta - - main: - - ch_versions = Channel.empty() - - MAKE_BED_MASK ( - bam_vcf.map { meta, bam, vcf, tbi -> [ meta, 
bam, vcf ] }, - fasta, - params.save_mpileup - ) - ch_versions = ch_versions.mix(MAKE_BED_MASK.out.versions.first()) - - BEDTOOLS_MERGE ( - MAKE_BED_MASK.out.bed - ) - ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first()) - - BEDTOOLS_MASKFASTA ( - BEDTOOLS_MERGE.out.bed, - fasta - ) - ch_versions = ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) - - BCFTOOLS_CONSENSUS ( - bam_vcf.map { meta, bam, vcf, tbi -> [ meta, vcf, tbi ] }.join( BEDTOOLS_MASKFASTA.out.fasta, by: [0] ) - ) - ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) - - PLOT_BASE_DENSITY ( - BCFTOOLS_CONSENSUS.out.fasta - ) - ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) - - emit: - fasta = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] - tsv = PLOT_BASE_DENSITY.out.tsv // channel: [ val(meta), [ tsv ] ] - pdf = PLOT_BASE_DENSITY.out.pdf // channel: [ val(meta), [ pdf ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 501ecd53..100d25dc 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -1,15 +1,9 @@ // -// Variant calling and downstream processing for BCFTools +// Variant calling with BCFTools, downstream processing and QC // include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/modules/bcftools/mpileup/main' -include { QUAST } from '../../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' -include { ASCIIGENOME } from '../../modules/local/asciigenome' - -include { MAKE_CONSENSUS } from './make_consensus' -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' +include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_BCFTOOLS { take: @@ -18,7 +12,6 @@ workflow VARIANTS_BCFTOOLS { sizes // channel: 
/path/to/genome.sizes gff // channel: /path/to/genome.gff bed // channel: /path/to/primers.bed - nextclade_db // channel: /path/to/nextclade_db/ snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config @@ -37,136 +30,35 @@ workflow VARIANTS_BCFTOOLS { ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) // - // Create genome consensus using variants in VCF, run QUAST and pangolin - // - ch_consensus = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_nextclade_report = Channel.empty() - if (!params.skip_consensus) { - MAKE_CONSENSUS ( - bam.join(BCFTOOLS_MPILEUP.out.vcf, by: [0]).join(BCFTOOLS_MPILEUP.out.tbi, by: [0]), - fasta - ) - ch_consensus = MAKE_CONSENSUS.out.fasta - ch_bases_tsv = MAKE_CONSENSUS.out.tsv - ch_bases_pdf = MAKE_CONSENSUS.out.pdf - ch_versions = ch_versions.mix(MAKE_CONSENSUS.out.versions) - - if (!params.skip_variants_quast) { - QUAST ( - ch_consensus.collect{ it[1] }, - fasta, - gff, - true, - params.gff - ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_versions = ch_versions.mix(QUAST.out.versions) - } - - if (!params.skip_pangolin) { - PANGOLIN ( - ch_consensus - ) - ch_pangolin_report = PANGOLIN.out.report - ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) - } - - if (!params.skip_nextclade) { - NEXTCLADE_RUN ( - ch_consensus, - nextclade_db - ) - ch_nextclade_report = NEXTCLADE_RUN.out.csv - ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) - } - } - - // - // Annotate variants - // - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - 
BCFTOOLS_MPILEUP.out.vcf, - snpeff_db, - snpeff_config, - fasta - ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) - } - - // - // Variant screenshots with ASCIIGenome + // Run downstream tools for variants QC // - ch_asciigenome_pdf = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(BCFTOOLS_MPILEUP.out.vcf, by: [0]) - .join(BCFTOOLS_MPILEUP.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - sizes, - gff, - bed, - params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) - } + VARIANTS_QC ( + bam, + BCFTOOLS_MPILEUP.out.vcf, + BCFTOOLS_MPILEUP.out.stats, + fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) emit: - vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] - - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - 
snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] - - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = BCFTOOLS_MPILEUP.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = BCFTOOLS_MPILEUP.out.stats // channel: [ val(meta), [ txt ] ] - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index a21a2825..c4bf2eeb 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -1,18 +1,11 @@ // -// Variant calling and downstream processing for IVar +// Variant calling with IVar, downstream processing and QC // -include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' -include { PLOT_BASE_DENSITY } from 
'../../modules/local/plot_base_density' include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' -include { IVAR_CONSENSUS } from '../../modules/nf-core/modules/ivar/consensus/main' -include { QUAST } from '../../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' -include { ASCIIGENOME } from '../../modules/local/asciigenome' - +include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' -include { SNPEFF_SNPSIFT } from './snpeff_snpsift' +include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_IVAR { take: @@ -21,7 +14,6 @@ workflow VARIANTS_IVAR { sizes // channel: /path/to/genome.sizes gff // channel: /path/to/genome.gff bed // channel: /path/to/primers.bed - nextclade_db // channel: /path/to/nextclade_db/ snpeff_db // channel: /path/to/snpeff_db/ snpeff_config // channel: /path/to/snpeff.config ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants @@ -56,151 +48,41 @@ workflow VARIANTS_IVAR { ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) // - // Create genome consensus - // - ch_consensus = Channel.empty() - ch_consensus_qual = Channel.empty() - ch_bases_tsv = Channel.empty() - ch_bases_pdf = Channel.empty() - ch_quast_results = Channel.empty() - ch_quast_tsv = Channel.empty() - ch_pangolin_report = Channel.empty() - ch_nextclade_report = Channel.empty() - if (!params.skip_consensus) { - IVAR_CONSENSUS ( - bam, - fasta, - params.save_mpileup - ) - ch_consensus = IVAR_CONSENSUS.out.fasta - ch_consensus_qual = IVAR_CONSENSUS.out.qual - ch_versions = ch_versions.mix(IVAR_CONSENSUS.out.versions.first()) - - PLOT_BASE_DENSITY ( - ch_consensus - ) - ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv - ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf - ch_versions = 
ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) - - if (!params.skip_variants_quast) { - QUAST ( - ch_consensus.collect{ it[1] }, - fasta, - gff, - true, - params.gff - ) - ch_quast_results = QUAST.out.results - ch_quast_tsv = QUAST.out.tsv - ch_versions = ch_versions.mix(QUAST.out.versions) - } - - if (!params.skip_pangolin) { - PANGOLIN ( - ch_consensus - ) - ch_pangolin_report = PANGOLIN.out.report - ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) - } - - if (!params.skip_nextclade) { - NEXTCLADE_RUN ( - ch_consensus, - nextclade_db - ) - ch_nextclade_report = NEXTCLADE_RUN.out.csv - ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) - } - } - - // - // Annotate variants - // - ch_snpeff_vcf = Channel.empty() - ch_snpeff_tbi = Channel.empty() - ch_snpeff_stats = Channel.empty() - ch_snpeff_csv = Channel.empty() - ch_snpeff_txt = Channel.empty() - ch_snpeff_html = Channel.empty() - ch_snpsift_txt = Channel.empty() - if (params.gff && !params.skip_snpeff) { - SNPEFF_SNPSIFT ( - VCF_BGZIP_TABIX_STATS.out.vcf, - snpeff_db, - snpeff_config, - fasta - ) - ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf - ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi - ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats - ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv - ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt - ch_snpeff_html = SNPEFF_SNPSIFT.out.html - ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt - ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) - } - - // - // Variant screenshots with ASCIIGenome + // Run downstream tools for variants QC // - ch_asciigenome_pdf = Channel.empty() - if (!params.skip_asciigenome) { - bam - .join(VCF_BGZIP_TABIX_STATS.out.vcf, by: [0]) - .join(VCF_BGZIP_TABIX_STATS.out.stats, by: [0]) - .map { meta, bam, vcf, stats -> - if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { - return [ meta, bam, vcf ] - } - } - .set { ch_asciigenome } - - ASCIIGENOME ( - ch_asciigenome, - fasta, - sizes, - gff, - bed, - 
params.asciigenome_window_size, - params.asciigenome_read_depth - ) - ch_asciigenome_pdf = ASCIIGENOME.out.pdf - ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) - } + VARIANTS_QC ( + bam, + VCF_BGZIP_TABIX_STATS.out.vcf, + VCF_BGZIP_TABIX_STATS.out.stats, + fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) emit: - tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] - log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] - multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - - consensus = ch_consensus // channel: [ val(meta), [ fasta ] ] - consensus_qual = ch_consensus_qual // channel: [ val(meta), [ fasta ] ] - bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] - bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] - - quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] - quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + tsv = IVAR_VARIANTS.out.tsv // channel: [ val(meta), [ tsv ] ] - snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] - snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] - snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] - snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] - snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] - snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] - snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] + log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ 
log ] ] + multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] - nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] - asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/variants_qc.nf b/subworkflows/local/variants_qc.nf new file mode 100644 index 00000000..cded165c --- /dev/null +++ b/subworkflows/local/variants_qc.nf @@ -0,0 +1,91 @@ +// +// Variant calling QC +// + +include { ASCIIGENOME } from '../../modules/local/asciigenome' +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' + +workflow VARIANTS_QC { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf // channel: [ val(meta), [ vcf ] ] + stats // channel: [ val(meta), [ bcftools_stats ] ] + fasta // channel: /path/to/genome.fasta + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: 
/path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + + ch_versions = Channel.empty() + + // + // Annotate variants + // + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + if (params.gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( + vcf, + snpeff_db, + snpeff_config, + fasta + ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) + } + + // + // Variant screenshots with ASCIIGenome + // + ch_asciigenome_pdf = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(vcf, by: [0]) + .join(stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + sizes, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) + } + + emit: + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), 
[ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/illumina.nf b/workflows/illumina.nf index c2807c3b..c9ec299e 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -5,10 +5,11 @@ */ def valid_params = [ - protocols : ['metagenomic', 'amplicon'], - callers : ['ivar', 'bcftools'], - assemblers : ['spades', 'unicycler', 'minia'], - spades_modes: ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] + protocols : ['metagenomic', 'amplicon'], + variant_callers : ['ivar', 'bcftools'], + consensus_callers : ['ivar', 'bcftools'], + assemblers : ['spades', 'unicycler', 'minia'], + spades_modes : ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] ] def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) @@ -28,8 +29,9 @@ if (params.input) { ch_input = file(params.input) } else { exit 1 if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = [] } def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] -def callers = params.callers ? params.callers.split(',').collect{ it.trim().toLowerCase() } : [] -if (!callers) { callers = params.protocol == 'amplicon' ? ['ivar'] : ['bcftools'] } + +def variant_caller = params.variant_caller +if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 
'ivar' : 'bcftools' } /* ======================================================================================== @@ -53,9 +55,8 @@ ch_ivar_variants_header_mqc = file("$projectDir/assets/headers/ivar_variants_hea // // MODULE: Loaded from modules/local/ // -include { BCFTOOLS_ISEC } from '../modules/local/bcftools_isec' -include { CUTADAPT } from '../modules/local/cutadapt' -include { MULTIQC } from '../modules/local/multiqc_illumina' +include { CUTADAPT } from '../modules/local/cutadapt' +include { MULTIQC } from '../modules/local/multiqc_illumina' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_READS } from '../modules/local/multiqc_tsv_from_list' @@ -70,6 +71,8 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' +include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' +include { CONSENSUS_BCFTOOLS } from '../subworkflows/local/consensus_bcftools' include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades' include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler' include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' @@ -351,7 +354,6 @@ workflow ILLUMINA { ch_mosdepth_multiqc = Channel.empty() ch_amplicon_heatmap_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_mosdepth) { - MOSDEPTH_GENOME ( ch_bam.join(ch_bai, by: [0]), [], @@ -384,35 +386,69 @@ workflow ILLUMINA { // // SUBWORKFLOW: Call variants with IVar // - ch_ivar_vcf = Channel.empty() - ch_ivar_tbi = Channel.empty() - ch_ivar_counts_multiqc = Channel.empty() 
- ch_ivar_stats_multiqc = Channel.empty() - ch_ivar_snpeff_multiqc = Channel.empty() - ch_ivar_quast_multiqc = Channel.empty() - ch_ivar_pangolin_multiqc = Channel.empty() - ch_ivar_nextclade_multiqc = Channel.empty() - if (!params.skip_variants && 'ivar' in callers) { + ch_vcf = Channel.empty() + ch_tbi = Channel.empty() + ch_ivar_counts_multiqc = Channel.empty() + ch_ivar_stats_multiqc = Channel.empty() + ch_ivar_snpeff_multiqc = Channel.empty() + if (!params.skip_variants && variant_caller == 'ivar') { VARIANTS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.gff, (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], - PREPARE_GENOME.out.nextclade_db, PREPARE_GENOME.out.snpeff_db, PREPARE_GENOME.out.snpeff_config, ch_ivar_variants_header_mqc ) - ch_ivar_vcf = VARIANTS_IVAR.out.vcf - ch_ivar_tbi = VARIANTS_IVAR.out.tbi - ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv - ch_ivar_stats_multiqc = VARIANTS_IVAR.out.stats - ch_ivar_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv - ch_ivar_quast_multiqc = VARIANTS_IVAR.out.quast_tsv - ch_ivar_pangolin_multiqc = VARIANTS_IVAR.out.pangolin_report - ch_ivar_nextclade_report = VARIANTS_IVAR.out.nextclade_report - ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + ch_vcf = VARIANTS_IVAR.out.vcf + ch_tbi = VARIANTS_IVAR.out.tbi + ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv + ch_ivar_stats_multiqc = VARIANTS_IVAR.out.stats + ch_ivar_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv + ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + } + + // + // SUBWORKFLOW: Call variants with BCFTools + // + ch_bcftools_stats_multiqc = Channel.empty() + ch_bcftools_snpeff_multiqc = Channel.empty() + if (!params.skip_variants && variant_caller == 'bcftools') { + VARIANTS_BCFTOOLS ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.chrom_sizes, + PREPARE_GENOME.out.gff, + (params.protocol == 'amplicon' && params.primer_bed) 
? PREPARE_GENOME.out.primer_bed : [], + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config + ) + ch_vcf = VARIANTS_BCFTOOLS.out.vcf + ch_tbi = VARIANTS_BCFTOOLS.out.tbi + ch_bcftools_stats_multiqc = VARIANTS_BCFTOOLS.out.stats + ch_bcftools_snpeff_multiqc = VARIANTS_BCFTOOLS.out.snpeff_csv + ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) + } + + // + // SUBWORKFLOW: Call consensus with iVar and downstream QC + // + ch_ivar_quast_multiqc = Channel.empty() + ch_ivar_pangolin_multiqc = Channel.empty() + ch_ivar_nextclade_multiqc = Channel.empty() + if (!params.skip_consensus && params.consensus_caller == 'ivar') { + CONSENSUS_IVAR ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gff, + PREPARE_GENOME.out.nextclade_db + ) + ch_ivar_quast_multiqc = CONSENSUS_IVAR.out.quast_tsv + ch_ivar_pangolin_multiqc = CONSENSUS_IVAR.out.pangolin_report + ch_ivar_nextclade_report = CONSENSUS_IVAR.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_IVAR.out.versions) // // MODULE: Get Nextclade clade information for MultiQC report @@ -433,34 +469,24 @@ workflow ILLUMINA { } // - // SUBWORKFLOW: Call variants with BCFTools + // SUBWORKFLOW: Call consensus with BCFTools // - ch_bcftools_vcf = Channel.empty() - ch_bcftools_tbi = Channel.empty() - ch_bcftools_stats_multiqc = Channel.empty() - ch_bcftools_snpeff_multiqc = Channel.empty() ch_bcftools_quast_multiqc = Channel.empty() ch_bcftools_pangolin_multiqc = Channel.empty() ch_bcftools_nextclade_multiqc = Channel.empty() - if (!params.skip_variants && 'bcftools' in callers) { - VARIANTS_BCFTOOLS ( + if (!params.skip_consensus && params.consensus_caller == 'bcftools' && variant_caller) { + CONSENSUS_BCFTOOLS ( ch_bam, + ch_vcf, + ch_tbi, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.gff, - (params.protocol == 'amplicon' && params.primer_bed) ? 
PREPARE_GENOME.out.primer_bed : [], - PREPARE_GENOME.out.nextclade_db, - PREPARE_GENOME.out.snpeff_db, - PREPARE_GENOME.out.snpeff_config + PREPARE_GENOME.out.nextclade_db ) - ch_bcftools_vcf = VARIANTS_BCFTOOLS.out.vcf - ch_bcftools_tbi = VARIANTS_BCFTOOLS.out.tbi - ch_bcftools_stats_multiqc = VARIANTS_BCFTOOLS.out.stats - ch_bcftools_snpeff_multiqc = VARIANTS_BCFTOOLS.out.snpeff_csv - ch_bcftools_quast_multiqc = VARIANTS_BCFTOOLS.out.quast_tsv - ch_bcftools_pangolin_multiqc = VARIANTS_BCFTOOLS.out.pangolin_report - ch_bcftools_nextclade_report = VARIANTS_BCFTOOLS.out.nextclade_report - ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) + ch_bcftools_quast_multiqc = CONSENSUS_BCFTOOLS.out.quast_tsv + ch_bcftools_pangolin_multiqc = CONSENSUS_BCFTOOLS.out.pangolin_report + ch_bcftools_nextclade_report = CONSENSUS_BCFTOOLS.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_BCFTOOLS.out.versions) // // MODULE: Get Nextclade clade information for MultiQC report @@ -480,19 +506,6 @@ workflow ILLUMINA { .set { ch_bcftools_nextclade_multiqc } } - // - // MODULE: Intersect variants across callers - // - if (!params.skip_variants && callers.size() > 1) { - BCFTOOLS_ISEC ( - ch_ivar_vcf - .join(ch_ivar_tbi, by: [0]) - .join(ch_bcftools_vcf, by: [0]) - .join(ch_bcftools_tbi, by: [0]) - ) - ch_versions = ch_versions.mix(BCFTOOLS_ISEC.out.versions) - } - // // MODULE: Primer trimming with Cutadapt //