Merge branch 'dev' of github.com:nf-core/raredisease into dev

nf-core · Sep 25, 2024 · 39e473d · 39e473d
2 parents d509436 + df4db70
commit 39e473d
Show file tree

Hide file tree

Showing 14 changed files with 155 additions and 76 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,22 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## 2.3.0dev - Getafix [xxxx-xx-xx]
+
+### `Added`
+
+- A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608)
+
+### `Changed`
+
+- Report only variants above 5% heteroplasmy in the clinical vcf file for mitochondria [#616](https://github.com/nf-core/raredisease/pull/616)
+
+### `Fixed`
+
+### Parameters
+
+### Tool updates
+
 ## 2.2.0 - Dogmatix [2024-09-13]
 
 ### `Added`

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
@@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/
 custom_logo_title: "nf-core/raredisease"
 
 report_comment: >
-  This report has been generated by the <a href="https://github.com/nf-core/raredisease/releases/tag/2.2.0" target="_blank">nf-core/raredisease</a>
+  This report has been generated by the <a href="https://github.com/nf-core/raredisease/tree/dev" target="_blank">nf-core/raredisease</a>
   analysis pipeline. For information about how to interpret these results, please see the
-  <a href="https://nf-co.re/raredisease/2.2.0/docs/output" target="_blank">documentation</a>.
+  <a href="https://nf-co.re/raredisease/dev/docs/output" target="_blank">documentation</a>.
 report_section_order:
   "nf-core-raredisease-methods-description":
     order: -1000

diff --git a/conf/modules/generate_clinical_set.config b/conf/modules/generate_clinical_set.config
@@ -50,10 +50,16 @@ process {
 process {
     withName: '.*:GENERATE_CLINICAL_SET_MT:ENSEMBLVEP_FILTERVEP' {
         ext.when   = !params.skip_vep_filter
-        ext.prefix = { "${meta.id}_mt_${meta.set}" }
+        ext.prefix = { "${meta.id}_mt_filtervep_${meta.set}" }
         ext.args   = { "--filter \"HGNC_ID in ${feature_file}\"" }
     }
 
+    withName: '.*:GENERATE_CLINICAL_SET_MT:BCFTOOLS_FILTER' {
+        ext.when   = !params.skip_vep_filter
+        ext.prefix = { "${meta.id}_mt_${meta.set}" }
+        ext.args   = { "-Oz -i 'AF>0.05'" }
+    }
+
     withName: '.*:GENERATE_CLINICAL_SET_MT:TABIX_BGZIP' {
         ext.when   = !params.skip_vep_filter
         ext.prefix = { "${meta.id}_mt_${meta.set}" }

diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
@@ -42,16 +42,16 @@ process {
         ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")}
     }
 
-    withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
-        ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
+    withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT.*' {
+        ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
     }
 
-    withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
-        ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
+    withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT.*' {
+        ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
     }
 
-    withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
-        ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
+    withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT.*' {
+        ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
     }
 
     withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
@@ -67,8 +67,8 @@ process {
         ext.when = {!params.mt_fasta}
     }
 
-    withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
-        ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) }
+    withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT' {
+        ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) }
     }
 
     withName: '.*PREPARE_REFERENCES:GATK_SD' {
@@ -79,8 +79,8 @@ process {
         ext.args = { "--interval-file-name ${meta.id}_mt" }
     }
 
-    withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
-        ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes)}
+    withName: '.*PREPARE_REFERENCES:GATK_SD_MT' {
+        ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes)}
     }
 
     withName: '.*PREPARE_REFERENCES:TABIX_DBSNP' {

diff --git a/docs/output.md b/docs/output.md
@@ -316,7 +316,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
 <summary>Output files</summary>
 
 - `call_sv/genome`
-  - `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls.
+  - `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls. As of version 2.3.0, this file also contains mitochondrial structural variants.
   - `<case_id>_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls.
 
 </details>
@@ -529,7 +529,7 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
 <summary>Output files</summary>
 
 - `rank_and_filter/`
-  - `<case_id>_mt_ranked_clinical.vcf.gz`: file containing clinically relevant mitochondrial SNVs.
+  - `<case_id>_mt_ranked_clinical.vcf.gz`: file containing clinically relevant mitochondrial SNVs. By default, only variants with an allele fraction (heteroplasmy) above 5% are retained.
   - `<case_id>_mt_ranked_clinical.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs.
   - `<case_id>_mt_ranked_research.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores.
   - `<case_id>_mt_ranked_research.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores.

diff --git a/docs/usage.md b/docs/usage.md
@@ -205,7 +205,7 @@ The mandatory and optional parameters for each category are tabulated below.
 
 <sup>1</sup>Default variant caller is DeepVariant, but you have the option to use Sentieon as well.<br />
 <sup>2</sup>These parameters are only used by Sentieon.<br />
-<sup>3</sup>Default is WGS, but you have the option to choose WES as well.<br />
+<sup>3</sup>Default is `wgs`, but you have the option to choose `wes` or `mito` as well.<br />
 <sup>4</sup>This parameter is only used by Deepvariant.<br />
 
 ##### 5. Variant calling - Structural variants

diff --git a/nextflow.config b/nextflow.config
@@ -307,7 +307,7 @@ manifest {
     description     = """call and score variants from WGS/WES of rare disease patients"""
     mainScript      = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version         = '2.2.0'
+    version         = '2.3.0dev'
     doi             = ''
 }
 

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -485,9 +485,9 @@
                 "analysis_type": {
                     "type": "string",
                     "default": "wgs",
-                    "description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'. This changes resources consumed and tools used.",
+                    "description": "Specifies the analysis type for the pipeline: 'wgs', 'wes' or 'mito'. This changes the resources consumed and the tools used.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": ["wgs", "wes"]
+                    "enum": ["wgs", "wes", "mito"]
                 },
                 "bwa_as_fallback": {
                     "type": "boolean",

diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf
@@ -19,11 +19,16 @@ workflow ALIGN {
         ch_genome_bwamem2index   // channel: [mandatory] [ val(meta), path(index) ]
         ch_genome_bwamemeindex   // channel: [mandatory] [ val(meta), path(index) ]
         ch_genome_dictionary     // channel: [mandatory] [ val(meta), path(dict) ]
+        ch_mt_bwaindex           // channel: [mandatory] [ val(meta), path(index) ]
+        ch_mt_bwamem2index       // channel: [mandatory] [ val(meta), path(index) ]
+        ch_mt_dictionary         // channel: [mandatory] [ val(meta), path(dict) ]
+        ch_mt_fai                // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_mt_fasta              // channel: [mandatory] [ val(meta), path(fasta) ]
         ch_mtshift_bwaindex      // channel: [mandatory] [ val(meta), path(index) ]
         ch_mtshift_bwamem2index  // channel: [mandatory] [ val(meta), path(index) ]
-        ch_mtshift_fasta         // channel: [mandatory] [ val(meta), path(fasta) ]
         ch_mtshift_dictionary    // channel: [mandatory] [ val(meta), path(dict) ]
         ch_mtshift_fai           // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_mtshift_fasta         // channel: [mandatory] [ val(meta), path(fasta) ]
         val_mbuffer_mem          // integer: [mandatory] memory in megabytes
         val_platform             // string:  [mandatory] illumina or a different technology
         val_sort_threads         // integer: [mandatory] number of sorting threads
@@ -83,7 +88,7 @@ workflow ALIGN {
 
         // PREPARING READS FOR MT ALIGNMENT
 
-        if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
+        if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
             CONVERT_MT_BAM_TO_FASTQ (
                 ch_genome_bam_bai,
                 ch_genome_fasta,
@@ -94,11 +99,11 @@ workflow ALIGN {
             ALIGN_MT (
                 CONVERT_MT_BAM_TO_FASTQ.out.fastq,
                 CONVERT_MT_BAM_TO_FASTQ.out.bam,
-                ch_genome_bwaindex,
-                ch_genome_bwamem2index,
-                ch_genome_fasta,
-                ch_genome_dictionary,
-                ch_genome_fai
+                ch_mt_bwaindex,
+                ch_mt_bwamem2index,
+                ch_mt_fasta,
+                ch_mt_dictionary,
+                ch_mt_fai
             )
 
             ALIGN_MT_SHIFT (

diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
@@ -19,9 +19,12 @@ workflow CALL_SNV {
         ch_genome_fai         // channel: [mandatory] [ val(meta), path(fai) ]
         ch_genome_dictionary  // channel: [mandatory] [ val(meta), path(dict) ]
         ch_mt_intervals       // channel: [optional] [ path(interval_list) ]
-        ch_mtshift_fasta      // channel: [optional] [ val(meta), path(fasta) ]
-        ch_mtshift_fai        // channel: [optional] [ val(meta), path(fai) ]
+        ch_mt_dictionary      // channel: [optional] [ val(meta), path(dict) ]
+        ch_mt_fai             // channel: [optional] [ val(meta), path(fai) ]
+        ch_mt_fasta           // channel: [optional] [ val(meta), path(fasta) ]
         ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
+        ch_mtshift_fai        // channel: [optional] [ val(meta), path(fai) ]
+        ch_mtshift_fasta      // channel: [optional] [ val(meta), path(fasta) ]
         ch_mtshift_intervals  // channel: [optional] [ path(interval_list) ]
         ch_mtshift_backchain  // channel: [mandatory] [ val(meta), path(back_chain) ]
         ch_dbsnp              // channel: [optional] [ val(meta), path(vcf) ]
@@ -46,7 +49,7 @@ workflow CALL_SNV {
         ch_sentieon_gvcf = Channel.empty()
         ch_sentieon_gtbi = Channel.empty()
 
-        if (params.variant_caller.equals("deepvariant")) {
+        if (params.variant_caller.equals("deepvariant") && !params.analysis_type.equals("mito")) {
             CALL_SNV_DEEPVARIANT (      // triggered only when params.variant_caller is set as deepvariant and params.analysis_type is not mito
                 ch_genome_bam_bai,
                 ch_genome_fasta,
@@ -97,12 +100,12 @@ workflow CALL_SNV {
         ch_genome_tabix     = GATK4_SELECTVARIANTS.out.tbi
         ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true)
 
-        if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
+        if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
             CALL_SNV_MT(
                 ch_mt_bam_bai,
-                ch_genome_fasta,
-                ch_genome_fai,
-                ch_genome_dictionary,
+                ch_mt_fasta,
+                ch_mt_fai,
+                ch_mt_dictionary,
                 ch_mt_intervals
             )
 
@@ -117,9 +120,9 @@ workflow CALL_SNV {
             POSTPROCESS_MT_CALLS(
                 CALL_SNV_MT.out.vcf,
                 CALL_SNV_MT_SHIFT.out.vcf,
-                ch_genome_fasta,
-                ch_genome_dictionary,
-                ch_genome_fai,
+                ch_mt_fasta,
+                ch_mt_dictionary,
+                ch_mt_fai,
                 ch_mtshift_backchain,
                 ch_case_info,
                 ch_foundin_header,

diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf
@@ -32,11 +32,16 @@ workflow CALL_STRUCTURAL_VARIANTS {
 
     main:
         ch_versions = Channel.empty()
+        ch_merged_svs = Channel.empty()
+        ch_merged_tbi = Channel.empty()
 
-        CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
-            .diploid_sv_vcf
-            .collect{it[1]}
-            .set{ manta_vcf }
+        if (!params.analysis_type.equals("mito")) {
+            CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
+                .diploid_sv_vcf
+                .collect{it[1]}
+                .set{ manta_vcf }
+            ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
+        }
 
         if (params.analysis_type.equals("wgs")) {
             CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
@@ -61,7 +66,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
             ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
         }
 
-        if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
+        if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
             CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
             ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
         }
@@ -74,39 +79,41 @@ workflow CALL_STRUCTURAL_VARIANTS {
                     .combine(cnvnator_vcf)
                     .toList()
                     .set { vcf_list }
-            } else {
+            } else if (!params.analysis_type.equals("mito")) {
                 manta_vcf
                     .toList()
                     .set { vcf_list }
             }
-        } else if (params.analysis_type.equals("wgs")){
+        } else if (params.analysis_type.equals("wgs")) {
             tiddit_vcf
                 .combine(manta_vcf)
                 .combine(gcnvcaller_vcf)
                 .combine(cnvnator_vcf)
                 .toList()
                 .set { vcf_list }
-        } else {
+        } else if (!params.analysis_type.equals("mito")) {
             manta_vcf
                 .combine(gcnvcaller_vcf)
                 .toList()
                 .set { vcf_list }
         }
 
-        ch_case_info
-            .combine(vcf_list)
-            .set { merge_input_vcfs }
+        if (!params.analysis_type.equals("mito")) {
+            ch_case_info
+                .combine(vcf_list)
+                .set { merge_input_vcfs }
 
-        SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)
+            SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)
 
-        TABIX_TABIX (SVDB_MERGE.out.vcf)
-
-        ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
-        ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
-        ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
+            TABIX_TABIX (SVDB_MERGE.out.vcf)
+            ch_merged_svs = SVDB_MERGE.out.vcf
+            ch_merged_tbi = TABIX_TABIX.out.tbi
+            ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
+            ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
+        }
 
     emit:
-        vcf      = SVDB_MERGE.out.vcf  // channel: [ val(meta), path(vcf)]
-        tbi      = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi)]
-        versions = ch_versions         // channel: [ path(versions.yml) ]
+        vcf      = ch_merged_svs // channel: [ val(meta), path(vcf)]
+        tbi      = ch_merged_tbi // channel: [ val(meta), path(tbi)]
+        versions = ch_versions   // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/generate_clinical_set.nf b/subworkflows/local/generate_clinical_set.nf
@@ -5,11 +5,13 @@
 include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep'
 include { TABIX_BGZIP          } from '../../modules/nf-core/tabix/bgzip'
 include { TABIX_TABIX          } from '../../modules/nf-core/tabix/tabix'
+include { BCFTOOLS_FILTER      } from '../../modules/nf-core/bcftools/filter'
 
 workflow GENERATE_CLINICAL_SET {
     take:
         ch_vcf      // channel: [mandatory] [ val(meta), path(vcf) ]
         ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ]
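+        val_ismt    // boolean: [mandatory] true when the input vcf holds mitochondrial variants; routes the filtered clinical set through BCFTOOLS_FILTER instead of TABIX_BGZIP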
 
     main:
         ch_versions = Channel.empty()
@@ -28,16 +30,23 @@ workflow GENERATE_CLINICAL_SET {
         .output
         .set { ch_filtervep_out }
 
-        TABIX_BGZIP( ch_filtervep_out )
+        if (val_ismt) {
+            BCFTOOLS_FILTER (ch_filtervep_out)
+            ch_clinical = BCFTOOLS_FILTER.out.vcf
+            ch_versions = ch_versions.mix( BCFTOOLS_FILTER.out.versions )
+        } else {
+            TABIX_BGZIP( ch_filtervep_out )
+            ch_clinical = TABIX_BGZIP.out.output
+            ch_versions = ch_versions.mix( TABIX_BGZIP.out.versions )
+        }
 
         ch_clin_research_vcf.research
-            .mix( TABIX_BGZIP.out.output )
+            .mix( ch_clinical )
             .set { ch_clin_research_split }
 
         TABIX_TABIX( ch_clin_research_split )
 
         ch_versions = ch_versions.mix( ENSEMBLVEP_FILTERVEP.out.versions )
-        ch_versions = ch_versions.mix( TABIX_BGZIP.out.versions )
         ch_versions = ch_versions.mix( TABIX_TABIX.out.versions )
 
     emit: