Merge branch 'release/v7.1.0'

Clinical-Genomics · Jun 19, 2019 · a71515d · a71515d
2 parents 9fe0b4a + 07cfb38
commit a71515d
Show file tree

Hide file tree

Showing 127 changed files with 10,224 additions and 2,258 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -15,7 +15,6 @@ before_install:
 
 ## Install conda
 ##  - wget https://repo.anaconda.com/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
-#  - wget https://repo.anaconda.com/miniconda/Miniconda2-4.5.11-Linux-x86_64.sh -O miniconda.sh;
   - wget https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O miniconda.sh;
   - bash miniconda.sh -b -p $HOME/miniconda
   - source $HOME/miniconda/etc/profile.d/conda.sh
@@ -36,11 +35,11 @@ install:
 ## Test installation script and gather coverage information
   - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl t/mip_install.test
 ## Generate rare disease installation script
-  - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl mip install rd_dna --bash_set_errexit --install emip epeddy eperl_5.26 epy3 esvdb etiddit --envn emip=mip_travis --snpg GRCh37.75 --skip gatk
+  - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl mip install rd_dna --bash_set_errexit --install emip epeddy eperl5 epy3 esvdb etiddit --envn emip=mip_travis --snpg GRCh37.75 --skip gatk --skip snpeff
 ## Install MIP rare disease
   - bash mip.sh
 ## Generate rna installation script
-  - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl mip install rd_rna --bash_set_errexit --install emip epy3 eperl5.26
+  - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl mip install rd_rna --bash_set_errexit --install emip epy3 eperl5
 ## Install MIP rna
   - bash mip.sh
 
@@ -62,6 +61,8 @@ script:
   - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl t/mip_analyse_rd_dna.test
 ## Set-up test coverage for mip_analyse_vcf_rerun.test
   - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl t/mip_analyse_rd_dna_vcf_rerun.test
+## Set-up test coverage for mip qccollect
+  - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl mip qccollect --log_file qc_metrics_qccollect.log --regexp_file t/data/references/qc_regexp_-v1.22-.yaml --sample_info_file t/data/test_data/643594-miptest_qc_sample_info_travis.yaml --evaluate_plink_gender --outfile qc_metrics.yaml
 ## Set-up test coverage for mip_analyse_rna.test
   - conda activate MIP_rd_rna
   - PERL5OPT=-MDevel::Cover=-ignore,"^t/",-coverage,statement,branch,condition,path,subroutine perl t/mip_analyse_rd_rna.test 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,18 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## [7.1.0]
+- Updated TIDDIT to enable faster processing
+- Updated GATK for faster haplotype calling
+
+**Tools**
+- TIDDIT: 2.5.0 -> 2.7.1
+- bcftools: 1.9-h4da6232_0 -> 1.9=ha228f0b_4
+- bioconductor-deseq2: 1.18.1=r3.4.1_1 -> 1.22.1=r351hf484d3e_0
+- bioconductor-tximport: 1.8.0=r341_0 -> 1.12.0=r351_0
+- GATK: 4.1.0.0-0 -> 4.1.2.0-1
+- samtools: 1.9-h8ee4bcc_1 -> 1.9=h8571acd_11
+
 ## [7.0.9]
 - Removed plink memory allocation from rd_dna_vcf_rerun
 

diff --git a/README.md b/README.md
@@ -25,11 +25,11 @@ MIP performs whole genome or target region analysis of sequenced single-end and/
 
 MIP performs QC, alignment, coverage analysis, variant discovery and annotation, sample checks as well as ranking the found variants according to disease potential with a minimum of manual intervention. MIP is compatible with Scout for visualization of identified variants.
 
-MIP rare disease DNA analyses single nucleotide variants (snvs), insertions and deletions (indels) and structural variants (SV).
+MIP rare disease DNA analyses single nucleotide variants (SNVs), insertions and deletions (INDELs) and structural variants (SVs).
 
 MIP rare disease RNA analyses mono allelic expression, fusion transcripts, transcript expression and alternative splicing.
 
-MIP rare disease DNA vcf rerun performs re-runs starting from bcfs.
+MIP rare disease DNA vcf rerun performs re-runs starting from BCFs or VCFs.
 
 MIP has been in use in the clinical production at the Clinical Genomics facility at Science for Life Laboratory since 2014.
 
@@ -39,7 +39,7 @@ MIP has been in use in the clinical production at the Clinical Genomics facility
 $ mip analyse rd_dna [case_id] --config_file [mip_config_dna.yaml] --pedigree_file [case_id_pedigree.yaml]
 ```
 
-### MIP analyse rare disease DNA vcf rerun
+### MIP analyse rare disease DNA VCF rerun
 ```Bash
 mip analyse rd_dna_vcf_rerun [case_id] --config_file [mip_config_dna_vcf_rerun.yaml] --vcf_rerun_file vcf.bcf  --sv_vcf_rerun_file sv_vcf.bcf --pedigree [case_id_pedigree_vcf_rerun.yaml]
 ```
@@ -50,28 +50,26 @@ $ mip analyse rd_rna [case_id] --config_file [mip_config_rna.yaml] --pedigree_fi
 ## Features
 
 * Installation
-  * Simple automated install of all programs using conda/SHELL via supplied install application
+  * Simple automated install of all programs using conda/pip/SHELL via supplied install application
   * Downloads and prepares references in the installation process
   * Handle conflicting tool dependencies
 * Autonomous
   * Checks that all dependencies are fulfilled before launching
   * Builds and prepares references and/or files missing before launching
-  * Decompose and normalise reference\(s\) and variant vcf\(s\)
+  * Decompose and normalise reference\(s\) and variant VCF\(s\)
   * Splits and merges files/contigs for samples and case when relevant
 * Automatic
   * A minimal amount of hands-on time
   * Tracks and executes all recipes without manual intervention
   * Creates internal queues at nodes to optimize processing
-  * Minimal IO between nodes and login node
 * Flexible:
-  * Design your own workflow by turning on/off relevant recipes
+  * Design your own workflow by turning on/off relevant recipes in predefined pipelines
   * Restart an analysis from anywhere in your workflow
-  * Process one, or multiple samples using the recipe\(s\) of your choice
+  * Process one, or multiple samples
   * Supply parameters on the command line, in a pedigree.yaml file or via config files
   * Simulate your analysis before performing it
-  * Redirect each recipe analysis process to a temporary directory \(@nodes or @login\)
   * Limit a run to a specific set of genomic intervals or chromosomes
-  * Use multiple variant callers for both snv, indels and SV
+  * Use multiple variant callers for SNVs, INDELs and SVs
   * Use multiple annotation programs
   * Optionally split data into clinical variants and research variants
 * Fast
@@ -83,7 +81,7 @@ $ mip analyse rd_rna [case_id] --config_file [mip_config_rna.yaml] --pedigree_fi
   * Log sample meta-data and sequence meta-data
   * Log version numbers of softwares and databases
   * Checks sample integrity \(sex, contamination, duplications, ancestry, inbreeding and relationship\)
-  * Test data output existens and integrity using automated tests
+  * Test data output file creation and integrity using automated tests
 * Annotation
   * Gene annotation
     * Summarize over all transcript and output on gene level
@@ -115,6 +113,7 @@ We recommend perlbrew for installing and managing perl and cpanm libraries. Inst
 
 #### Automated Installation \(Linux x86\_64\)
 Below are instructions for installing MIP for analysis of rare diseases. Installation of the RNA pipeline follows a similar syntax.
+
 ##### 1.Clone the official git repository
 
 ```Bash
@@ -129,7 +128,7 @@ $ cpanm --installdeps .
 $ cd -
 ```  
 
-##### 3.Test conda and mip installation files (optional)
+##### 3.Test conda and mip installation files (optional, but recommended)
 
 ```Bash
 $ perl t/mip_install.test
@@ -147,7 +146,7 @@ This will generate a bash script called "mip.sh" in your working directory.
   * MIP_rare_ecnvnator
   * MIP_rare_edelly
   * MIP_rare_epeddy
-  * MIP_rare_eperl_5.26
+  * MIP_rare_eperl5
   * MIP_rare_epy3
   * MIP_rare_etiddit
   * MIP_rare_evep
@@ -167,7 +166,7 @@ A conda environment will be created where MIP with most of its dependencies will
 ###### *Note:*
   Some references are quite large and will take time to download. You might want to run this using screen or tmux. Alternatively, the installation script can be submitted as a sbatch job if the flag ``--sbatch_mode`` is used when generating the installation script.
 
-##### 6.Test your MIP installation (optional)
+##### 6.Test your MIP installation (optional, but recommended)
 
 Make sure to activate your MIP conda base environment before executing prove.
 
@@ -215,11 +214,11 @@ MIP is called from the command line and takes input from the command line \(prec
 Lists are supplied as repeated flag entries on the command line or in the config using the yaml format for arrays.  
 Only flags that will actually be used needs to be specified and MIP will check that all required parameters are set before submitting to SLURM.
 
-Recipe parameters can be set to "0" \(=off\), "1" \(=on\) and "2" \(=dry run mode\). Any recipe can be set to dry run mode and MIP will create sbatch scripts, but not submit them to SLURM. MIP can be restarted from any recipe using the ``--start_with_recipe`` flag.
+Recipe parameters can be set to "0" \(=off\), "1" \(=on\) and "2" \(=dry run mode\). Any recipe can be set to dry run mode and MIP will create the sbatch scripts, but not submit them to SLURM. MIP can be restarted from any recipe using the ``--start_with_recipe`` flag.
 
 MIP will overwrite data files when reanalyzing, but keeps all "versioned" sbatch scripts for traceability.
 
-You can always supply `perl mip [process] [pipeline] --help` to list all available parameters and defaults.
+You can always supply `mip [process] [pipeline] --help` to list all available parameters and defaults.
 
 Example usage:
 ```Bash
@@ -230,14 +229,15 @@ This will analyse case 3 using 3 individuals from that case and begin the analys
 
 #### Input
 
-All references and template files should be placed directly in the reference directory specified by `--reference_dir`.
+* Fastq file directories can be supplied with `--infile_dirs [PATH_TO_FASTQ_DIR=SAMPLE_ID]`
+* All references and template files should be placed directly in the reference directory specified by `--reference_dir`.
 
 ##### Meta-Data
 
 * [Configuration file] \(YAML-format\)
 * [Gene panel file]
 * [Pedigree file] \(YAML-format\)
-* [Rank model file] \(Ini-format; Snv/indel\)
+* [Rank model file] \(Ini-format; SNV/INDEL\)
 * [SV rank model file] \(Ini-format; SV\)
 * [Qc regexp file] \(YAML-format\)
 
@@ -251,7 +251,7 @@ MIP will create sbatch scripts \(.sh\) and submit them in proper order with atta
 
 ##### Data
 
-MIP will place any generated datafiles in the output data directory specified by `--outdata_dir`. All data files are regenerated for each analysis. STDOUT and STDERR for each recipe is written in the recipe/info directory.
+MIP will place any generated data files in the output data directory specified by `--outdata_dir`. All data files are regenerated for each analysis. STDOUT and STDERR for each recipe are written in the recipe/info directory.
 
 [Configuration file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/mip_config.yaml
 [Gene panel file]: https://github.com/Clinical-Genomics/MIP/blob/master/templates/aggregated_master.txt

diff --git a/definitions/cpanfile b/definitions/cpanfile
@@ -27,7 +27,7 @@ requires qw{ JSON::XS 4.0 };                        # STAR-Fusion
 requires qw{ List::MoreUtils 0.413 };               # MIP
 requires qw{ List::Util 1.49 };                     # MIP
 requires qw{ Log::Log4perl 1.49 };                  # MIP
-requires qw{ Modern::Perl 1.20170117 };             # MIP
+requires qw{ Modern::Perl 1.20181021 };             # MIP
 requires qw{ Module::Build 0.4224 };                # VEP
 requires qw{ Module::CPANfile 1.1002 };             # MIP
 requires qw{ Moose::Util::TypeConstraints 2.2010 }; # MIP

diff --git a/definitions/download_rd_dna_parameters.yaml b/definitions/download_rd_dna_parameters.yaml
@@ -43,6 +43,34 @@
   data_type: SCALAR
   default: 1
   type: recipe
+cadd_bravo_topmed:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
+cadd_gnomad_genomes:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
+cadd_to_vcf_header:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
+cadd_whole_genome_snvs:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
 clinvar:
   analysis_mode: case
   associated_recipe:
@@ -64,6 +92,13 @@ dbsnp:
   data_type: SCALAR
   default: 1
   type: recipe
+delly_exclude:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
 download_pipeline_type:
   associated_recipe:
    - mip
@@ -77,6 +112,13 @@ expansionhunter:
   data_type: SCALAR
   default: 1
   type: recipe
+genbank_haplogroup:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
 genomic_superdups:
   analysis_mode: case
   associated_recipe:
@@ -126,6 +168,13 @@ gatk_mitochondrial_ref:
   data_type: SCALAR
   default: 1
   type: recipe
+rank_model:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
 recipe_core_number:
   associated_recipe:
    - mip
@@ -137,17 +186,28 @@ recipe_core_number:
     1000g_omni: 1
     1000g_sites: 1
     1000g_snps: 1
+    cadd_bravo_topmed: 1
+    cadd_gnomad_genomes: 1
+    cadd_to_vcf_header: 1
+    cadd_whole_genome_snvs: 1
     clinvar: 1
     dbnsfp: 1
     dbsnp: 1
+    delly_exclude: 1
     expansionhunter: 1
+    genbank_haplogroup: 1
     genomic_superdups: 1
     giab: 1
     gnomad: 1
     human_reference: 1
     hapmap: 1
     mills_and_1000g_indels: 1
     gatk_mitochondrial_ref: 1
+    rank_model: 1
+    reduced_penetrance: 1
+    scout_exons: 1
+    svrank_model: 1
+    sv_vcfanno_config: 1
   type: mip
 recipe_time:
   associated_recipe:
@@ -160,20 +220,59 @@ recipe_time:
     1000g_omni: 1
     1000g_sites: 1
     1000g_snps: 1
+    cadd_bravo_topmed: 5
+    cadd_gnomad_genomes: 5
+    cadd_to_vcf_header: 1
+    cadd_whole_genome_snvs: 15
     clinvar: 1
     dbnsfp: 15
     dbsnp: 1
+    delly_exclude: 1
     expansionhunter: 1
+    genbank_haplogroup: 1
     genomic_superdups: 1
     giab: 1
     gnomad: 1
     hapmap: 1
     human_reference: 1
     mills_and_1000g_indels: 1
     gatk_mitochondrial_ref: 1
+    rank_model: 1
+    reduced_penetrance: 1
+    scout_exons: 1
+    svrank_model: 1
+    sv_vcfanno_config: 1
   type: mip
+reduced_penetrance:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
 reference_feature:
   associated_recipe:
    - mip
   data_type: HASH
   type: mip
+scout_exons:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
+svrank_model:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe
+sv_vcfanno_config:
+  analysis_mode: case
+  associated_recipe:
+   - mip
+  data_type: SCALAR
+  default: 1
+  type: recipe