diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index fef25faa..83bf5a89 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,7 @@ We try to manage the required tasks for nf-core/chipseq using GitHub issues, you However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/chipseq then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +> If you need help using or modifying nf-core/chipseq then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack/). @@ -26,13 +26,13 @@ If you're not used to this workflow with git, you can start with some [basic doc ## Tests -When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. +When you create a pull request with changes, [Travis CI](https://travis-ci.com/) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. There are typically two types of tests that run: ### Lint Tests -The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to. +The nf-core has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. @@ -44,4 +44,4 @@ If there are any failures then the automated tests fail. 
These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/chipseq documentation](https://github.com/nf-core/chipseq#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +For further information/help, please consult the [nf-core/chipseq documentation](https://github.com/nf-core/chipseq#documentation) and don't hesitate to get in touch on the [nf-core/chipseq pipeline channel](https://nfcore.slack.com/channels/chipseq) on [Slack](https://nf-co.re/join/slack/). diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index e052a635..96b12a70 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -1,9 +1,5 @@ # Markdownlint configuration file default: true, line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false no-duplicate-header: siblings_only: true diff --git a/.travis.yml b/.travis.yml index df2d29da..f86c8abf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,12 +9,12 @@ matrix: before_install: # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))' # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/chipseq:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/chipseq:dev nfcore/chipseq:1.0.0 
+ - docker tag nfcore/chipseq:dev nfcore/chipseq:1.1.0 install: # Install Nextflow @@ -30,7 +30,7 @@ install: - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='19.10.0' # Specify a minimum NF version that should be tested and work - NXF_VER='' # Plus: get the latest NF version and check that it works script: @@ -39,4 +39,4 @@ script: # Lint the documentation - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml # Run the pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker -ansi-log false diff --git a/CHANGELOG.md b/CHANGELOG.md index d16b9edc..6c2fbe56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,55 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [1.1.0] - 2019-11-05 + +### `Added` + +* [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config +* Update template to tools `1.7` +* Add `--trim_nextseq` parameter +* Add `CITATIONS.md` file +* Capitalised process names + +### `Fixed` + +* **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated))** +* [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 +* [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? 
+* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly +* [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks +* Fixed bug in UpSetR peak intersection plot +* Increase default resource requirements in `base.config` +* Increase process-specific requirements based on user-reported failures + +### `Dependencies` + +* Update Nextflow `0.32.0` -> `19.10.0` + +### `Deprecated` + +| Deprecated | Replacement | +|------------------------------|---------------------------| +| `--design` | `--input` | +| `--singleEnd` | `--single_end` | +| `--saveGenomeIndex` | `--save_reference` | +| `--skipTrimming` | `--skip_trimming` | +| `--saveTrimmed` | `--save_trimmed` | +| `--keepDups` | `--keep_dups` | +| `--keepMultiMap` | `--keep_multi_map` | +| `--saveAlignedIntermediates` | `--save_align_intermeds` | +| `--narrowPeak` | `--narrow_peak` | +| `--saveMACSPileup` | `--save_macs_pileup` | +| `--skipDiffAnalysis` | `--skip_diff_analysis` | +| `--skipFastQC` | `--skip_fastqc` | +| `--skipPicardMetrics` | `--skip_picard_metrics` | +| `--skipPreseq` | `--skip_preseq` | +| `--skipPlotProfile` | `--skip_plot_profile` | +| `--skipPlotFingerprint` | `--skip_plot_fingerprint` | +| `--skipSpp` | `--skip_spp` | +| `--skipIGV` | `--skip_igv` | +| `--skipMultiQC` | `--skip_multiqc` | + ## [1.0.0] - 2019-06-06 Initial release of nf-core/chipseq pipeline. diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 00000000..3a7e6242 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,101 @@ +# nf-core/chipseq: Citations + +## Pipeline tools + +* [Nextflow](https://www.ncbi.nlm.nih.gov/pubmed/28398311/) + > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +* [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) + > Li H, Durbin R. 
Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + +* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +* [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) + > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. + +* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + +* [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. 
+ +* [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) + > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. + +* [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + +* [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) + > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. + +* [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) + > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. 
+ +* [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + +* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +* [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +* [picard-tools](http://broadinstitute.github.io/picard) + +* [pysam](https://github.com/pysam-developers/pysam) + +## R packages + +* [R](https://www.R-project.org/) + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +* [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) + > Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + +* [vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) + > Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). + +* [UpSetR](https://CRAN.R-project.org/package=UpSetR) + > Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. + +* [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) + > H. Wickham. 
ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + +* [reshape2](http://www.jstatsoft.org/v21/i12/) + > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. + +* [scales](https://CRAN.R-project.org/package=scales) + > Hadley Wickham (2018). scales: Scale Functions for Visualization. + +* [pheatmap](https://CRAN.R-project.org/package=pheatmap) + > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. + +* [lattice](https://cran.r-project.org/web/packages/lattice/index.html) + > Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. + +* [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + +* [optparse](https://CRAN.R-project.org/package=optparse) + > Trevor L Davis (2018). optparse: Command Line Option Parser. + +* [xfun](https://CRAN.R-project.org/package=xfun) + > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. + +## Software packaging/containerisation tools + +* [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +* [Anaconda](https://anaconda.com) + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +* [Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
+ +* [Docker](https://www.docker.com/) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d..1cda7600 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
diff --git a/Dockerfile b/Dockerfile index 42d5a2be..c43fecf6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,13 @@ -FROM nfcore/base +FROM nfcore/base:1.7 LABEL authors="Philip Ewels" \ description="Docker image containing all requirements for nf-core/chipseq pipeline" +# Install the conda environment COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-chipseq-1.0.0/bin:$PATH + +# Add conda installation dir to PATH (instead of doing 'conda activate') +ENV PATH /opt/conda/envs/nf-core-chipseq-1.1.0/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-chipseq-1.1.0 > nf-core-chipseq-1.1.0.yml diff --git a/README.md b/README.md index 27558b76..543f009f 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,19 @@ -# ![nf-core/chipseq](docs/images/nfcore-chipseq_logo.png) +# ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) -[![Build Status](https://travis-ci.org/nf-core/chipseq.svg?branch=master)](https://travis-ci.org/nf-core/chipseq) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) +[![Build Status](https://travis-ci.com/nf-core/chipseq.svg?branch=master)](https://travis-ci.com/nf-core/chipseq) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) -[![DOI](https://zenodo.org/badge/130877729.svg)](https://zenodo.org/badge/latestdoi/130877729) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3240506.svg)](https://doi.org/10.5281/zenodo.3240506) ## Introduction + **nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing 
(ChIP-seq) data. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. -### Pipeline summary +## Pipeline summary 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) @@ -44,7 +45,30 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). 7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) -### Documentation +## Quick Start + +i. Install [`nextflow`](https://nf-co.re/usage/installation) + +ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html) + +iii. Download the pipeline and test it on a minimal dataset with a single command + +```bash +nextflow run nf-core/chipseq -profile test,<docker/singularity/conda/institute> +``` + +> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + +iv. Start running your own analysis! + +```bash +nextflow run nf-core/chipseq -profile <docker/singularity/conda/institute> --input design.csv --genome GRCh37 +``` + +See [usage docs](docs/usage.md) for all of the available options when running the pipeline. 
+ +## Documentation + The nf-core/chipseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: 1. [Installation](https://nf-co.re/usage/installation) @@ -57,13 +81,22 @@ The nf-core/chipseq pipeline comes with documentation about the pipeline, found 5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) ## Credits + These scripts were orginally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. It has since been re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. -Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@drejom](https://github.com/drejom), [@KevinMenden](https://github.com/KevinMenden), [@pditommaso](https://github.com/pditommaso). +Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@drejom](https://github.com/drejom), [@KevinMenden](https://github.com/KevinMenden), [@crickbabs](https://github.com/crickbabs), [@pditommaso](https://github.com/pditommaso). + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/chipseq) (you can join with [this invite](https://nf-co.re/join/slack)). 
## Citation -If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240507](https://doi.org/10.5281/zenodo.3240507) +If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) You can cite the `nf-core` pre-print as follows: -Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). +> Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. 
diff --git a/assets/blacklists/BDGP6-blacklist.bed b/assets/blacklists/BDGP6-blacklist.bed deleted file mode 100644 index af547148..00000000 --- a/assets/blacklists/BDGP6-blacklist.bed +++ /dev/null @@ -1,449 +0,0 @@ -chr2L 47600 49300 -chr2L 982500 984400 -chr2L 2885500 2887000 -chr2L 4920500 4922400 -chr2L 4937900 4941100 -chr2L 5171400 5177700 -chr2L 6426500 6427500 -chr2L 6992200 6996700 -chr2L 7345200 7350300 -chr2L 8102400 8103400 -chr2L 8729600 8731000 -chr2L 9899400 9902800 -chr2L 9976200 9979800 -chr2L 10422300 10423400 -chr2L 11541098 11541243 -chr2L 11992600 11999400 -chr2L 12558600 12563800 -chr2L 12792200 12794100 -chr2L 13522300 13523300 -chr2L 13650700 13651700 -chr2L 15451900 15452900 -chr2L 16514400 16518200 -chr2L 19576100 19577300 -chr2L 19709600 19711500 -chr2L 20197000 20201100 -chr2L 20458300 20459300 -chr2L 20746500 20747500 -chr2L 21022300 21023500 -chr2L 21416300 21440600 -chr2L 21439805 21440204 -chr2L 21447300 21454900 -chr2L 21482700 21485200 -chr2L 21499300 21500400 -chr2L 21537800 21543500 -chr2L 22202600 22203600 -chr2L 22377700 22389700 -chr2L 22605768 22607668 -chr2L 22650968 22653868 -chr2L 22681568 22682568 -chr2L 22709668 22710768 -chr2L 22768468 22770468 -chr2L 22859468 22860468 -chr2L 22892768 22894668 -chr2L 22916468 22917568 -chr2L 22962568 22963668 -chr2L 23095135 23095208 -chr2L 23100681 23100690 -chr2L 23101135 23101198 -chr2L 23155826 23156826 -chr2L 23267984 23270784 -chr2L 23277785 23278785 -chr2L 23331885 23332885 -chr2L 23382531 23383931 -chr2L 23389332 23390332 -chr2L 23396932 23397932 -chr2L 23406032 23407032 -chr2L 23492434 23493440 -chr2L 23503240 23504240 -chr2L 23512440 23513640 -chr2L 23512972 23513113 -chr2L 23513325 23513427 -chr2L 23513325 23513486 -chr2R 4869 4931 -chr2R 127649 130567 -chr2R 167144 168244 -chr2R 233935 235174 -chr2R 285375 290597 -chr2R 309683 310085 -chr2R 530880 534580 -chr2R 870395 874286 -chr2R 1453267 1454267 -chr2R 1518376 1519576 -chr2R 1799340 1799396 -chr2R 1942730 1945330 -chr2R 
1974030 1975030 -chr2R 2165041 2169842 -chr2R 2465863 2467163 -chr2R 2489337 2491037 -chr2R 2548937 2551737 -chr2R 2583437 2584437 -chr2R 2651936 2653736 -chr2R 2660236 2665036 -chr2R 2861534 2862834 -chr2R 3267123 3268123 -chr2R 3306823 3307923 -chr2R 3414527 3414565 -chr2R 3492859 3492914 -chr2R 3608792 3608825 -chr2R 3698569 3699969 -chr2R 3718236 3718279 -chr2R 3885272 3885921 -chr2R 3991563 3998435 -chr2R 4174889 4175088 -chr2R 4213195 4214295 -chr2R 4253795 4256895 -chr2R 4318395 4319995 -chr2R 4354395 4355395 -chr2R 4372695 4373695 -chr2R 4437995 4444295 -chr2R 4489295 4500295 -chr2R 4680395 4684895 -chr2R 4733295 4734295 -chr2R 4766495 4769695 -chr2R 4810695 4812495 -chr2R 4834495 4835995 -chr2R 4988195 4989295 -chr2R 5005795 5006795 -chr2R 5048995 5056395 -chr2R 5105195 5109995 -chr2R 5221395 5222795 -chr2R 5230795 5231795 -chr2R 5287095 5288095 -chr2R 5393095 5394795 -chr2R 5406695 5407995 -chr2R 5571095 5572195 -chr2R 5652595 5653895 -chr2R 6308795 6314595 -chr2R 6343595 6348695 -chr2R 6389195 6391695 -chr2R 6400195 6402295 -chr2R 6440795 6441895 -chr2R 6453695 6455095 -chr2R 7200195 7201195 -chr2R 7235995 7247295 -chr2R 7826695 7827695 -chr2R 8491725 8491966 -chr2R 8781195 8783195 -chr2R 9727995 9729995 -chr2R 10184695 10185995 -chr2R 10659595 10661495 -chr2R 10950695 10952595 -chr2R 11021795 11023595 -chr2R 11297595 11301895 -chr2R 12481495 12482495 -chr2R 12819595 12822095 -chr2R 13408395 13411595 -chr2R 13728195 13735795 -chr2R 14102395 14106895 -chr2R 14108359 14108584 -chr2R 14173695 14174895 -chr2R 14189095 14195495 -chr2R 14358795 14361695 -chr2R 14467395 14469295 -chr2R 14891995 14893195 -chr2R 17147995 17152195 -chr2R 17237895 17239695 -chr2R 18371195 18372595 -chr2R 18576595 18579795 -chr2R 18593995 18595995 -chr2R 19729495 19730495 -chr2R 19739895 19743795 -chr2R 19759695 19760795 -chr2R 20779995 20785395 -chr2R 21814295 21816495 -chr2R 22526895 22528295 -chr2R 23365777 23367477 -chr2R 23406677 23407777 -chr2R 23914679 23914711 -chr2R 
24183377 24184677 -chr2R 25260089 25261985 -chr3L 1245300 1247200 -chr3L 1425400 1427300 -chr3L 2063900 2069700 -chr3L 3899200 3901900 -chr3L 4361900 4362900 -chr3L 4849900 4850900 -chr3L 5047600 5048600 -chr3L 5104600 5105700 -chr3L 5462900 5464600 -chr3L 6002000 6004400 -chr3L 7248900 7250300 -chr3L 7379500 7380500 -chr3L 7683300 7691600 -chr3L 7795400 7796400 -chr3L 7920700 7921700 -chr3L 8021800 8023900 -chr3L 9083500 9084600 -chr3L 9130628 9130652 -chr3L 9392500 9393500 -chr3L 9576600 9581000 -chr3L 9930000 9937600 -chr3L 11329800 11331500 -chr3L 11507200 11508200 -chr3L 11613100 11619300 -chr3L 11968500 11972400 -chr3L 13579100 13580100 -chr3L 14726800 14728700 -chr3L 14825400 14826600 -chr3L 15296900 15298400 -chr3L 15423800 15426700 -chr3L 15555600 15558000 -chr3L 15825600 15826600 -chr3L 16051400 16053300 -chr3L 16599000 16607700 -chr3L 16685800 16688500 -chr3L 17537103 17537465 -chr3L 17918400 17921100 -chr3L 18529200 18530200 -chr3L 20477700 20483500 -chr3L 20822100 20824700 -chr3L 21374600 21376500 -chr3L 21485000 21486300 -chr3L 21753200 21754400 -chr3L 22099800 22102500 -chr3L 22817900 22819600 -chr3L 23042900 23044100 -chr3L 23140900 23142500 -chr3L 23423400 23424600 -chr3L 23440100 23441100 -chr3L 23497500 23498500 -chr3L 23669300 23675300 -chr3L 23791100 23792200 -chr3L 23826000 23827900 -chr3L 23968000 23971800 -chr3L 24091600 24102100 -chr3L 24169600 24171900 -chr3L 24193900 24195700 -chr3L 24220900 24221900 -chr3L 24370900 24371900 -chr3L 24440900 24442100 -chr3L 24467900 24470200 -chr3L 24502900 24504900 -chr3L 24544300 24546200 -chr3L 24658803 24659994 -chr3L 24667605 24668905 -chr3L 24794768 24795768 -chr3L 25051756 25052756 -chr3L 25223122 25224722 -chr3L 25288122 25290522 -chr3L 25778255 25778727 -chr3L 25897201 25903001 -chr3L 25963501 25964701 -chr3L 26116482 26117982 -chr3L 26149978 26150978 -chr3L 26610641 26611641 -chr3L 26704569 26706569 -chr3L 27071207 27071367 -chr3L 27079475 27080475 -chr3L 27095375 27101075 -chr3L 27153218 
27153246 -chr3L 27429589 27429714 -chr3L 27747393 27748493 -chr3L 27959562 27964651 -chr3L 28076881 28077240 -chr3L 28110068 28110227 -chr3R 2453 2512 -chr3R 21385 21546 -chr3R 84563 84637 -chr3R 141480 141655 -chr3R 141917 141981 -chr3R 173097 173203 -chr3R 231789 231819 -chr3R 1378782 1379782 -chr3R 1781282 1781567 -chr3R 2088173 2089173 -chr3R 2324662 2325662 -chr3R 2698494 2710781 -chr3R 2700868 2701184 -chr3R 2779767 2781467 -chr3R 2792181 2793381 -chr3R 2804364 2805364 -chr3R 2850166 2851266 -chr3R 2927255 2930755 -chr3R 2953255 2959255 -chr3R 3001634 3001890 -chr3R 3022269 3022796 -chr3R 3033708 3033774 -chr3R 3049012 3049261 -chr3R 3129778 3131378 -chr3R 3209582 3210582 -chr3R 3371264 3372364 -chr3R 3498665 3504561 -chr3R 3529261 3571759 -chr3R 3647138 3660653 -chr3R 3698553 3700452 -chr3R 3716162 3730380 -chr3R 3935332 3935391 -chr3R 3951581 3952147 -chr3R 3953905 3954905 -chr3R 4106234 4107734 -chr3R 4231678 4233678 -chr3R 4270678 4277178 -chr3R 4373178 4374178 -chr3R 4753378 4761178 -chr3R 4893878 4894878 -chr3R 5003878 5007178 -chr3R 5047878 5052978 -chr3R 5259478 5260878 -chr3R 5339878 5343378 -chr3R 5822278 5823478 -chr3R 6307578 6308778 -chr3R 6510078 6511078 -chr3R 6820178 6822978 -chr3R 7087178 7088578 -chr3R 7351278 7353178 -chr3R 7700578 7703078 -chr3R 8043778 8044878 -chr3R 8095178 8096278 -chr3R 8403078 8404978 -chr3R 8571178 8573478 -chr3R 8900978 8913778 -chr3R 9691078 9699878 -chr3R 10257478 10259578 -chr3R 10385078 10388078 -chr3R 10960578 10961978 -chr3R 11067078 11068078 -chr3R 11371978 11373278 -chr3R 11669078 11670378 -chr3R 11841178 11842278 -chr3R 11959878 11960878 -chr3R 12491878 12494478 -chr3R 12499778 12510278 -chr3R 12711878 12713278 -chr3R 13314778 13318878 -chr3R 13978578 13980978 -chr3R 14246478 14247878 -chr3R 15105878 15106878 -chr3R 15133778 15138778 -chr3R 15165678 15166678 -chr3R 15607778 15608778 -chr3R 16988278 16994678 -chr3R 18033778 18037178 -chr3R 18330721 18331078 -chr3R 19029878 19031178 -chr3R 19441578 19442878 
-chr3R 19775578 19777178 -chr3R 20111078 20112078 -chr3R 20247878 20248878 -chr3R 20553378 20559778 -chr3R 21300278 21302578 -chr3R 21610678 21611778 -chr3R 22298478 22299478 -chr3R 22450378 22451878 -chr3R 22488578 22489578 -chr3R 22883878 22884878 -chr3R 23533278 23534378 -chr3R 23838978 23843678 -chr3R 23857278 23858278 -chr3R 24077378 24078478 -chr3R 24207178 24211678 -chr3R 25327178 25328678 -chr3R 25398278 25400378 -chr3R 25609678 25610678 -chr3R 26116378 26117378 -chr3R 27096678 27097878 -chr3R 27143378 27145278 -chr3R 28253578 28255478 -chr3R 28758778 28759778 -chr3R 29653278 29654378 -chr3R 29778878 29779978 -chr3R 30238878 30239878 -chr3R 30401278 30403178 -chr3R 31075278 31078578 -chr3R 31331678 31333578 -chr3R 31415678 31417878 -chr3R 31892978 31894778 -chr3R 31980778 31983478 -chr3R 32070078 32073978 -chr4 37874 38874 -chr4 207774 208774 -chr4 413974 414974 -chr4 545274 546274 -chr4 591674 595274 -chr4 789474 790574 -chr4 840174 841374 -chr4 907974 909174 -chr4 1176474 1177474 -chr4 1199976 1200452 -chr4 1279674 1296774 -chr4 1311074 1324574 -chr4 1335974 1337274 -chrM 1473 15268 -chrX 11670 12770 -chrX 109913 120313 -chrX 429167 434567 -chrX 554667 556867 -chrX 564567 565567 -chrX 814167 815167 -chrX 1006467 1007667 -chrX 1365567 1367467 -chrX 1518167 1522767 -chrX 1527551 1527606 -chrX 1933767 1935667 -chrX 1959567 1960767 -chrX 2404967 2406667 -chrX 2611367 2617567 -chrX 3415167 3421167 -chrX 3790367 3793567 -chrX 3798767 3801767 -chrX 3945167 3948067 -chrX 4733667 4735967 -chrX 4926267 4932967 -chrX 4991367 4993367 -chrX 6383967 6384967 -chrX 7024167 7026667 -chrX 7125367 7127267 -chrX 7480867 7482167 -chrX 8293167 8296667 -chrX 10395567 10396567 -chrX 11099867 11103067 -chrX 11596067 11598067 -chrX 11646239 11646265 -chrX 11890167 11891167 -chrX 12932467 12937767 -chrX 14049067 14050267 -chrX 14056867 14060367 -chrX 14278767 14280567 -chrX 14551667 14556567 -chrX 15795267 15796467 -chrX 16053767 16059467 -chrX 16344868 16345336 -chrX 17115167 
17119267 -chrX 19578167 19579667 -chrX 19637067 19638267 -chrX 19943267 19945167 -chrX 20198801 20200406 -chrX 20201693 20202006 -chrX 20841973 20843373 -chrX 21607773 21609673 -chrX 21622773 21623873 -chrX 21653410 21653907 -chrX 21723673 21725673 -chrX 21741673 21743573 -chrX 21963550 21965050 -chrX 22207190 22208582 -chrX 22369467 22371367 -chrX 22432165 22434065 -chrX 23021304 23022282 -chrX 23036082 23037082 -chrX 23056082 23062382 -chrX 23108682 23109682 -chrX 23114821 23115169 -chrX 23184182 23185182 -chrX 23199382 23200382 -chrX 23217282 23218482 -chrX 23291206 23291256 -chrX 23349323 23349358 -chrX 23475208 23475474 -chrY 587886 591286 -chrY 652186 653386 -chrY 734077 735877 -chrY 803659 803847 -chrY 860181 860257 -chrY 969051 969293 -chrY 1423565 1423631 -chrY 2321441 2321732 -chrY 2570084 2570211 -chrY 2848695 2848887 -chrY 3562137 3562433 diff --git a/assets/blacklists/ce11-blacklist.bed b/assets/blacklists/ce11-blacklist.bed deleted file mode 100644 index 8a17aad9..00000000 --- a/assets/blacklists/ce11-blacklist.bed +++ /dev/null @@ -1,122 +0,0 @@ -chrI 932997 934497 -chrI 2542898 2543998 -chrI 3171398 3172598 -chrI 3664797 3666097 -chrI 3989697 3990997 -chrI 4544299 4547499 -chrI 5152597 5153997 -chrI 10130610 10133010 -chrI 10208010 10209110 -chrI 10216310 10219210 -chrI 10266309 10274309 -chrI 10946007 10953107 -chrI 14453012 14454612 -chrI 15059811 15072411 -chrII 0 1000 -chrII 500900 502100 -chrII 694793 696493 -chrII 1452493 1453593 -chrII 2569895 2571395 -chrII 2897396 2898696 -chrII 3465997 3468697 -chrII 3796197 3797498 -chrII 3941998 3946697 -chrII 3962397 3963397 -chrII 3993897 3994897 -chrII 4284898 4285898 -chrII 4640903 4645003 -chrII 5144709 5146709 -chrII 6506132 6509132 -chrII 7444243 7448843 -chrII 8287450 8292950 -chrII 8975462 8976962 -chrII 9631759 9633259 -chrII 9809659 9824759 -chrII 10335760 10339360 -chrII 12843569 12846169 -chrII 13598570 13600070 -chrII 13939974 13941474 -chrII 13984974 13987074 -chrII 14324176 14326176 
-chrII 14336876 14339776 -chrII 14992376 14994276 -chrII 15277076 15279376 -chrIII 414401 415601 -chrIII 930611 932411 -chrIII 1017911 1020111 -chrIII 1269508 1270508 -chrIII 1299408 1302908 -chrIII 2497010 2501110 -chrIII 5353939 5358539 -chrIII 7415865 7417865 -chrIII 7443965 7449265 -chrIII 7594664 7597264 -chrIII 8862681 8864181 -chrIII 10224291 10226191 -chrIII 13778301 13783801 -chrIV 906200 907700 -chrIV 2828302 2830902 -chrIV 3206303 3209503 -chrIV 4416207 4421907 -chrIV 6357711 6361011 -chrIV 6468711 6469811 -chrIV 6698011 6699711 -chrIV 6714311 6724411 -chrIV 7593511 7598311 -chrIV 8572913 8581913 -chrIV 9045815 9049015 -chrIV 10943021 10951221 -chrIV 11070521 11076021 -chrIV 11610823 11612723 -chrIV 11697023 11698023 -chrIV 12024022 12025422 -chrIV 12169322 12170622 -chrIV 12314422 12319522 -chrIV 12730523 12731823 -chrIV 13360424 13362224 -chrIV 13548524 13549924 -chrIV 16963335 16964835 -chrIV 17059735 17062235 -chrV 264299 267299 -chrV 1638000 1639300 -chrV 3098302 3099702 -chrV 3434603 3438803 -chrV 4333314 4336614 -chrV 5073315 5076315 -chrV 5283116 5286116 -chrV 6172117 6178017 -chrV 6939118 6943218 -chrV 7442619 7444819 -chrV 7919720 7925020 -chrV 7988620 7991520 -chrV 8699222 8701922 -chrV 9432724 9435524 -chrV 10606121 10612021 -chrV 12509619 12510919 -chrV 14756415 14757515 -chrV 14766615 14770515 -chrV 16707222 16709422 -chrV 17119724 17132624 -chrV 17308625 17311725 -chrV 17384125 17385825 -chrV 17391225 17394525 -chrV 18400128 18401728 -chrX 109500 114200 -chrX 291200 295300 -chrX 1752205 1755105 -chrX 3007010 3008310 -chrX 4026023 4051823 -chrX 5056231 5057331 -chrX 5914635 5915835 -chrX 7076944 7079144 -chrX 9186057 9189257 -chrX 9438159 9439559 -chrX 10361560 10367060 -chrX 11785767 11789867 -chrX 11886368 11889068 -chrX 12277168 12278968 -chrX 14388070 14389270 -chrX 14907969 14909769 -chrX 15226969 15228869 -chrX 15807468 15811268 -chrX 16758373 16760073 diff --git a/assets/email_template.html b/assets/email_template.html index 
30127d9a..a3f19225 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -11,7 +11,7 @@
- +

nf-core/chipseq v${version}

Run Name: $runName

diff --git a/assets/email_template.txt b/assets/email_template.txt index 25f87153..6d35a697 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,12 @@ -======================================== - nf-core/chipseq v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/chipseq v${version} +---------------------------------------------------- + Run Name: $runName <% if (success){ diff --git a/assets/multiqc/peak_annotation_header.txt b/assets/multiqc/peak_annotation_header.txt index 14f89548..1aff7522 100644 --- a/assets/multiqc/peak_annotation_header.txt +++ b/assets/multiqc/peak_annotation_header.txt @@ -5,8 +5,5 @@ #plot_type: 'bargraph' #anchor: 'nfcore_chipseq-peak_annotation' #pconfig: -# title: 'Peak to feature %' -# ylab: 'Feature %' -# ymax: 100 -# ymin: 0 -# cpswitch_c_active: false +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/nfcore-chipseq_logo.png b/assets/nf-core-chipseq_logo.png similarity index 100% rename from assets/nfcore-chipseq_logo.png rename to assets/nf-core-chipseq_logo.png diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index b316b04d..0042bf1d 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -9,12 +9,12 @@ Content-Type: text/html; charset=utf-8 $email_html --nfcoremimeboundary -Content-Type: image/png;name="nfcore-chipseq_logo.png" +Content-Type: image/png;name="nf-core-chipseq_logo.png" Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: inline; filename="nfcore-chipseq_logo.png" +Content-ID: +Content-Disposition: inline; filename="nf-core-chipseq_logo.png" -<% out << new File("$baseDir/assets/nfcore-chipseq_logo.png"). +<% out << new File("$baseDir/assets/nf-core-chipseq_logo.png"). bytes. encodeBase64(). 
toString(). diff --git a/bin/check_design.py b/bin/check_design.py index ed87fd6c..9c263b8f 100755 --- a/bin/check_design.py +++ b/bin/check_design.py @@ -2,7 +2,7 @@ ####################################################################### ####################################################################### -## Created on April 4th 2019 to reformat nf-core/chipseq design file +## Created on April 4th 2019 to check nf-core/chipseq design file ####################################################################### ####################################################################### @@ -18,7 +18,7 @@ ############################################ Description = 'Reformat nf-core/chipseq design file and check its contents.' -Epilog = """Example usage: python reformat_design.py """ +Epilog = """Example usage: python check_design.py """ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) @@ -159,7 +159,7 @@ def reformat_design(DesignFile,ReadMappingFile,ControlMappingFile): if not antibodyList in antibodyGroupDict[antibody][group]: antibodyGroupDict[antibody][group].append(antibodyList) else: - print "{}: Control id not a valid group\nControl id: {}, Valid Groups: {}".format(ERROR_STR,groupControlDict[group],sorted(sampleMappingDict.keys())) + print "{}: Control id not a valid group\nControl id: {}, Valid Groups: {}".format(ERROR_STR,control,sorted(sampleMappingDict.keys())) sys.exit(1) fout.close() diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index 1eb0ec3c..9865357c 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -57,7 +57,7 @@ plot.feature.dat <- data.frame() for (idx in 1:length(HomerFiles)) { sampleid = SampleIDs[idx] - anno.dat <- read.table(HomerFiles[idx], sep="\t", header=TRUE) + anno.dat <- read.table(HomerFiles[idx], sep="\t", header=TRUE,quote="") anno.dat <- anno.dat[,c("Annotation","Distance.to.TSS","Nearest.PromoterID")] anno.dat <- 
anno.dat[which(!is.na(anno.dat$Distance.to.TSS)),] if (nrow(anno.dat) == 0) { @@ -87,9 +87,9 @@ for (idx in 1:length(HomerFiles)) { plot.dist.dat <- rbind(plot.dist.dat,dist.melt) } -levels(plot.dat$name) <- sort(unique(as.character(plot.dat$name))) -levels(plot.dist.dat$variable) <- sort(unique(as.character(plot.dist.dat$variable))) -levels(plot.feature.dat$variable) <- sort(unique(as.character(plot.feature.dat$variable))) +plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) +plot.dist.dat$variable <- factor(plot.dist.dat$variable, levels=sort(unique(as.character(plot.dist.dat$variable)))) +plot.feature.dat$variable <- factor(plot.feature.dat$variable, levels=sort(unique(as.character(plot.feature.dat$variable)))) summary.dat <- dcast(plot.feature.dat, variable ~ feature, value.var="value") colnames(summary.dat)[1] <- "sample" diff --git a/bin/plot_macs_qc.r b/bin/plot_macs_qc.r index e43fcb8c..2360505f 100755 --- a/bin/plot_macs_qc.r +++ b/bin/plot_macs_qc.r @@ -81,7 +81,7 @@ for (idx in 1:length(PeakFiles)) { peaks.dat$name <- rep(sampleid,nrow(peaks.dat)) plot.dat <- rbind(plot.dat,peaks.dat) } -levels(plot.dat$name) <- sort(unique(as.character(plot.dat$name))) +plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep="")) write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE) diff --git a/bin/plot_peak_intersect.r b/bin/plot_peak_intersect.r index 58bb3b33..2404b8b9 100755 --- a/bin/plot_peak_intersect.r +++ b/bin/plot_peak_intersect.r @@ -45,14 +45,28 @@ comb.dat <- read.table(opt$input_file,sep="\t",header=FALSE) comb.vec <- comb.dat[,2] comb.vec <- setNames(comb.vec,comb.dat[,1]) -pdf(opt$output_file,onefile=F,height=10,width=14) - -upset(fromExpression(comb.vec), - sets.bar.color = "#56B4E9", - point.size = 5, - line.size = 2, - order.by = "freq", - text.scale = c(1.7, 1.5, 
1.7, 1.5, 1.7, 1.7)) +sets <- sort(unique(unlist(strsplit(names(comb.vec),split='&'))), decreasing = TRUE) +nintersects = length(names(comb.vec)) +if (nintersects > 70) { + nintersects <- 70 +} + +pdf(opt$output_file,onefile=F,height=10,width=20) + +upset( + fromExpression(comb.vec), + nsets = length(sets), + nintersects = nintersects, + sets = sets, + keep.order = TRUE, + sets.bar.color = "#56B4E9", + point.size = 3, + line.size = 1, + mb.ratio = c(0.55, 0.45), + order.by = "freq", + number.angles = 30, + text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.2) +) dev.off() diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 2640bb4f..4e8dfbcd 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -44,12 +44,15 @@ # Search each file using its regex for k, v in regexes.items(): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - + try: + with open(v[0]) as x: + versions = x.read() + match = re.search(v[1], versions) + if match: + results[k] = "v{}".format(match.group(1)) + except IOError: + results[k] = False + # Remove software set to false in results for k in results: if not results[k]: diff --git a/conf/base.config b/conf/base.config index b52774f2..722a01ad 100644 --- a/conf/base.config +++ b/conf/base.config @@ -12,8 +12,8 @@ process { cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } + memory = { check_max( 7.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 @@ -22,31 +22,24 @@ process { // Process-specific resource requirements withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + memory = { check_max( 14.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 48.GB * task.attempt, 'memory' ) } + memory = { check_max( 42.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 96.GB * task.attempt, 'memory' ) } + memory = { check_max( 84.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withName:get_software_versions { cache = false } -} -// Defaults config params, may be overwritten by later configs -params { - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' } diff --git a/conf/igenomes.config b/conf/igenomes.config index f95c8dd4..37217cf4 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -7,154 +7,391 @@ * path using $params.igenomes_base / --igenomes_base */ - params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" - macs_gsize = "2.7e9" - } - 'GRCh38' { - fasta = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - macs_gsize = "2.7e9" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" - macs_gsize = "1.87e9" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - 
macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - } - 'Gm01' { - fasta = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - macs_gsize = "2.7e9" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" - macs_gsize = "2.7e9" - } - 'mm10' { - fasta = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" - macs_gsize = "1.87e9" - } +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'GRCm38' { + fasta = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = 
"${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + mito_name = "chrM" + } } } diff --git a/conf/test.config b/conf/test.config index 52f17e7f..2fdcb4e8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,7 +16,7 @@ params { max_time = 12.h // Input data - design = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design.csv' // Genome references fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' diff --git a/docs/images/nfcore-chipseq_logo.png b/docs/images/nf-core-chipseq_logo.png similarity index 100% rename from docs/images/nfcore-chipseq_logo.png rename to docs/images/nf-core-chipseq_logo.png diff --git a/docs/output.md b/docs/output.md index 91eddfa5..b797b869 100644 --- a/docs/output.md +++ b/docs/output.md @@ -3,6 +3,7 @@ This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. ## Pipeline overview + The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main README.md`](../README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. 
See [Illumina website](https://emea.illumina.com/techniques/sequencing/dna-sequencing/chip-seq.html) for more information regarding the ChIP-seq protocol, and for an extensive list of publications. @@ -39,7 +40,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam *Output directories*: * `trim_galore/` - If `--saveTrimmed` is specified FastQ files **after** adapter trimming will be placed in this directory. + If `--save_trimmed` is specified FastQ files **after** adapter trimming will be placed in this directory. * `trim_galore/logs/` `*.log` files generated by Trim Galore!. * `trim_galore/fastqc/` @@ -53,7 +54,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam [BWA](http://bio-bwa.sourceforge.net/bwa.shtml), [SAMtools](http://samtools.sourceforge.net/) *Description*: - Adapter-trimmed reads are mapped to the reference assembly using BWA. A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--saveGenomeIndex` parameter to save the indices for future pipeline runs, reducing processing times. + Adapter-trimmed reads are mapped to the reference assembly using BWA. A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--save_reference` parameter to save the indices for future pipeline runs, reducing processing times. 
![MultiQC - SAMtools stats plot](images/mqc_samtools_stats_plot.png) @@ -77,8 +78,7 @@ The library-level alignments associated with the same sample are merged and subs *Description*: Picard MergeSamFiles and MarkDuplicates are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step isnt carried out because the library-level and merged library-level BAM files will be exactly the same. - Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keepDups` parameter but its generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keepMultiMap`. Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. - + Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keep_dups` parameter but it's generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keep_multi_map`. Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. 
A selection of alignment-based QC metrics generated by Picard CollectMultipleMetrics and MarkDuplicates will be included in the MultiQC report. @@ -159,7 +159,7 @@ The library-level alignments associated with the same sample are merged and subs [MACS2](https://github.com/taoliu/MACS), [HOMER](http://homer.ucsd.edu/homer/ngs/annotation.html) *Description*: - MACS2 is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrowPeak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/taoliu/MACS#output-files) for a description of the output files generated by MACS2. + MACS2 is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/taoliu/MACS#output-files) for a description of the output files generated by MACS2. ![MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) @@ -171,14 +171,14 @@ The library-level alignments associated with the same sample are merged and subs ![MultiQC - MACS2 peaks FRiP score plot](images/mqc_frip_score_plot.png) - `` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrowPeak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. 
Also, the IGV session file and MultiQC reports in the results directory will be overwritten with the latest output so you may want to rename/move these beforehand. + `<PEAK_TYPE>` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. *Output directories*: - * `bwa/mergedLibrary/macs2//` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/` * MACS2 output files: `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak` and `*summits.bed`. The files generated will depend on whether MACS2 has been run in *narrowPeak* or *broadPeak* mode. * HOMER peak-to-gene annotation file: `*.annotatePeaks.txt`. - * `bwa/mergedLibrary/macs2//qc/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/qc/` * QC plots for MACS2 peaks: `macs_peak.plots.pdf` * QC plots for peak-to-gene feature annotation: `macs_annotatePeaks.plots.pdf` * MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios: `*.FRiP_mqc.tsv`, `*.count_mqc.tsv` and `macs_annotatePeaks.summary_mqc.tsv` respectively. @@ -198,7 +198,7 @@ The library-level alignments associated with the same sample are merged and subs ![R - UpSetR peak intersection plot](images/r_upsetr_intersect_plot.png) *Output directories*: - * `bwa/mergedLibrary/macs2//consensus/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/consensus/` * Consensus peak-set across all samples in `*.bed` format. * Consensus peak-set across all samples in `*.saf` format. Required by featureCounts for read quantification. * HOMER `*.annotatePeaks.txt` peak-to-gene annotation file for consensus peaks. 
@@ -233,19 +233,19 @@ The library-level alignments associated with the same sample are merged and subs ![R - DESeq2 Volcano plot](images/r_deseq2_volcano_plot.png) *Output directories*: - * `bwa/mergedLibrary/macs2//consensus//deseq2/` + * `bwa/mergedLibrary/macs//consensus//deseq2/` * `.featureCounts.txt` file for read counts across all samples relative to consensus peak-set. * Differential binding `*.results.txt` spreadsheet containing results across all consensus peaks and all comparisons. * `*.plots.pdf` file for PCA and hierarchical clustering. * `*.log` file with information for number of differentially bound intervals at different FDR and fold-change thresholds for each comparison. * `*.dds.rld.RData` file containing R `dds` and `rld` objects generated by DESeq2. * `R_sessionInfo.log` file containing information about R, the OS and attached or loaded packages. - * `bwa/mergedLibrary/macs2//consensus///` + * `bwa/mergedLibrary/macs//consensus///` * `*.results.txt` spreadsheet containing comparison-specific DESeq2 output for differential binding results across all peaks. * Subset of above file for peaks that pass FDR <= 0.01 (`*FDR0.01.results.txt`) and FDR <= 0.05 (`*FDR0.05.results.txt`). * BED files for peaks that pass FDR <= 0.01 (`*FDR0.01.results.bed`) and FDR <= 0.05 (`*FDR0.05.results.bed`). * MA, Volcano, clustering and scatterplots at FDR <= 0.01 and FDR <= 0.05: `*deseq2.plots.pdf`. - * `bwa/mergedLibrary/macs2//consensus//sizeFactors/` + * `bwa/mergedLibrary/macs//consensus//sizeFactors/` Files containing DESeq2 sizeFactors per sample: `*.txt` and `*.RData`. ## Aggregate analysis @@ -263,7 +263,7 @@ The library-level alignments associated with the same sample are merged and subs The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . 
*Output directories*: - * `multiqc/` + * `multiqc//` * `multiqc_report.html` - a standalone HTML file that can be viewed in your web browser. * `multiqc_data/` - directory containing parsed statistics from the different tools used in the pipeline. * `multiqc_plots/` - directory containing static images from the report in various formats. @@ -287,7 +287,7 @@ The library-level alignments associated with the same sample are merged and subs ![IGV screenshot](images/igv_screenshot.png) *Output directories*: - * `igv/` + * `igv//` * `igv_session.xml` file. * `igv_files.txt` file containing a listing of the files used to create the IGV session, and their allocated colours. @@ -305,7 +305,7 @@ The library-level alignments associated with the same sample are merged and subs * `reference_genome/` A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. * `reference_genome/BWAIndex/` - If the `--saveGenomeIndex` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. + If the `--save_reference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. 2. 
**Pipeline information** diff --git a/docs/usage.md b/docs/usage.md index 7da137df..76f91076 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,9 +11,9 @@ * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) * [`-profile`](#-profile) - * [`--design`](#--design) + * [`--input`](#--input) * [Generic arguments](#generic-arguments) - * [`--singleEnd`](#--singleend) + * [`--single_end`](#--single_end) * [`--seq_center`](#--seq_center) * [`--fragment_size`](#--fragment_size) * [`--fingerprint_bins`](#--fingerprint_bins) @@ -26,21 +26,21 @@ * [`--tss_bed`](#--tss_bed) * [`--macs_gsize`](#--macs_gsize) * [`--blacklist`](#--blacklist) - * [`--saveGenomeIndex`](#--savegenomeindex) - * [`--igenomesIgnore`](#--igenomesignore) + * [`--save_reference`](#--save_reference) + * [`--igenomes_ignore`](#--igenomes_ignore) * [Adapter trimming](#adapter-trimming) - * [`--skipTrimming`](#--skiptrimming) - * [`--saveTrimmed`](#--savetrimmed) + * [`--skip_trimming`](#--skip_trimming) + * [`--save_trimmed`](#--save_trimmed) * [Alignments](#alignments) - * [`--keepDups`](#--keepdups) - * [`--keepMultiMap`](#--keepmultimap) - * [`--saveAlignedIntermediates`](#--savealignedintermediates) + * [`--keep_dups`](#--keep_dups) + * [`--keep_multi_map`](#--keep_multi_map) + * [`--save_align_intermeds`](#--save_align_intermeds) * [Peaks](#peaks) - * [`--narrowPeak`](#--narrowpeak) + * [`--narrow_peak`](#--narrow_peak) * [`--broad_cutoff`](#--broad_cutoff) * [`--min_reps_consensus`](#--min_reps_consensus) - * [`--saveMACSPileup`](#--savemacspileup) - * [`--skipDiffAnalysis`](#--skipdiffanalysis) + * [`--save_macs_pileup`](#--save_macs_pileup) + * [`--skip_diff_analysis`](#--skip_diff_analysis) * [Skipping QC steps](#skipping-qc-steps) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) @@ -51,6 +51,8 @@ * [Other command line parameters](#other-command-line-parameters) * [`--outdir`](#--outdir) * [`--email`](#--email) + * 
[`--email_on_fail`](#--email_on_fail) + * [`--max_multiqc_email_size`](#--max_multiqc_email_size) * [`-name`](#-name) * [`-resume`](#-resume) * [`-c`](#-c) @@ -64,8 +66,8 @@ * [`--multiqc_config`](#--multiqc_config) - ## Introduction + Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): @@ -75,10 +77,11 @@ NXF_OPTS='-Xms1g -Xmx4g' ``` ## Running the pipeline + The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker +nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -93,6 +96,7 @@ results # Finished results (configurable, see below) ``` ### Updating the pipeline + When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash @@ -100,17 +104,18 @@ nextflow pull nf-core/chipseq ``` ### Reproducibility + It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. 
If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. - ## Main arguments ### `-profile` -Use this parameter to choose a configuration profile. Profiles can give configuration pre-sets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. @@ -129,11 +134,12 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters -### `--design` +### `--input` + You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. 
```bash ---design '[path to design file]' +--input '[path to design file]' ``` #### Multiple replicates @@ -214,29 +220,30 @@ Example design files have been provided with the pipeline for [paired-end](../as ## Generic arguments -### `--singleEnd` -By default, the pipeline expects paired-end data. If you have single-end data, specify `--singleEnd` on the command line when you launch the pipeline. +### `--single_end` + +By default, the pipeline expects paired-end data. If you have single-end data, specify `--single_end` on the command line when you launch the pipeline. It is not possible to run a mixture of single-end and paired-end files in one run. ### `--seq_center` + Sequencing center information that will be added to read groups in BAM files. ### `--fragment_size` -Number of base pairs to extend single-end reads when creating bigWig files. -Default: `200` +Number of base pairs to extend single-end reads when creating bigWig files (Default: `200`). ### `--fingerprint_bins` -Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run. -Default: `500000` +Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run (Default: `500000`). ## Reference genomes The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. ### `--genome` (using iGenomes) + There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). 
Common genomes that are supported are: @@ -268,13 +275,15 @@ params { ``` ### `--fasta` -Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--saveGenomeIndex` to save BWA index for future runs. + +Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs. ```bash --fasta '[path to FASTA reference]' ``` ### `--gtf` + The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not specified). Note that the GTF file should resemble the Ensembl format. ```bash @@ -282,6 +291,7 @@ The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not ``` ### `--bwa_index` + Full path to an existing BWA index for your reference genome including the base name for the index. ```bash @@ -289,6 +299,7 @@ Full path to an existing BWA index for your reference genome including the base ``` ### `--gene_bed` + The full path to BED file for genome-wide gene intervals. This will be created from the GTF file if not specified. ```bash @@ -296,6 +307,7 @@ The full path to BED file for genome-wide gene intervals. This will be created f ``` ### `--tss_bed` + The full path to BED file for genome-wide transcription start sites. This will be created from the gene BED file if not specified. ```bash @@ -303,6 +315,7 @@ The full path to BED file for genome-wide transcription start sites. This will b ``` ### `--macs_gsize` + [Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. These have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. 
For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped. ```bash @@ -310,67 +323,83 @@ The full path to BED file for genome-wide transcription start sites. This will b ``` ### `--blacklist` + If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter. ```bash --blacklist '[path to blacklisted regions]' ``` -### `--saveGenomeIndex` +### `--save_reference` + If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times. -### `--igenomesIgnore` +### `--igenomes_ignore` + Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. ## Adapter trimming + The pipeline accepts a number of parameters to change how the trimming is done, according to your data type. You can specify custom trimming parameters as follows: -* `--clip_r1 ` - * Instructs Trim Galore to remove bp from the 5' end of read 1 (for single-end reads). -* `--clip_r2 ` - * Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only). -* `--three_prime_clip_r1 ` - * Instructs Trim Galore to remove bp from the 3' end of read 1 _AFTER_ adapter/quality trimming has been -* `--three_prime_clip_r2 ` - * Instructs Trim Galore to re move bp from the 3' end of read 2 _AFTER_ adapter/quality trimming has been performed. 
+* `--clip_r1 [int]` + * Instructs Trim Galore to remove [int] bp from the 5' end of read 1 (or single-end reads). +* `--clip_r2 [int]` + * Instructs Trim Galore to remove [int] bp from the 5' end of read 2 (paired-end reads only). +* `--three_prime_clip_r1 [int]` + * Instructs Trim Galore to remove [int] bp from the 3' end of read 1 _AFTER_ adapter/quality trimming has been performed. +* `--three_prime_clip_r2 [int]` + * Instructs Trim Galore to remove [int] bp from the 3' end of read 2 _AFTER_ adapter/quality trimming has been performed. +* `--trim_nextseq [int]` + * This enables the Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which sets a quality cutoff (normally given with `-q`) but ignores the qualities of G bases. This trimming is commonly needed for the NextSeq and NovaSeq platforms, where basecalls without any signal are called as high-quality G bases. + +### `--skip_trimming` + -### `--skipTrimming` Skip the adapter trimming step. Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data. -### `--saveTrimmed` +### `--save_trimmed` + By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete. ## Alignments -### `--keepDups` +### `--keep_dups` + Duplicate reads are not filtered from alignments. -### `--keepMultiMap` +### `--keep_multi_map` + Reads mapping to multiple locations in the genome are not filtered from alignments. -### `--saveAlignedIntermediates` +### `--save_align_intermeds` + By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set to true to also save other intermediate BAM files.
## Peaks -### `--narrowPeak` +### `--narrow_peak` + MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode. ### `--broad_cutoff` -Specifies broad cut-off value for MACS2. Only used when `--narrowPeak` isnt specified. Default: 0.1 + +Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isn't specified (Default: `0.1`). ### `--min_reps_consensus` + Number of biological replicates required from a given condition for a peak to contribute to a consensus peak . If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a "reproducible" set of consensus of peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded. ```bash -- min_reps_consensus 1 ``` -### `--saveMACSPileup` +### `--save_macs_pileup` + Instruct MACS2 to create bedGraph files using the `-B --SPMR` parameters. -### `--skipDiffAnalysis` +### `--skip_diff_analysis` + Skip read counting and differential analysis step. ## Skipping QC steps @@ -378,35 +407,41 @@ Skip read counting and differential analysis step. The pipeline contains a large number of quality control steps. Sometimes, it may not be desirable to run all of them if time and compute resources are limited.
The following options make this easy: -| Step | Description | -|-------------------------|------------------------------------| -| `--skipFastQC` | Skip FastQC | -| `--skipPicardMetrics` | Skip Picard CollectMultipleMetrics | -| `--skipPreseq` | Skip Preseq | -| `--skipPlotProfile` | Skip deepTools plotProfile | -| `--skipPlotFingerprint` | Skip deepTools plotFingerprint | -| `--skipSpp` | Skip Phantompeakqualtools | -| `--skipIGV` | Skip IGV | -| `--skipMultiQC` | Skip MultiQC | - -`--skipMultiQCStats` allows you to exclude the [general statistics table](https://multiqc.info/docs/#general-statistics-table) from the MultiQC report. +| Step | Description | +|---------------------------|------------------------------------| +| `--skip_fastqc` | Skip FastQC | +| `--skip_picard_metrics` | Skip Picard CollectMultipleMetrics | +| `--skip_preseq` | Skip Preseq | +| `--skip_plot_profile` | Skip deepTools plotProfile | +| `--skip_plot_fingerprint` | Skip deepTools plotFingerprint | +| `--skip_spp` | Skip Phantompeakqualtools | +| `--skip_igv` | Skip IGV | +| `--skip_multiqc` | Skip MultiQC | ## Job resources + ### Automatic resubmission + Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. ### Custom resource requests + Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. 
Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack/). ## AWS Batch specific parameters + Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. + ### `--awsqueue` + The JobQueue that you intend to use on AWS Batch. + ### `--awsregion` + The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. @@ -414,12 +449,23 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ## Other command line parameters ### `--outdir` + The output directory where the results will be saved. ### `--email` + Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +### `--email_on_fail` + +This works exactly as with `--email`, except emails are only sent if the workflow is not successful. 
+ +### `--max_multiqc_email_size` + +Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: `25MB`). + ### `-name` + Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). @@ -427,6 +473,7 @@ This is used in the MultiQC report (if not default) and in the summary HTML / e- **NB:** Single hyphen (core Nextflow option) ### `-resume` + Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -434,6 +481,7 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U **NB:** Single hyphen (core Nextflow option) ### `-c` + Specify the path to a specific config file (this is a core NextFlow command). **NB:** Single hyphen (core Nextflow option) @@ -441,6 +489,7 @@ Specify the path to a specific config file (this is a core NextFlow command). Note - you can use this to override pipeline defaults. ### `--custom_config_version` + Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. ```bash @@ -449,6 +498,7 @@ Provide git commit id for custom Institutional configs hosted at `nf-core/config ``` ### `--custom_config_base` + If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell nextflow where to find them with the @@ -469,22 +519,28 @@ nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs > files + singularity containers + institutional configs in one go for you, to make this process easier. ### `--max_memory` + Use to set a top-limit for the default memory requirement for each process. Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` ### `--max_time` + Use to set a top-limit for the default time requirement for each process. Should be a string in the format integer-unit. eg. `--max_time '2.h'` ### `--max_cpus` + Use to set a top-limit for the default CPU requirement for each process. Should be a string in the format integer-unit. eg. `--max_cpus 1` ### `--plaintext_email` + Set to receive plain-text e-mails instead of HTML formatted. ### `--monochrome_logs` + Set to disable colourful command line output and live life in monochrome. ### `--multiqc_config` + Specify a path to a custom MultiQC configuration file. diff --git a/environment.yml b/environment.yml index fea6ef21..40bd3199 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-chipseq-1.0.0 +name: nf-core-chipseq-1.1.0 channels: - conda-forge - bioconda diff --git a/main.nf b/main.nf index f904304d..51b84f3e 100755 --- a/main.nf +++ b/main.nf @@ -16,70 +16,71 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker + nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker Mandatory arguments: - --design Comma-separated file containing information about the samples in the experiment (see docs/usage.md) - --fasta Path to Fasta reference. 
Not mandatory when using reference in iGenomes config via --genome - --gtf Path to GTF file in Ensembl format. Not mandatory when using reference in iGenomes config via --genome - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test + --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) + --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome + --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome + -profile [str] Configuration profile to use. Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test Generic - --singleEnd Specifies that the input is single-end reads - --seq_center Sequencing center information to be added to read group of BAM files - --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) - --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) - - References If not specified in the configuration file or you wish to overwrite any of the references - --genome Name of iGenomes reference - --bwa_index Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa - --gene_bed Path to BED file containing gene intervals - --tss_bed Path to BED file containing transcription start sites - --macs_gsize Effective genome size parameter required by MACS2. 
If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 - --blacklist Path to blacklist regions (.BED format), used for filtering alignments - --saveGenomeIndex If generated by the pipeline save the BWA index in the results directory + --single_end [bool] Specifies that the input is single-end reads + --seq_center [str] Sequencing center information to be added to read group of BAM files + --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) + --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) + + References If not specified in the configuration file or you wish to overwrite any of the references + --genome [str] Name of iGenomes reference + --bwa_index [file] Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa + --gene_bed [file] Path to BED file containing gene intervals + --tss_bed [file] Path to BED file containing transcription start sites + --macs_gsize [str] Effective genome size parameter required by MACS2. 
If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 + --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments + --save_reference [bool] If generated by the pipeline save the BWA index in the results directory Trimming - --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) - --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) - --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) - --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) - --skipTrimming Skip the adapter trimming step - --saveTrimmed Save the trimmed FastQ files in the the results directory + --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) + --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) + --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) + --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) + --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) + --skip_trimming [bool] Skip the adapter trimming step + --save_trimmed [bool] Save the trimmed FastQ files in the results directory Alignments - --keepDups Duplicate reads are not filtered from alignments - --keepMultiMap Reads mapping to multiple locations are not filtered from alignments - --saveAlignedIntermediates Save 
the intermediate BAM files from the alignment step - not done by default + --keep_dups [bool] Duplicate reads are not filtered from alignments + --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments + --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default Peaks - --narrowPeak Run MACS2 in narrowPeak mode - --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrowPeak isnt specified (Default: 0.1) - --min_reps_consensus Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) - --saveMACSPileup Instruct MACS2 to create bedGraph files normalised to signal per million reads - --skipDiffAnalysis Skip differential binding analysis + --narrow_peak [bool] Run MACS2 in narrowPeak mode + --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) + --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) + --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads + --skip_diff_analysis [bool] Skip differential binding analysis QC - --skipFastQC Skip FastQC - --skipPicardMetrics Skip Picard CollectMultipleMetrics - --skipPreseq Skip Preseq - --skipPlotProfile Skip deepTools plotProfile - --skipPlotFingerprint Skip deepTools plotFingerprint - --skipSpp Skip Phantompeakqualtools - --skipIGV Skip IGV - --skipMultiQC Skip MultiQC - --skipMultiQCStats Exclude general statistics table from MultiQC report + --skip_fastqc [bool] Skip FastQC + --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics + --skip_preseq [bool] Skip Preseq + --skip_plot_profile [bool] Skip deepTools plotProfile + --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint + --skip_spp [bool] Skip Phantompeakqualtools 
+ --skip_igv [bool] Skip IGV + --skip_multiqc [bool] Skip MultiQC Other - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic + --outdir [file] The output directory where the results will be saved + --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful + --max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic AWSBatch - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on """.stripIndent() } @@ -96,7 +97,7 @@ def helpMessage() { */ // Show help message -if (params.help){ +if (params.help) { helpMessage() exit 0 } @@ -114,15 +115,18 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false params.bwa_index = params.genome ? params.genomes[ params.genome ].bwa ?: false : false params.gtf = params.genome ? 
params.genomes[ params.genome ].gtf ?: false : false -params.gene_bed = params.genome ? params.genomes[ params.genome ].gene_bed ?: false : false +params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false : false params.macs_gsize = params.genome ? params.genomes[ params.genome ].macs_gsize ?: false : false params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false +// Global variables +def PEAK_TYPE = params.narrow_peak ? "narrowPeak" : "broadPeak" + // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)){ - custom_runName = workflow.runName +if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + custom_runName = workflow.runName } //////////////////////////////////////////////////// @@ -133,7 +137,7 @@ if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)){ ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) // JSON files required by BAMTools for alignment filtering -if (params.singleEnd) { +if (params.single_end) { ch_bamtools_filter_config = file(params.bamtools_filter_se_config, checkIfExists: true) } else { ch_bamtools_filter_config = file(params.bamtools_filter_pe_config, checkIfExists: true) @@ -155,13 +159,13 @@ ch_spp_rsc_header = file("$baseDir/assets/multiqc/spp_rsc_header.txt", checkIfEx //////////////////////////////////////////////////// // Validate inputs -if (params.design) { ch_design = file(params.design, checkIfExists: true) } else { exit 1, "Samples design file not specified!" } +if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, "Samples design file not specified!" } if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "GTF annotation file not specified!" 
} -if (params.gene_bed) { ch_gene_bed = file(params.gene.bed, checkIfExists: true) } +if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } -if (params.blacklist) { ch_blacklist = file(params.blacklist, checkIfExists: true) } +if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } -if (params.fasta){ +if (params.fasta) { lastPath = params.fasta.lastIndexOf(File.separator) bwa_base = params.fasta.substring(lastPath+1) ch_fasta = file(params.fasta, checkIfExists: true) @@ -169,27 +173,27 @@ if (params.fasta){ exit 1, "Fasta file not specified!" } -if (params.bwa_index){ +if (params.bwa_index) { lastPath = params.bwa_index.lastIndexOf(File.separator) bwa_dir = params.bwa_index.substring(0,lastPath+1) bwa_base = params.bwa_index.substring(lastPath+1) - ch_bwa_index = Channel + Channel .fromPath(bwa_dir, checkIfExists: true) - .ifEmpty { exit 1, "BWA index directory not found: ${bwa_dir}" } + .set { ch_bwa_index } } //////////////////////////////////////////////////// /* -- AWS -- */ //////////////////////////////////////////////////// -if( workflow.profile == 'awsbatch') { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
+if (workflow.profile == 'awsbatch') { + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." } /////////////////////////////////////////////////////////////////////////////// @@ -203,74 +207,75 @@ if( workflow.profile == 'awsbatch') { // Header log info log.info nfcoreHeader() def summary = [:] -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Design File'] = params.design -summary['Genome'] = params.genome ?: 'Not supplied' -summary['Fasta File'] = params.fasta -summary['GTF File'] = params.gtf -if (params.gene_bed) summary['Gene BED File'] = params.gene_bed -if (params.tss_bed) summary['TSS BED File'] = params.tss_bed -if (params.bwa_index) summary['BWA Index'] = params.bwa_index -if (params.blacklist) summary['Blacklist BED'] = params.blacklist -summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' -summary['Min Consensus Reps'] = params.min_reps_consensus -if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' -if (!params.narrowPeak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff -if (params.skipTrimming){ - summary['Trimming Step'] = 'Skipped' +summary['Run Name'] = custom_runName ?: workflow.runName +summary['Data Type'] = params.single_end ? 
'Single-End' : 'Paired-End' +summary['Design File'] = params.input +summary['Genome'] = params.genome ?: 'Not supplied' +summary['Fasta File'] = params.fasta +summary['GTF File'] = params.gtf +if (params.gene_bed) summary['Gene BED File'] = params.gene_bed +if (params.tss_bed) summary['TSS BED File'] = params.tss_bed +if (params.bwa_index) summary['BWA Index'] = params.bwa_index +if (params.blacklist) summary['Blacklist BED'] = params.blacklist +summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' +summary['Min Consensus Reps'] = params.min_reps_consensus +if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 'Yes' : 'No' +if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff +if (params.skip_trimming) { + summary['Trimming Step'] = 'Skipped' } else { - summary['Trim R1'] = "$params.clip_r1 bp" - summary['Trim R2'] = "$params.clip_r2 bp" - summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" - summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" + summary['Trim R1'] = "$params.clip_r1 bp" + summary['Trim R2'] = "$params.clip_r2 bp" + summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" + summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" + summary["NextSeq Trim"] = "$params.trim_nextseq bp" } -if (params.seq_center) summary['Sequencing Center'] = params.seq_center -if (params.singleEnd) summary['Fragment Size'] = "$params.fragment_size bp" -summary['Fingerprint Bins'] = params.fingerprint_bins -if (params.keepDups) summary['Keep Duplicates'] = 'Yes' -if (params.keepMultiMap) summary['Keep Multi-mapped'] = 'Yes' -summary['Save Genome Index'] = params.saveGenomeIndex ? 
'Yes' : 'No' -if (params.saveTrimmed) summary['Save Trimmed'] = 'Yes' -if (params.saveAlignedIntermediates) summary['Save Intermeds'] = 'Yes' -if (params.saveMACSPileup) summary['Save MACS2 Pileup'] = 'Yes' -if (params.skipDiffAnalysis) summary['Skip Diff Analysis'] = 'Yes' -if (params.skipFastQC) summary['Skip FastQC'] = 'Yes' -if (params.skipPicardMetrics) summary['Skip Picard Metrics'] = 'Yes' -if (params.skipPreseq) summary['Skip Preseq'] = 'Yes' -if (params.skipPlotProfile) summary['Skip plotProfile'] = 'Yes' -if (params.skipPlotFingerprint) summary['Skip plotFingerprint'] = 'Yes' -if (params.skipSpp) summary['Skip spp'] = 'Yes' -if (params.skipIGV) summary['Skip IGV'] = 'Yes' -if (params.skipMultiQC) summary['Skip MultiQC'] = 'Yes' -if (params.skipMultiQCStats) summary['Skip MultiQC Stats'] = 'Yes' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output Dir'] = params.outdir -summary['Launch Dir'] = workflow.launchDir -summary['Working Dir'] = workflow.workDir -summary['Script Dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue +if (params.seq_center) summary['Sequencing Center'] = params.seq_center +if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" +summary['Fingerprint Bins'] = params.fingerprint_bins +if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' +if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' +summary['Save Genome Index'] = params.save_reference ? 
'Yes' : 'No' +if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' +if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' +if (params.save_macs_pileup) summary['Save MACS2 Pileup'] = 'Yes' +if (params.skip_diff_analysis) summary['Skip Diff Analysis'] = 'Yes' +if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' +if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' +if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' +if (params.skip_plot_profile) summary['Skip plotProfile'] = 'Yes' +if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes' +if (params.skip_spp) summary['Skip spp'] = 'Yes' +if (params.skip_igv) summary['Skip IGV'] = 'Yes' +if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +summary['Output Dir'] = params.outdir +summary['Launch Dir'] = workflow.launchDir +summary['Working Dir'] = workflow.workDir +summary['Script Dir'] = workflow.projectDir +summary['User'] = workflow.userName +if (workflow.profile == 'awsbatch') { + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } -summary['Config Profile'] = workflow.profile +summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC Max Size'] = params.maxMultiqcEmailFileSize +if (params.email || params.email_on_fail) { + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC Max Size'] = 
params.max_multiqc_email_size } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" +log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles checkHostname() // Show a big warning message if we're not running MACS -if (!params.macs_gsize){ +if (!params.macs_gsize) { def warnstring = params.genome ? "supported for '${params.genome}'" : 'supplied' log.warn "=================================================================\n" + " WARNING! MACS genome size parameter not $warnstring.\n" + @@ -290,12 +295,12 @@ if (!params.macs_gsize){ /* * PREPROCESSING - REFORMAT DESIGN FILE, CHECK VALIDITY & CREATE IP vs CONTROL MAPPINGS */ -process checkDesign { +process CheckDesign { tag "$design" publishDir "${params.outdir}/pipeline_info", mode: 'copy' input: - file design from ch_design + file design from ch_input output: file "design_reads.csv" into ch_design_reads_csv @@ -310,24 +315,27 @@ process checkDesign { /* * Create channels for input fastq files */ -if (params.singleEnd) { - ch_design_reads_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } +if (params.single_end) { + ch_design_reads_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } + .into { ch_raw_reads_fastqc; + ch_raw_reads_trimgalore } } else { - ch_design_reads_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } + ch_design_reads_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } + .into { 
ch_raw_reads_fastqc; + ch_raw_reads_trimgalore } } /* * Create a channel with [sample_id, control id, antibody, replicatesExist, multipleGroups] */ - ch_design_controls_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), row.multipleGroups.toBoolean() ] } - .set { ch_design_controls_csv } +ch_design_controls_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), row.multipleGroups.toBoolean() ] } + .set { ch_design_controls_csv } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// @@ -340,12 +348,12 @@ if (params.singleEnd) { /* * PREPROCESSING - Build BWA index */ -if (!params.bwa_index){ - process makeBWAindex { +if (!params.bwa_index) { + process BWAIndex { tag "$fasta" label 'process_high' - publishDir path: { params.saveGenomeIndex ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveGenomeIndex ? it : null }, mode: 'copy' + publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.save_reference ? 
it : null }, mode: 'copy' input: file fasta from ch_fasta @@ -364,8 +372,8 @@ if (!params.bwa_index){ /* * PREPROCESSING - Generate gene BED file */ -if (!params.gene_bed){ - process makeGeneBED { +if (!params.gene_bed) { + process MakeGeneBED { tag "$gtf" label 'process_low' publishDir "${params.outdir}/reference_genome", mode: 'copy' @@ -386,8 +394,8 @@ if (!params.gene_bed){ /* * PREPROCESSING - Generate TSS BED file */ -if (!params.tss_bed){ - process makeTSSBED { +if (!params.tss_bed) { + process MakeTSSBED { tag "$bed" publishDir "${params.outdir}/reference_genome", mode: 'copy' @@ -407,12 +415,13 @@ if (!params.tss_bed){ /* * PREPROCESSING - Prepare genome intervals for filtering */ -process makeGenomeFilter { +process MakeGenomeFilter { tag "$fasta" publishDir "${params.outdir}/reference_genome", mode: 'copy' input: file fasta from ch_fasta + file blacklist from ch_blacklist.ifEmpty([]) output: file "$fasta" into ch_genome_fasta // FASTA FILE FOR IGV @@ -421,7 +430,7 @@ process makeGenomeFilter { file "*.sizes" into ch_genome_sizes_bigwig // CHROMOSOME SIZES FILE FOR BEDTOOLS script: - blacklist_filter = params.blacklist ? "sortBed -i ${params.blacklist} -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" + blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" """ samtools faidx $fasta cut -f 1,2 ${fasta}.fai > ${fasta}.sizes @@ -440,14 +449,16 @@ process makeGenomeFilter { /* * STEP 1 - FastQC */ -process fastqc { +process FastQC { tag "$name" label 'process_medium' publishDir "${params.outdir}/fastqc", mode: 'copy', - saveAs: {filename -> filename.endsWith(".zip") ? "zips/$filename" : "$filename"} + saveAs: { filename -> + filename.endsWith(".zip") ? 
"zips/$filename" : "$filename" + } when: - !params.skipFastQC + !params.skip_fastqc input: set val(name), file(reads) from ch_raw_reads_fastqc @@ -457,17 +468,17 @@ process fastqc { script: // Added soft-links to original fastqs for consistent naming in MultiQC - if (params.singleEnd) { + if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - fastqc -q ${name}.fastq.gz + fastqc -q -t $task.cpus ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - fastqc -q ${name}_1.fastq.gz - fastqc -q ${name}_2.fastq.gz + fastqc -q -t $task.cpus ${name}_1.fastq.gz + fastqc -q -t $task.cpus ${name}_2.fastq.gz """ } } @@ -483,21 +494,21 @@ process fastqc { /* * STEP 2 - Trim Galore! */ -if (params.skipTrimming){ +if (params.skip_trimming) { ch_trimmed_reads = ch_raw_reads_trimgalore ch_trimgalore_results_mqc = [] ch_trimgalore_fastqc_reports_mqc = [] } else { - process trimGalore { + process TrimGalore { tag "$name" label 'process_long' publishDir "${params.outdir}/trim_galore", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".html")) "fastqc/$filename" - else if (filename.endsWith(".zip")) "fastqc/zip/$filename" - else if (filename.endsWith("trimming_report.txt")) "logs/$filename" - else params.saveTrimmed ? filename : null - } + saveAs: { filename -> + if (filename.endsWith(".html")) "fastqc/$filename" + else if (filename.endsWith(".zip")) "fastqc/zips/$filename" + else if (filename.endsWith("trimming_report.txt")) "logs/$filename" + else params.save_trimmed ? filename : null + } input: set val(name), file(reads) from ch_raw_reads_trimgalore @@ -513,16 +524,17 @@ if (params.skipTrimming){ c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' tpc_r2 = params.three_prime_clip_r2 > 0 ? 
"--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - if (params.singleEnd) { + nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' + if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - trim_galore --fastqc --gzip $c_r1 $tpc_r1 ${name}.fastq.gz + trim_galore --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 ${name}_1.fastq.gz ${name}_2.fastq.gz + trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${name}_1.fastq.gz ${name}_2.fastq.gz """ } } @@ -539,7 +551,7 @@ if (params.skipTrimming){ /* * STEP 3.1 - Align read 1 with bwa */ -process bwaMEM { +process BWAMem { tag "$name" label 'process_high' @@ -551,11 +563,10 @@ process bwaMEM { set val(name), file("*.bam") into ch_bwa_bam script: - prefix="${name}.Lb" - if (!params.seq_center) { - rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" - } else { - rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" + prefix = "${name}.Lb" + rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" + if (params.seq_center) { + rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" } """ bwa mem \\ @@ -571,16 +582,17 @@ process bwaMEM { /* * STEP 3.2 - Convert .bam to coordinate sorted .bam */ -process sortBAM { +process SortBAM { tag "$name" label 'process_medium' - if (params.saveAlignedIntermediates) { + if (params.save_align_intermeds) { publishDir path: "${params.outdir}/bwa/library", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) 
"samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else filename } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else filename + } } input: @@ -591,7 +603,7 @@ process sortBAM { file "*.{flagstat,idxstats,stats}" into ch_sort_bam_flagstat_mqc script: - prefix="${name}.Lb" + prefix = "${name}.Lb" """ samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam samtools index ${prefix}.sorted.bam @@ -610,24 +622,25 @@ process sortBAM { /////////////////////////////////////////////////////////////////////////////// /* - * STEP 4.1 Merge BAM files for all libraries from same sample + * STEP 4.1 Merge BAM files for all libraries from same replicate */ -ch_sort_bam_merge.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } - .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ] } - .set { ch_sort_bam_merge } +ch_sort_bam_merge + .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } + .groupTuple(by: [0]) + .map { it -> [ it[0], it[1].flatten() ] } + .set { ch_sort_bam_merge } -process mergeBAM { +process MergeBAM { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".metrics.txt")) "picard_metrics/$filename" - else params.saveAlignedIntermediates ? 
filename : null - } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".metrics.txt")) "picard_metrics/$filename" + else params.save_align_intermeds ? filename : null + } input: set val(name), file(bams) from ch_sort_bam_merge @@ -639,11 +652,11 @@ process mergeBAM { file "*.txt" into ch_merge_bam_metrics_mqc script: - prefix="${name}.mLb.mkD" + prefix = "${name}.mLb.mkD" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - if (!task.memory){ + def avail_mem = 3 + if (!task.memory) { log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." - avail_mem = 3 } else { avail_mem = task.memory.toGiga() } @@ -693,19 +706,20 @@ process mergeBAM { /* * STEP 4.2 Filter BAM file at merged library-level */ -process filterBAM { +process MergeBAMFilter { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (params.singleEnd || params.saveAlignedIntermediates) { - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".sorted.bam")) filename - else if (filename.endsWith(".sorted.bam.bai")) filename - else null } - } + if (params.single_end || params.save_align_intermeds) { + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".sorted.bam")) filename + else if (filename.endsWith(".sorted.bam.bai")) filename + else null + } + } input: set val(name), file(bam) from ch_merge_bam_filter @@ 
-718,12 +732,12 @@ process filterBAM { file "*.{idxstats,stats}" into ch_filter_bam_stats_mqc script: - prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" - filter_params = params.singleEnd ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" - dup_params = params.keepDups ? "" : "-F 0x0400" - multimap_params = params.keepMultiMap ? "" : "-q 1" + prefix = params.single_end ? "${name}.mLb.clN" : "${name}.mLb.flT" + filter_params = params.single_end ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" + dup_params = params.keep_dups ? "" : "-F 0x0400" + multimap_params = params.keep_multi_map ? "" : "-q 1" blacklist_params = params.blacklist ? "-L $bed" : "" - name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" + name_sort_bam = params.single_end ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" """ samtools view \\ $filter_params \\ @@ -747,30 +761,35 @@ process filterBAM { /* * STEP 4.3 Remove orphan reads from paired-end BAM file */ -if (params.singleEnd){ - ch_filter_bam.into { ch_rm_orphan_bam_metrics; - ch_rm_orphan_bam_bigwig; - ch_rm_orphan_bam_macs_1; - ch_rm_orphan_bam_macs_2; - ch_rm_orphan_bam_phantompeakqualtools; - ch_rm_orphan_name_bam_counts } - ch_filter_bam_flagstat.into { ch_rm_orphan_flagstat_bigwig; - ch_rm_orphan_flagstat_macs; - ch_rm_orphan_flagstat_mqc } - ch_filter_bam_stats_mqc.set { ch_rm_orphan_stats_mqc } +if (params.single_end) { + ch_filter_bam + .into { ch_rm_orphan_bam_metrics; + ch_rm_orphan_bam_bigwig; + ch_rm_orphan_bam_macs_1; + ch_rm_orphan_bam_macs_2; + ch_rm_orphan_bam_phantompeakqualtools; + ch_rm_orphan_name_bam_counts } + + ch_filter_bam_flagstat + .into { ch_rm_orphan_flagstat_bigwig; + ch_rm_orphan_flagstat_macs; + ch_rm_orphan_flagstat_mqc } + + ch_filter_bam_stats_mqc + .set { ch_rm_orphan_stats_mqc } } else { - process rmOrphanReads { + process MergeBAMRemoveOrphan { tag "$name" label 'process_medium' publishDir 
path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".sorted.bam")) filename - else if (filename.endsWith(".sorted.bam.bai")) filename - else null - } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".sorted.bam")) filename + else if (filename.endsWith(".sorted.bam.bai")) filename + else null + } input: set val(name), file(bam) from ch_filter_bam @@ -788,7 +807,7 @@ if (params.singleEnd){ file "*.{idxstats,stats}" into ch_rm_orphan_stats_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" """ bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs @@ -812,22 +831,22 @@ if (params.singleEnd){ /* * STEP 5.1 preseq analysis after merging libraries and before filtering */ -process preseq { +process Preseq { tag "$name" label 'process_low' publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: 'copy' when: - !params.skipPreseq + !params.skip_preseq input: set val(name), file(bam) from ch_merge_bam_preseq output: - file "*.ccurve.txt" into ch_preseq_results + file "*.ccurve.txt" into ch_preseq_mqc script: - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" """ preseq lc_extrap -v -output ${prefix}.ccurve.txt -bam ${bam[0]} """ @@ -836,18 +855,18 @@ process preseq { /* * STEP 5.2 Picard CollectMultipleMetrics after merging libraries and filtering */ -process collectMultipleMetrics { +process CollectMultipleMetrics { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - 
if (filename.endsWith("_metrics")) "picard_metrics/$filename" - else if (filename.endsWith(".pdf")) "picard_metrics/pdf/$filename" - else null - } + if (filename.endsWith("_metrics")) "picard_metrics/$filename" + else if (filename.endsWith(".pdf")) "picard_metrics/pdf/$filename" + else null + } when: - !params.skipPicardMetrics + !params.skip_picard_metrics input: set val(name), file(bam) from ch_rm_orphan_bam_metrics @@ -858,10 +877,10 @@ process collectMultipleMetrics { file "*.pdf" into ch_collectmetrics_pdf script: - prefix="${name}.mLb.clN" - if (!task.memory){ + prefix = "${name}.mLb.clN" + def avail_mem = 3 + if (!task.memory) { log.info "[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." - avail_mem = 3 } else { avail_mem = task.memory.toGiga() } @@ -878,14 +897,14 @@ process collectMultipleMetrics { /* * STEP 5.3 Read depth normalised bigWig */ -process bigWig { +process BigWig { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".txt")) "scale/$filename" - else if (filename.endsWith(".bigWig")) "$filename" - else null + saveAs: { filename -> + if (filename.endsWith("scale_factor.txt")) "scale/$filename" + else if (filename.endsWith(".bigWig")) "$filename" + else null } input: @@ -898,9 +917,9 @@ process bigWig { file "*igv.txt" into ch_bigwig_igv script: - prefix="${name}.mLb.clN" - pe_fragment = params.singleEnd ? "" : "-pc" - extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + prefix = "${name}.mLb.clN" + pe_fragment = params.single_end ? "" : "-pc" + extend = (params.single_end && params.fragment_size > 0) ? 
"-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -915,13 +934,13 @@ process bigWig { /* * STEP 5.4 generate gene body coverage plot with deepTools */ -process plotProfile { +process PlotProfile { tag "$name" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: 'copy' when: - !params.skipPlotProfile + !params.skip_plot_profile input: set val(name), file(bigwig) from ch_bigwig_plotprofile @@ -943,7 +962,7 @@ process plotProfile { --afterRegionStartLength 3000 \\ --skipZeros \\ --smartLabels \\ - -p $task.cpus + --numberOfProcessors $task.cpus plotProfile --matrixFile ${name}.computeMatrix.mat.gz \\ --outFileName ${name}.plotProfile.pdf \\ @@ -954,13 +973,13 @@ process plotProfile { /* * STEP 5.5 Phantompeakqualtools */ -process phantomPeakQualTools { +process PhantomPeakQualTools { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/phantompeakqualtools", mode: 'copy' when: - !params.skipSpp + !params.skip_spp input: set val(name), file(bam) from ch_rm_orphan_bam_phantompeakqualtools @@ -995,26 +1014,29 @@ process phantomPeakQualTools { /////////////////////////////////////////////////////////////////////////////// // Create channel linking IP bams with control bams -ch_rm_orphan_bam_macs_1.combine(ch_rm_orphan_bam_macs_2) - .set { ch_rm_orphan_bam_macs_1 } -ch_design_controls_csv.combine(ch_rm_orphan_bam_macs_1) - .filter { it[0] == it[5] && it[1] == it[7] } - .join(ch_rm_orphan_flagstat_macs) - .map { it -> it[2..-1] } - .into { ch_group_bam_macs; - ch_group_bam_plotfingerprint; - ch_group_bam_deseq } +ch_rm_orphan_bam_macs_1 + .combine(ch_rm_orphan_bam_macs_2) + .set { ch_rm_orphan_bam_macs_1 } + +ch_design_controls_csv + .combine(ch_rm_orphan_bam_macs_1) + .filter { it[0] == it[5] && it[1] == it[7] } + .join(ch_rm_orphan_flagstat_macs) + .map { it -> it[2..-1] } + .into { 
ch_group_bam_macs; + ch_group_bam_plotfingerprint; + ch_group_bam_deseq } /* * STEP 6.1 deepTools plotFingerprint */ -process plotFingerprint { +process PlotFingerprint { tag "${ip} vs ${control}" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: 'copy' when: - !params.skipPlotFingerprint + !params.skip_plot_fingerprint input: set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), file(ipbam), val(control), file(controlbam), file(ipflagstat) from ch_group_bam_plotfingerprint @@ -1024,7 +1046,7 @@ process plotFingerprint { file '*.raw.txt' into ch_plotfingerprint_mqc script: - extend = (params.singleEnd && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + extend = (params.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' """ plotFingerprint \\ --bamfiles ${ipbam[0]} ${controlbam[0]} \\ @@ -1035,22 +1057,22 @@ process plotFingerprint { --outQualityMetrics ${ip}.plotFingerprint.qcmetrics.txt \\ --skipZeros \\ --JSDsample ${controlbam[0]} \\ - --numberOfProcessors ${task.cpus} \\ - --numberOfSamples ${params.fingerprint_bins} + --numberOfProcessors $task.cpus \\ + --numberOfSamples $params.fingerprint_bins """ } /* * STEP 6.2 Call peaks with MACS2 and calculate FRiP score */ -process macsCallPeak { +process MACSCallPeak { tag "${ip} vs ${control}" - label 'process_long' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".tsv")) "qc/$filename" - else if (filename.endsWith(".igv.txt")) null - else filename + label 'process_medium' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: 'copy', + saveAs: { filename -> + if (filename.endsWith(".tsv")) "qc/$filename" + else if (filename.endsWith(".igv.txt")) null + else filename } when: @@ -1063,43 +1085,41 @@ process macsCallPeak { output: set val(ip), file("*.{bed,xls,gappedPeak,bdg}") into 
ch_macs_output - set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), file("*.$peaktype") into ch_macs_homer, ch_macs_qc, ch_macs_consensus + set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), file("*.$PEAK_TYPE") into ch_macs_homer, ch_macs_qc, ch_macs_consensus file "*igv.txt" into ch_macs_igv file "*_mqc.tsv" into ch_macs_mqc script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" - format = params.singleEnd ? "BAM" : "BAMPE" - pileup = params.saveMACSPileup ? "-B --SPMR" : "" + broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" + format = params.single_end ? "BAM" : "BAMPE" + pileup = params.save_macs_pileup ? "-B --SPMR" : "" """ macs2 callpeak \\ -t ${ipbam[0]} \\ -c ${controlbam[0]} \\ $broad \\ -f $format \\ - -g ${params.macs_gsize} \\ + -g $params.macs_gsize \\ -n $ip \\ $pileup \\ - --keep-dup all \\ - --nomodel + --keep-dup all - cat ${ip}_peaks.${peaktype} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv + cat ${ip}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv - READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${peaktype} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') + READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') grep 'mapped (' $ipflagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${ip}", a/\$1}' | cat $frip_score_header - > ${ip}_peaks.FRiP_mqc.tsv - find * -type f -name "*.${peaktype}" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/"{}"\\t0,0,178" \\; > ${ip}_peaks.${peaktype}.igv.txt + find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > 
${ip}_peaks.igv.txt """ } /* * STEP 6.3 Annotate peaks with HOMER */ -process annotatePeaks { +process AnnotatePeaks { tag "${ip} vs ${control}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: 'copy' when: params.macs_gsize @@ -1113,12 +1133,13 @@ process annotatePeaks { file "*.txt" into ch_macs_annotate script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ - annotatePeaks.pl $peak \\ + annotatePeaks.pl \\ + $peak \\ $fasta \\ -gid \\ -gtf $gtf \\ + -cpu $task.cpus \\ > ${ip}_peaks.annotatePeaks.txt """ } @@ -1126,37 +1147,38 @@ process annotatePeaks { /* * STEP 6.4 Aggregated QC plots for peaks, FRiP and peak-to-gene annotation */ -process peakQC { - label "process_medium" - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/qc", mode: 'copy' - - when: - params.macs_gsize - - input: - file peaks from ch_macs_qc.collect{ it[-1] } - file annos from ch_macs_annotate.collect() - file peak_annotation_header from ch_peak_annotation_header - - output: - file "*.{txt,pdf}" into ch_macs_qc_output - file "*.tsv" into ch_macs_qc_mqc - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" - """ - plot_macs_qc.r -i ${peaks.join(',')} \\ - -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ - -o ./ \\ - -p macs_peak - - plot_homer_annotatepeaks.r -i ${annos.join(',')} \\ - -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ - -o ./ \\ - -p macs_annotatePeaks - - cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv - """ +process PeakQC { + label "process_medium" + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/qc", mode: 'copy' + + when: + params.macs_gsize + + input: + file peaks from ch_macs_qc.collect{ it[-1] } + file annos from ch_macs_annotate.collect() + file peak_annotation_header from ch_peak_annotation_header + + output: + file "*.{txt,pdf}" into ch_macs_qc_output + file "*.tsv" into ch_macs_qc_mqc + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + """ + plot_macs_qc.r \\ + -i ${peaks.join(',')} \\ + -s ${peaks.join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ + -o ./ \\ + -p macs_peak + + plot_homer_annotatepeaks.r \\ + -i ${annos.join(',')} \\ + -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ + -o ./ \\ + -p macs_annotatePeaks + + cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv + """ } /////////////////////////////////////////////////////////////////////////////// @@ -1168,21 +1190,22 @@ process peakQC { /////////////////////////////////////////////////////////////////////////////// // group by ip from this point and carry forward boolean variables -ch_macs_consensus.map { it -> [ it[0], it[1], it[2], it[-1] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].sort() ] } - .set { ch_macs_consensus } +ch_macs_consensus + .map { it -> [ it[0], it[1], it[2], it[-1] ] } + .groupTuple() + .map { it -> [ it[0], it[1][0], it[2][0], it[3].sort() ] } + .set { ch_macs_consensus } /* * STEP 7.1 Consensus peaks across samples, 
create boolean filtering file, .saf file for featureCounts and UpSetR plot for intersection */ -process createConsensusPeakSet { +process ConsensusPeakSet { tag "${antibody}" label 'process_long' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".igv.txt")) null - else filename + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: 'copy', + saveAs: { filename -> + if (filename.endsWith(".igv.txt")) null + else filename } when: @@ -1199,17 +1222,16 @@ process createConsensusPeakSet { file "*igv.txt" into ch_macs_consensus_igv script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') - collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') - expandparam = params.narrowPeak ? "--is_narrow_peak" : "" + prefix = "${antibody}.consensus_peaks" + mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') + collapsecols = params.narrow_peak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') + expandparam = params.narrow_peak ? 
"--is_narrow_peak" : "" """ sort -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt macs2_merged_expand.py ${prefix}.txt \\ - ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peaktype}","")} \\ + ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ ${prefix}.boolean.txt \\ --min_replicates $params.min_reps_consensus \\ $expandparam @@ -1221,17 +1243,17 @@ process createConsensusPeakSet { plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf - find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt + find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt """ } /* * STEP 7.2 Annotate consensus peaks with HOMER, and add annotation to boolean output file */ -process annotateConsensusPeakSet { +process ConsensusPeakSetAnnotate { tag "${antibody}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: 'copy' when: params.macs_gsize && (replicatesExist || multipleGroups) @@ -1246,13 +1268,14 @@ process annotateConsensusPeakSet { file "*.annotatePeaks.txt" into ch_macs_consensus_annotate script: - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" + prefix = "${antibody}.consensus_peaks" """ - annotatePeaks.pl $bed \\ + annotatePeaks.pl \\ + $bed \\ $fasta \\ -gid \\ -gtf $gtf \\ + -cpu $task.cpus \\ > ${prefix}.annotatePeaks.txt cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -k1,1 -k2,2n"}' | cut -f6- > tmp.txt @@ -1261,28 +1284,29 @@ process annotateConsensusPeakSet { } // get bam and saf files for each ip -ch_group_bam_deseq.map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } - .join(ch_rm_orphan_name_bam_counts) - .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] } - .join(ch_macs_consensus_saf) - .set { ch_group_bam_deseq } +ch_group_bam_deseq + .map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } + .join(ch_rm_orphan_name_bam_counts) + .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] } + .groupTuple() + .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] } + .join(ch_macs_consensus_saf) + .set { ch_group_bam_deseq } /* * STEP 7.3 Count reads in consensus peaks with featureCounts and perform differential analysis with DESeq2 */ -process deseqConsensusPeakSet { +process ConsensusPeakSetDESeq { tag "${antibody}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".igv.txt")) null - else filename + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2", mode: 'copy', + saveAs: { filename -> + if (filename.endsWith(".igv.txt")) null + else filename } when: - params.macs_gsize && !params.skipDiffAnalysis && replicatesExist && multipleGroups + params.macs_gsize && replicatesExist && multipleGroups && !params.skip_diff_analysis input: set val(antibody), val(replicatesExist), val(multipleGroups), file(bams) ,file(saf) from ch_group_bam_deseq @@ -1300,13 +1324,13 @@ process 
deseqConsensusPeakSet { file "*.tsv" into ch_macs_consensus_deseq_mqc script: - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + prefix = "${antibody}.consensus_peaks" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" - pe_params = params.singleEnd ? '' : "-p --donotsort" + bam_ext = params.single_end ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" + pe_params = params.single_end ? '' : "-p --donotsort" """ - featureCounts -F SAF \\ + featureCounts \\ + -F SAF \\ -O \\ --fracOverlap 0.2 \\ -T $task.cpus \\ @@ -1325,7 +1349,7 @@ process deseqConsensusPeakSet { sed -i -e 's/DESeq2:/${antibody} DESeq2:/g' tmp.txt cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv - find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt + find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt """ } @@ -1340,11 +1364,11 @@ process deseqConsensusPeakSet { /* * STEP 8 - Create IGV session file */ -process igv { - publishDir "${params.outdir}/igv", mode: 'copy' +process IGV { + publishDir "${params.outdir}/igv/${PEAK_TYPE}", mode: 'copy' when: - !params.skipIGV + !params.skip_igv input: file fasta from ch_fasta @@ -1359,7 +1383,7 @@ process igv { script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ """ cat *.txt > igv_files.txt - igv_files_to_session.py igv_session.xml igv_files.txt ../reference_genome/${fasta.getName()} --path_prefix '../' + igv_files_to_session.py igv_session.xml igv_files.txt ../../reference_genome/${fasta.getName()} --path_prefix '../../' """ } @@ -1376,10 +1400,10 @@ process igv { */ process get_software_versions { publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: 
{filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } + saveAs: { filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } output: file 'software_versions_mqc.yaml' into ch_software_versions_mqc @@ -1429,18 +1453,21 @@ ${summary.collect { k,v -> "
$k
${v ?: ' w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) + def output_tf = new File(output_d, "pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } c_reset = params.monochrome_logs ? '' : "\033[0m"; @@ -1599,12 +1628,12 @@ workflow.onComplete { c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" } - if(workflow.success){ + if (workflow.success) { log.info "${c_purple}[nf-core/chipseq]${c_green} Pipeline completed successfully${c_reset}" } else { checkHostname() @@ -1621,7 +1650,7 @@ workflow.onComplete { /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -def nfcoreHeader(){ +def nfcoreHeader() { // Log colors ANSI codes c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; @@ -1633,27 +1662,27 @@ def nfcoreHeader(){ c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? 
'' : "\033[0;37m"; - return """ ${c_dim}----------------------------------------------------${c_reset} + return """ -${c_dim}--------------------------------------------------${c_reset}- ${c_green},--.${c_black}/${c_green},-.${c_reset} ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} ${c_purple} nf-core/chipseq v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } -def checkHostname(){ +def checkHostname() { def c_reset = params.monochrome_logs ? '' : "\033[0m" def c_white = params.monochrome_logs ? '' : "\033[0;37m" def c_red = params.monochrome_logs ? '' : "\033[1;91m" def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" - if(params.hostnames){ + if (params.hostnames) { def hostname = "hostname".execute().text.trim() params.hostnames.each { prof, hnames -> hnames.each { hname -> - if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { log.error "====================================================\n" + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + diff --git a/nextflow.config b/nextflow.config index 0ad95086..c758ab92 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,7 @@ params { // Options: Generic - singleEnd = false + single_end = false seq_center = false fragment_size = 200 fingerprint_bins = 500000 @@ -17,47 +17,38 @@ params { // Options: References genome = false tss_bed = false - saveGenomeIndex = false + save_reference = false // Options: Trimming clip_r1 = 0 clip_r2 = 0 three_prime_clip_r1 = 0 three_prime_clip_r2 = 0 - 
skipTrimming = false - saveTrimmed = false + trim_nextseq = 0 + skip_trimming = false + save_trimmed = false // Options: Alignments - keepDups = false - keepMultiMap = false - saveAlignedIntermediates = false + keep_dups = false + keep_multi_map = false + save_align_intermeds = false // Options: Peaks - narrowPeak = false + narrow_peak = false broad_cutoff = 0.1 min_reps_consensus = 1 - saveMACSPileup = false - skipDiffAnalysis = false + save_macs_pileup = false + skip_diff_analysis = false // Options: QC - skipFastQC = false - skipPicardMetrics = false - skipPreseq = false - skipPlotProfile = false - skipPlotFingerprint = false - skipSpp = false - skipIGV = false - skipMultiQC = false - skipMultiQCStats = false - - // Options: Other - outdir = './results' - igenomes_base = "./iGenomes" - email = false - plaintext_email = false - monochrome_logs = false - name = false - maxMultiqcEmailFileSize = 25.MB + skip_fastqc = false + skip_picard_metrics = false + skip_preseq = false + skip_plot_profile = false + skip_plot_fingerprint = false + skip_spp = false + skip_igv = false + skip_multiqc = false // Options: AWSBatch awsqueue = false @@ -69,24 +60,37 @@ params { bamtools_filter_se_config = "$baseDir/assets/bamtools_filter_se.json" // Options: Custom config + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_description = false config_profile_contact = false config_profile_url = false - // Options: Default + // Options: Other help = false - hostnames = false + outdir = './results' + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + max_multiqc_email_size = 25.MB tracedir = "${params.outdir}/pipeline_info" + email = false + email_on_fail = false + plaintext_email = false + monochrome_logs = false + name = false + hostnames = false clusterOptions = false - igenomesIgnore = false - custom_config_version = 'master' - custom_config_base = 
"https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + + // Defaults only, expecting to be overwritten + max_memory = 128.GB + max_cpus = 16 + max_time = 240.h } // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/chipseq:1.0.0' +process.container = 'nfcore/chipseq:1.1.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -103,18 +107,29 @@ profiles { conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true } - singularity { singularity.enabled = true } + singularity { singularity.enabled = true + singularity.autoMounts = true } test { includeConfig 'conf/test.config' } } +// Avoid this error: +// WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. +// Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. +docker.runOptions = '-u \$(id -u):\$(id -g)' + // Load igenomes.config if required -if(!params.igenomesIgnore){ +if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } // Increase time available to build conda environment conda { createTimeout = "60 min" } +// Export this variable to prevent local Python libraries from conflicting with those in the container +env { + PYTHONNOUSERSITE = 1 +} + // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] @@ -141,16 +156,16 @@ manifest { homePage = 'https://github.com/nf-core/chipseq' description = 'ChIP-seq peak-calling and differential analysis pipeline.' 
mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' - version = '1.0.0' + nextflowVersion = '>=19.10.0' + version = '1.1.0' } // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if(type == 'memory'){ + if (type == 'memory') { try { - if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit else return obj @@ -158,9 +173,9 @@ def check_max(obj, type) { println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" return obj } - } else if(type == 'time'){ + } else if (type == 'time') { try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration else return obj @@ -168,7 +183,7 @@ def check_max(obj, type) { println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" return obj } - } else if(type == 'cpus'){ + } else if (type == 'cpus') { try { return Math.min( obj, params.max_cpus as int ) } catch (all) {