From 4fe20afe592f13299ca7b0d73abeb0bf97e429d0 Mon Sep 17 00:00:00 2001 From: Katrin Sameith Date: Thu, 25 May 2023 13:41:51 +0200 Subject: [PATCH 01/12] Update bedtools_genomecov.nf Remove paired-end option, and use reads as single-end; otherwise, the coverage graph is misleading as it will often span over nucleosomes --- modules/local/bedtools_genomecov.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 58ac1cae..1766654f 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -21,7 +21,6 @@ process BEDTOOLS_GENOMECOV { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def pe = meta.single_end ? '' : '-pc' """ SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -31,7 +30,6 @@ process BEDTOOLS_GENOMECOV { -ibam $bam \\ -bg \\ -scale \$SCALE_FACTOR \\ - $pe \\ $args \\ | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph From 782658b97f7ecd1cd019b0c4c92d4dcc5689cb52 Mon Sep 17 00:00:00 2001 From: Katrin Date: Fri, 9 Jun 2023 11:06:50 +0200 Subject: [PATCH 02/12] Installed bedtools/bamtobed module --- modules.json | 183 ++++++++++++++++----- modules/nf-core/bedtools/bamtobed/main.nf | 35 ++++ modules/nf-core/bedtools/bamtobed/meta.yml | 38 +++++ 3 files changed, 215 insertions(+), 41 deletions(-) create mode 100644 modules/nf-core/bedtools/bamtobed/main.nf create mode 100644 modules/nf-core/bedtools/bamtobed/meta.yml diff --git a/modules.json b/modules.json index 4149c653..bab8f05f 100644 --- a/modules.json +++ b/modules.json @@ -8,172 +8,259 @@ "ataqv/ataqv": { "branch": "master", "git_sha": "56421e1a812bc2f9e77dbe9f297e9d9c580cb8a5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ataqv/mkarv": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "bedtools/bamtobed": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bowtie2"] + "installed_by": [ + "modules", + "fastq_align_bowtie2" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "e797efb47b0d3b2124753beb55dc83ab9512bceb", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", "git_sha": "9518fa4f65f3fb8cde24fde7d40333b39ec8fd65", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bwa"] + "installed_by": [ + "modules", + "fastq_align_bwa" + ] }, "chromap/chromap": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_chromap"] + "installed_by": [ + "modules", + "fastq_align_chromap" + ] }, "chromap/index": { "branch": "master", "git_sha": "3a8e3ca607132a468c07c69aaa3bccd55eb983b8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/computematrix": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotfingerprint": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotheatmap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "deeptools/plotprofile": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "810e8f2603ec38401d49a4aaed06f6d058745552", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "gffread": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "homer/annotatepeaks": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "khmer/uniquekmers": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "macs2/callpeak": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": "eca65aa4a5e2e192ac44d6962c8f9260f314ffb8", - "installed_by": ["modules", "bam_markduplicates_picard"] + "installed_by": [ + "modules", + "bam_markduplicates_picard" + ] }, "picard/mergesamfiles": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "preseq/lcextrap": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/index": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_markduplicates_picard", "bam_sort_stats_samtools"] + "installed_by": [ + "modules", + "bam_markduplicates_picard", + "bam_sort_stats_samtools" + ] }, "samtools/sort": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "installed_by": [ + "modules", + "bam_sort_stats_samtools" + ] }, "samtools/stats": { "branch": "master", "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "subread/featurecounts": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "trimgalore": { "branch": "master", "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "modules", + "fastq_fastqc_umitools_trimgalore" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "umitools/extract": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["fastq_fastqc_umitools_trimgalore"] + "installed_by": [ + "fastq_fastqc_umitools_trimgalore" + ] }, "untar": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -182,7 +269,9 @@ "bam_markduplicates_picard": { "branch": "master", "git_sha": "6daac2bc63f4847e0c7cc661f4f5b043ac13faaf", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_sort_stats_samtools": { "branch": "master", @@ -197,30 +286,42 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "92eb5091ae5368a60cda58b3a0ced8b36d715b0f", - "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "subworkflows"] + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools", + "subworkflows" + ] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_bwa": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_chromap": { "branch": "master", "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf new file mode 100644 index 00000000..29f5a62f --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -0,0 +1,35 @@ +process BEDTOOLS_BAMTOBED { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bedtools=2.30.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bedtools \\ + bamtobed \\ + $args \\ + -i $bam \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml new file mode 100644 index 00000000..5a4ff73a --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -0,0 +1,38 @@ +name: bedtools_bamtobed +description: Converts a bam file to a bed12 file. +keywords: + - bam + - bed +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Bed file containing genomic intervals. + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@yuukiiwa" + - "@drpatelh" From 4c8cda7f0e55945c8a931cc405558a581f702674 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Fri, 9 Jun 2023 15:02:09 +0200 Subject: [PATCH 03/12] Run MACS2 on bed input rather than bampe --- conf/modules.config | 2 ++ .../local/bam_peaks_call_qc_annotate_macs2_homer.nf | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a856f1b0..b2ee1e10 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -584,6 +584,7 @@ if (!params.skip_plot_fingerprint) { process { withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ + '--format BED', '--keep-dup all', '--nomodel', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", @@ -814,6 +815,7 @@ if (!params.skip_merge_replicates) { process { withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ + '--format BED', '--keep-dup all', '--nomodel', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index 4c2a8710..a17e20a0 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -2,6 +2,7 @@ // Call peaks with MACS2, annotate with HOMER and perform downstream QC // +include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main' include { MACS2_CALLPEAK } from '../../modules/nf-core/macs2/callpeak/main' include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' @@ -28,11 +29,19 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ch_versions = Channel.empty() + // + // Convert bam to bed + // + BEDTOOLS_BAMTOBED ( + ch_bam + ) + ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions.first()) + // // Call peaks with MACS2 // MACS2_CALLPEAK ( - ch_bam, + BEDTOOLS_BAMTOBED.out.bed, macs_gsize ) ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) From 91c0a1687df7ba77f3c6c256bd9b1553e7fbd340 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Fri, 9 Jun 2023 15:06:25 +0200 Subject: [PATCH 04/12] Move reads such that cut-sites are in the middle --- conf/modules.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index b2ee1e10..7b90fe7c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -587,6 +587,8 @@ process { '--format BED', '--keep-dup all', '--nomodel', + '--shift -75', + '--extsize 150', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', From db8f3f817f55088e3c0481883a045fc1cc55bc44 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Wed, 14 Jun 2023 10:58:12 +0200 Subject: [PATCH 05/12] Call peak summits per library that can be used for TFBS detection. --- conf/modules.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/modules.config b/conf/modules.config index 7b90fe7c..d8bc2548 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -589,6 +589,7 @@ process { '--nomodel', '--shift -75', '--extsize 150', + '--call-summits', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', From 83b2e251c27b0e6b9ca27e0d901f0745b4cab5a6 Mon Sep 17 00:00:00 2001 From: Katrin Sameith-Lauber Date: Wed, 14 Jun 2023 11:01:12 +0200 Subject: [PATCH 06/12] Convert bam to bed, and call MACS2 on single-end bed file. --- .../bam_peaks_call_qc_annotate_macs2_homer.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index a17e20a0..b28b255c 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -33,15 +33,25 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { // Convert bam to bed // BEDTOOLS_BAMTOBED ( - ch_bam + ch_bam.map { meta, ip_bam, control_bam -> [ meta, ip_bam ] } ) ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions.first()) + // Create channels: [meta, ip_bed, []] + BEDTOOLS_BAMTOBED + .out + .bed + .map { + meta, ip_bed -> + [ meta, ip_bed, [] ] + } + .set { ch_bed } + // // Call peaks with MACS2 // MACS2_CALLPEAK ( - BEDTOOLS_BAMTOBED.out.bed, + ch_bed, macs_gsize ) ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) From dac43672794fbe74c57604d9a9244685552a43c8 Mon Sep 17 00:00:00 2001 From: Katrin Date: Tue, 27 Jun 2023 10:25:40 +0200 Subject: [PATCH 07/12] Fixed #164 - Introduced 4/5bp shift as it is common for ATAC-seq data. Fixed #168 - Always write out genome fa and fai so IGV session file can be opened. --- conf/modules.config | 65 ++++++++++++++++- modules/local/deeptools_alignmentsieve.nf | 36 ++++++++++ modules/local/igv.nf | 2 + nextflow.config | 1 + nextflow_schema.json | 7 ++ subworkflows/local/bam_shift_reads.nf | 40 +++++++++++ workflows/atacseq.nf | 85 ++++++++++++++++++----- 7 files changed, 216 insertions(+), 20 deletions(-) create mode 100644 modules/local/deeptools_alignmentsieve.nf create mode 100644 subworkflows/local/bam_shift_reads.nf diff --git a/conf/modules.config b/conf/modules.config index d8bc2548..285e890a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -453,6 +453,37 @@ process { ] } + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { + ext.args = '--ATACshift' + ext.prefix = { "${meta.id}.mLb.clN.shifted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.shift_reads + ] + } + + withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: params.shift_reads + ] + } + withName: '.*:MERGED_LIBRARY_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { [ @@ -782,6 +813,37 @@ if (!params.skip_merge_replicates) { ] } + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { + ext.args = '--ATACshift' + ext.prefix = { "${meta.id}.mRp.clN.shifted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.shift_reads + ] + } + + withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + enabled: params.shift_reads + ] + } + withName: '.*:MERGED_REPLICATE_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { [ @@ -956,8 +1018,7 @@ if (!params.skip_igv) { [ path: { "${params.outdir}/genome" }, mode: params.publish_dir_mode, - pattern: '*.{fa,fasta}', - enabled: params.save_reference + pattern: '*.{fa,fasta,fai}' ] ] } diff --git a/modules/local/deeptools_alignmentsieve.nf b/modules/local/deeptools_alignmentsieve.nf new file mode 100644 index 00000000..5148030a --- /dev/null +++ b/modules/local/deeptools_alignmentsieve.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_ALIGNMENTSIEVE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::deeptools=3.5.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + alignmentSieve \\ + $args \\ + -b $bam \\ + -o ${prefix}.bam \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(alignmentSieve --version | sed -e "s/alignmentSieve //g") + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf index 542d6cc2..64f4266b 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -7,6 +7,7 @@ process IGV { input: path fasta + path fai path ("${bigwig_library_publish_dir}/*") path ("${peak_library_publish_dir}/*") path ("${consensus_library_publish_dir}/*") @@ -25,6 +26,7 @@ process IGV { path "*files.txt" , emit: txt path "*.xml" , emit: xml path fasta , emit: fasta + path fai , emit: fai path "versions.yml", emit: versions when: diff --git a/nextflow.config b/nextflow.config index 8495b492..5214e7e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,6 +39,7 @@ params { skip_merge_replicates = false save_align_intermeds = false save_unaligned = false + shift_reads = true // Options: Peaks narrow_peak = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 41d5c76e..d8d7baa9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -289,6 +289,13 @@ "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", "fa_icon": "fas fa-cog" + }, + "shift_reads": { + "type": "boolean", + "fa_icon": "fas fa-chart-area", + "default": true, + "help_text": "Shift aligned reads as commonly done for ATACseq, +4bp for reads on the + strand, -5 bp for reads on the - strand. This can only be applied if all samples are paired-end.", + "description": "Shift aligned reads (+4bp and -5bp)." } } }, diff --git a/subworkflows/local/bam_shift_reads.nf b/subworkflows/local/bam_shift_reads.nf new file mode 100644 index 00000000..d3fcabc1 --- /dev/null +++ b/subworkflows/local/bam_shift_reads.nf @@ -0,0 +1,40 @@ +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { DEEPTOOLS_ALIGNMENTSIEVE } from '../../modules/local/deeptools_alignmentsieve' + +workflow BAM_SHIFT_READS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + ch_versions = Channel.empty() + + // + // Shift reads + // + DEEPTOOLS_ALIGNMENTSIEVE ( + ch_bam_bai + ) + ch_versions = ch_versions.mix(DEEPTOOLS_ALIGNMENTSIEVE.out.versions) + + // + // Sort reads + // + SAMTOOLS_SORT ( + DEEPTOOLS_ALIGNMENTSIEVE.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) + + // + // Index reads + // + SAMTOOLS_INDEX ( + SAMTOOLS_SORT.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index c370ff29..8e5dc907 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -68,9 +68,11 @@ include { MULTIQC } from '../modules/local/multiqc' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { ALIGN_STAR } from '../subworkflows/local/align_star' +include { BAM_SHIFT_READS as MERGED_LIBRARY_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' +include { BAM_SHIFT_READS as MERGED_REPLICATE_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools' include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' @@ -96,6 +98,7 @@ include { PRESEQ_LCEXTRAP as MERGED_LIBRARY_PRESEQ_LCEXTRAP include { DEEPTOOLS_PLOTFINGERPRINT as MERGED_LIBRARY_DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/deeptools/plotfingerprint/main' include { ATAQV_ATAQV as MERGED_LIBRARY_ATAQV_ATAQV } from '../modules/nf-core/ataqv/ataqv/main' include { ATAQV_MKARV as MERGED_LIBRARY_ATAQV_MKARV } from '../modules/nf-core/ataqv/mkarv/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_LIBRARY } from '../modules/nf-core/picard/mergesamfiles/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_REPLICATE } from '../modules/nf-core/picard/mergesamfiles/main' @@ -141,6 +144,24 @@ workflow ATACSEQ { ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // + // Check if reads are all paired-end if 'shift_reads' parameter is set + // + if (params.shift_reads) { + INPUT_CHECK + .out + .reads + .filter { meta, reads -> meta.single_end } + .collect() + .map { + it -> + def count = it.size() + if (count > 0) { + exit 1, 'The parameter --shift_reads can only be applied if all samples are paired-end.' + } + } + } + // // SUBWORKFLOW: Read QC and trim adapters // @@ -242,8 +263,8 @@ workflow ATACSEQ { [], [] ) - ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai + ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats @@ -342,11 +363,27 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) } + // + // SUBWORKFLOW: Shift paired-end reads + // + ch_merged_library_filter_bam = MERGED_LIBRARY_FILTER_BAM.out.bam + ch_merged_library_filter_bai = MERGED_LIBRARY_FILTER_BAM.out.bai + + if (params.shift_reads && params.aligner != 'chromap' ) { + MERGED_LIBRARY_BAM_SHIFT_READS ( + ch_merged_library_filter_bam.join(ch_merged_library_filter_bai, by: [0]), + ) + ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_SHIFT_READS.out.versions) + + ch_merged_library_filter_bam = MERGED_LIBRARY_BAM_SHIFT_READS.out.bam + ch_merged_library_filter_bai = MERGED_LIBRARY_BAM_SHIFT_READS.out.bai + } + // // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_LIBRARY_BAM_TO_BIGWIG ( - MERGED_LIBRARY_FILTER_BAM.out.bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), + ch_merged_library_filter_bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_TO_BIGWIG.out.versions) @@ -366,10 +403,8 @@ workflow ATACSEQ { } // Create channels: [ meta, [bam], [bai] ] - MERGED_LIBRARY_FILTER_BAM - .out - .bam - .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0]) + ch_merged_library_filter_bam + .join(ch_merged_library_filter_bai, by: [0]) .set { ch_bam_bai } // @@ -523,24 +558,37 @@ workflow ATACSEQ { ch_markduplicates_replicate_metrics = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.metrics ch_versions = ch_versions.mix(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.versions) + // + // SUBWORKFLOW: Shift paired-end reads + // Shift again, as ch_merged_library_replicate_bam is generated out of unshifted reads + // + ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam + ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bai + + if (params.shift_reads && params.aligner != 'chromap' ) { + MERGED_REPLICATE_BAM_SHIFT_READS ( + ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_bai, by: [0]), + ) + ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_SHIFT_READS.out.versions) + + ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_BAM_SHIFT_READS.out.bam + ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_BAM_SHIFT_READS.out.bai + } + // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_REPLICATE_BAM_TO_BIGWIG ( - MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), + ch_merged_replicate_markduplicate_bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_ucsc_bedgraphtobigwig_replicate_bigwig = MERGED_REPLICATE_BAM_TO_BIGWIG.out.bigwig ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_TO_BIGWIG.out.versions) // Create channels: [ meta, bam, ([] for control_bam) ] - MERGED_REPLICATE_MARKDUPLICATES_PICARD - .out - .bam - .map { - meta, bam -> - [ meta , bam, [] ] - } - .set { ch_bam_replicate } + // Create channels: [ meta, [bam], [bai] ] + ch_merged_replicate_markduplicate_bam + .join(ch_merged_replicate_markduplicate_bai, by: [0]) + .set { ch_bam_replicate } // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC @@ -593,6 +641,7 @@ workflow ATACSEQ { if (!params.skip_igv) { IGV ( PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]), From df64c3aa3f27f31cdd03770d36c33a8b9e5f55ef Mon Sep 17 00:00:00 2001 From: Katrin Date: Thu, 29 Jun 2023 11:03:54 +0200 Subject: [PATCH 08/12] Use --call-summits only when calling narrow peaks. These summits can then be used for TFBS enrichment. --- conf/modules.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 285e890a..5e0f703b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -620,8 +620,7 @@ process { '--nomodel', '--shift -75', '--extsize 150', - '--call-summits', - params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", + params.narrow_peak ? '--call-summits' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '' From c75b3c8c785a2d33b6532fff7fe3bb1af1d72cc0 Mon Sep 17 00:00:00 2001 From: Katrin Date: Wed, 5 Jul 2023 12:48:50 +0200 Subject: [PATCH 09/12] Make sure bigwig is generated based on shifted reads. --- modules/local/bedtools_genomecov.nf | 2 +- subworkflows/local/bam_shift_reads.nf | 10 ++++++++++ workflows/atacseq.nf | 8 ++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 8f20100e..92c94b99 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -31,7 +31,7 @@ process BEDTOOLS_GENOMECOV { -bg \\ -scale \$SCALE_FACTOR \\ $args \\ - | bedtools sort > ${prefix}.bedGraph + | LC_COLLATE=C sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/bam_shift_reads.nf b/subworkflows/local/bam_shift_reads.nf index d3fcabc1..63553c12 100644 --- a/subworkflows/local/bam_shift_reads.nf +++ b/subworkflows/local/bam_shift_reads.nf @@ -1,5 +1,6 @@ include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { DEEPTOOLS_ALIGNMENTSIEVE } from '../../modules/local/deeptools_alignmentsieve' workflow BAM_SHIFT_READS { @@ -33,8 +34,17 @@ workflow BAM_SHIFT_READS { ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + // + // Run samtools flagstat + // + SAMTOOLS_FLAGSTAT ( + SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + emit: bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index 4f5293f5..d4da4778 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -385,6 +385,7 @@ workflow ATACSEQ { // ch_merged_library_filter_bam = MERGED_LIBRARY_FILTER_BAM.out.bam ch_merged_library_filter_bai = MERGED_LIBRARY_FILTER_BAM.out.bai + ch_merged_library_filter_flagstat = MERGED_LIBRARY_FILTER_BAM.out.flagstat if (params.shift_reads && params.aligner != 'chromap' ) { MERGED_LIBRARY_BAM_SHIFT_READS ( @@ -394,13 +395,14 @@ workflow ATACSEQ { ch_merged_library_filter_bam = MERGED_LIBRARY_BAM_SHIFT_READS.out.bam ch_merged_library_filter_bai = MERGED_LIBRARY_BAM_SHIFT_READS.out.bai + ch_merged_library_filter_flagstat = MERGED_LIBRARY_BAM_SHIFT_READS.out.flagstat } // // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_LIBRARY_BAM_TO_BIGWIG ( - ch_merged_library_filter_bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), + ch_merged_library_filter_bam.join(ch_merged_library_filter_flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_TO_BIGWIG.out.versions) @@ -609,6 +611,7 @@ workflow ATACSEQ { // ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bai + ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat if (params.shift_reads && params.aligner != 'chromap' ) { MERGED_REPLICATE_BAM_SHIFT_READS ( @@ -618,12 +621,13 @@ workflow ATACSEQ { ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_BAM_SHIFT_READS.out.bam ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_BAM_SHIFT_READS.out.bai + ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_BAM_SHIFT_READS.out.flagstat } // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_REPLICATE_BAM_TO_BIGWIG ( - ch_merged_replicate_markduplicate_bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), + ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_ucsc_bedgraphtobigwig_replicate_bigwig = MERGED_REPLICATE_BAM_TO_BIGWIG.out.bigwig From 964cc177150a53d7111bceca78f91886d1927798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Langer?= <61791748+bjlang@users.noreply.github.com> Date: Tue, 15 Aug 2023 20:04:53 +0200 Subject: [PATCH 10/12] Create samplesheet_with_control.csv --- assets/samplesheet_with_control.csv | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 assets/samplesheet_with_control.csv diff --git a/assets/samplesheet_with_control.csv b/assets/samplesheet_with_control.csv new file mode 100644 index 00000000..0e0e1944 --- /dev/null +++ b/assets/samplesheet_with_control.csv @@ -0,0 +1,7 @@ +sample,fastq_1,fastq_2,replicate,control,control_replicate +SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822153_2.fastq.gz,1,SPT5_INPUT,1 +SPT5_T0,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822154_2.fastq.gz,2,SPT5_INPUT,2 +SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822157_2.fastq.gz,1,SPT5_INPUT,1 +SPT5_T15,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/testdata/SRR1822158_2.fastq.gz,2,SPT5_INPUT,2 +SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,1,, +SPT5_INPUT,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,2,, From 42cda346ac56de89d2274f76c26a5551a80d9be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Langer?= <61791748+bjlang@users.noreply.github.com> Date: Wed, 6 Sep 2023 15:50:46 +0200 Subject: [PATCH 11/12] Update bam_shift_reads.nf --- subworkflows/local/bam_shift_reads.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/bam_shift_reads.nf b/subworkflows/local/bam_shift_reads.nf index 63553c12..4e99c1df 100644 --- a/subworkflows/local/bam_shift_reads.nf +++ b/subworkflows/local/bam_shift_reads.nf @@ -43,8 +43,9 @@ workflow BAM_SHIFT_READS { ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] versions = ch_versions // channel: [ versions.yml ] } From 1030a66b60f312466d057fb288b5be02a3fff58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Langer?= <61791748+bjlang@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:52:16 +0200 Subject: [PATCH 12/12] Revert "DO NOT MERGE yet: Discuss fixes of #164 #168 #169 " --- conf/modules.config | 68 +------- modules.json | 147 ++++++------------ modules/local/bedtools_genomecov.nf | 2 + modules/local/deeptools_alignmentsieve.nf | 36 ----- modules/nf-core/bedtools/bamtobed/main.nf | 35 ----- modules/nf-core/bedtools/bamtobed/meta.yml | 38 ----- nextflow.config | 1 - nextflow_schema.json | 7 - .../bam_peaks_call_qc_annotate_macs2_homer.nf | 21 +-- subworkflows/local/bam_shift_reads.nf | 51 ------ workflows/atacseq.nf | 82 ++-------- 11 files changed, 65 insertions(+), 423 deletions(-) delete mode 100644 modules/local/deeptools_alignmentsieve.nf delete mode 100644 modules/nf-core/bedtools/bamtobed/main.nf delete mode 100644 modules/nf-core/bedtools/bamtobed/meta.yml delete mode 100644 subworkflows/local/bam_shift_reads.nf diff --git a/conf/modules.config b/conf/modules.config index 622534f3..2da820b5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -465,37 +465,6 @@ process { ] } - withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { - ext.args = '--ATACshift' - ext.prefix = { "${meta.id}.mLb.clN.shifted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.save_align_intermeds - ] - } - - withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.shift_reads - ] - } - - withName: '.*:MERGED_LIBRARY_BAM_SHIFT_READS:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.mLb.clN.shifted.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/shifted_reads" }, - mode: params.publish_dir_mode, - pattern: '*.bai', - enabled: params.shift_reads - ] - } - withName: '.*:MERGED_LIBRARY_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } ext.prefix = { "${meta.id}.mLb.clN" } @@ -623,12 +592,9 @@ if (!params.skip_plot_fingerprint) { process { withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ - '--format BED', '--keep-dup all', '--nomodel', - '--shift -75', - '--extsize 150', - params.narrow_peak ? '--call-summits' : "--broad --broad-cutoff ${params.broad_cutoff}", + params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", params.save_macs_pileup ? '--bdg --SPMR' : '', params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '' @@ -820,37 +786,6 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:DEEPTOOLS_ALIGNMENTSIEVE' { - ext.args = '--ATACshift' - ext.prefix = { "${meta.id}.mRp.clN.shifted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_replicate" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.save_align_intermeds - ] - } - - withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.shift_reads - ] - } - - withName: '.*:MERGED_REPLICATE_BAM_SHIFT_READS:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.mRp.clN.shifted.sorted" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_replicate/shifted_reads" }, - mode: params.publish_dir_mode, - pattern: '*.bai', - enabled: params.shift_reads - ] - } - withName: '.*:MERGED_REPLICATE_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } ext.prefix = { "${meta.id}.mRp.clN" } @@ -883,7 +818,6 @@ if (!params.skip_merge_replicates) { process { withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { ext.args = [ - '--format BED', '--keep-dup all', '--nomodel', params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", diff --git a/modules.json b/modules.json index 79827467..962c8dc3 100644 --- a/modules.json +++ b/modules.json @@ -7,24 +7,13 @@ "nf-core": { "ataqv/ataqv": { "branch": "master", - "git_sha": "56421e1a812bc2f9e77dbe9f297e9d9c580cb8a5", - "installed_by": [ - "modules" - ] + "git_sha": "11c7e5b3073845889060c793786bf3177275d62e", + "installed_by": ["modules"] }, "ataqv/mkarv": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] - }, - "bedtools/bamtobed": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "git_sha": "11c7e5b3073845889060c793786bf3177275d62e", + "installed_by": ["modules"] }, "bowtie2/align": { "branch": "master", @@ -33,17 +22,13 @@ }, "bowtie2/build": { "branch": "master", - "git_sha": "e797efb47b0d3b2124753beb55dc83ab9512bceb", - "installed_by": [ - "modules" - ] + "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", + "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "9518fa4f65f3fb8cde24fde7d40333b39ec8fd65", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", @@ -57,52 +42,38 @@ }, "chromap/index": { "branch": "master", - "git_sha": "3a8e3ca607132a468c07c69aaa3bccd55eb983b8", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "deeptools/computematrix": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "deeptools/plotfingerprint": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "deeptools/plotheatmap": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "deeptools/plotprofile": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "fastqc": { "branch": "master", @@ -111,10 +82,8 @@ }, "gffread": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "gunzip": { "branch": "master", @@ -128,24 +97,18 @@ }, "khmer/uniquekmers": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "macs2/callpeak": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", @@ -154,17 +117,13 @@ }, "picard/mergesamfiles": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "preseq/lcextrap": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", @@ -193,10 +152,8 @@ }, "subread/featurecounts": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] }, "trimgalore": { "branch": "master", @@ -210,17 +167,13 @@ }, "umitools/extract": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "fastq_fastqc_umitools_trimgalore" - ] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "untar": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", - "installed_by": [ - "modules" - ] + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "installed_by": ["modules"] } } }, @@ -228,10 +181,8 @@ "nf-core": { "bam_markduplicates_picard": { "branch": "master", - "git_sha": "6daac2bc63f4847e0c7cc661f4f5b043ac13faaf", - "installed_by": [ - "subworkflows" - ] + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "installed_by": ["subworkflows"] }, "bam_sort_stats_samtools": { "branch": "master", @@ -250,10 +201,8 @@ }, "fastq_align_bwa": { "branch": "master", - "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": [ - "subworkflows" - ] + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "installed_by": ["subworkflows"] }, "fastq_align_chromap": { "branch": "master", @@ -262,13 +211,11 @@ }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", - "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": [ - "subworkflows" - ] + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 9ee0b613..66c96030 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -21,6 +21,7 @@ process BEDTOOLS_GENOMECOV { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def pe = meta.single_end ? '' : '-pc' """ SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -30,6 +31,7 @@ process BEDTOOLS_GENOMECOV { -ibam $bam \\ -bg \\ -scale \$SCALE_FACTOR \\ + $pe \\ $args \\ > tmp.bg diff --git a/modules/local/deeptools_alignmentsieve.nf b/modules/local/deeptools_alignmentsieve.nf deleted file mode 100644 index 5148030a..00000000 --- a/modules/local/deeptools_alignmentsieve.nf +++ /dev/null @@ -1,36 +0,0 @@ -process DEEPTOOLS_ALIGNMENTSIEVE { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::deeptools=3.5.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'biocontainers/deeptools:3.5.1--py_0' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - alignmentSieve \\ - $args \\ - -b $bam \\ - -o ${prefix}.bam \\ - --numberOfProcessors $task.cpus - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(alignmentSieve --version | sed -e "s/alignmentSieve //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf deleted file mode 100644 index 29f5a62f..00000000 --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process BEDTOOLS_BAMTOBED { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bedtools=2.30.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("*.bed"), emit: bed - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bedtools \\ - bamtobed \\ - $args \\ - -i $bam \\ - > ${prefix}.bed - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml deleted file mode 100644 index 5a4ff73a..00000000 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: bedtools_bamtobed -description: Converts a bam file to a bed12 file. -keywords: - - bam - - bed -tools: - - bedtools: - description: | - A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. - documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Input BAM file - pattern: "*.{bam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bed: - type: file - description: Bed file containing genomic intervals. - pattern: "*.{bed}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@yuukiiwa" - - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index e95d207d..9f60b156 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,6 @@ params { skip_merge_replicates = false save_align_intermeds = false save_unaligned = false - shift_reads = true // Options: Peaks narrow_peak = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 6a5f11b1..2e384c9e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -325,13 +325,6 @@ "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", "fa_icon": "fas fa-cog" - }, - "shift_reads": { - "type": "boolean", - "fa_icon": "fas fa-chart-area", - "default": true, - "help_text": "Shift aligned reads as commonly done for ATACseq, +4bp for reads on the + strand, -5 bp for reads on the - strand. This can only be applied if all samples are paired-end.", - "description": "Shift aligned reads (+4bp and -5bp)." } } }, diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index b28b255c..4c2a8710 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -2,7 +2,6 @@ // Call peaks with MACS2, annotate with HOMER and perform downstream QC // -include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main' include { MACS2_CALLPEAK } from '../../modules/nf-core/macs2/callpeak/main' include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' @@ -29,29 +28,11 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ch_versions = Channel.empty() - // - // Convert bam to bed - // - BEDTOOLS_BAMTOBED ( - ch_bam.map { meta, ip_bam, control_bam -> [ meta, ip_bam ] } - ) - ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions.first()) - - // Create channels: [meta, ip_bed, []] - BEDTOOLS_BAMTOBED - .out - .bed - .map { - meta, ip_bed -> - [ meta, ip_bed, [] ] - } - .set { ch_bed } - // // Call peaks with MACS2 // MACS2_CALLPEAK ( - ch_bed, + ch_bam, macs_gsize ) ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) diff --git a/subworkflows/local/bam_shift_reads.nf b/subworkflows/local/bam_shift_reads.nf deleted file mode 100644 index 4e99c1df..00000000 --- a/subworkflows/local/bam_shift_reads.nf +++ /dev/null @@ -1,51 +0,0 @@ -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' -include { DEEPTOOLS_ALIGNMENTSIEVE } from '../../modules/local/deeptools_alignmentsieve' - -workflow BAM_SHIFT_READS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] - - main: - ch_versions = Channel.empty() - - // - // Shift reads - // - DEEPTOOLS_ALIGNMENTSIEVE ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(DEEPTOOLS_ALIGNMENTSIEVE.out.versions) - - // - // Sort reads - // - SAMTOOLS_SORT ( - DEEPTOOLS_ALIGNMENTSIEVE.out.bam - ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions) - - // - // Index reads - // - SAMTOOLS_INDEX ( - SAMTOOLS_SORT.out.bam - ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - - // - // Run samtools flagstat - // - SAMTOOLS_FLAGSTAT ( - SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) - ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) - - emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index 72e7f9c9..5daf5839 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -66,11 +66,9 @@ include { MULTIQC } from '../modules/local/multiqc' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' -include { ALIGN_STAR } from '../subworkflows/local/align_star' -include { BAM_SHIFT_READS as MERGED_LIBRARY_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' -include { BAM_SHIFT_READS as MERGED_REPLICATE_BAM_SHIFT_READS } from '../subworkflows/local/bam_shift_reads' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { ALIGN_STAR } from '../subworkflows/local/align_star' include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools' include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' @@ -96,7 +94,6 @@ include { PRESEQ_LCEXTRAP as MERGED_LIBRARY_PRESEQ_LCEXTRAP include { DEEPTOOLS_PLOTFINGERPRINT as MERGED_LIBRARY_DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/deeptools/plotfingerprint/main' include { ATAQV_ATAQV as MERGED_LIBRARY_ATAQV_ATAQV } from '../modules/nf-core/ataqv/ataqv/main' include { ATAQV_MKARV as MERGED_LIBRARY_ATAQV_MKARV } from '../modules/nf-core/ataqv/mkarv/main' -include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_LIBRARY } from '../modules/nf-core/picard/mergesamfiles/main' include { PICARD_MERGESAMFILES as PICARD_MERGESAMFILES_REPLICATE } from '../modules/nf-core/picard/mergesamfiles/main' @@ -146,24 +143,6 @@ workflow ATACSEQ { // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! There is currently no tooling to help you write a sample sheet schema - // - // Check if reads are all paired-end if 'shift_reads' parameter is set - // - if (params.shift_reads) { - INPUT_CHECK - .out - .reads - .filter { meta, reads -> meta.single_end } - .collect() - .map { - it -> - def count = it.size() - if (count > 0) { - exit 1, 'The parameter --shift_reads can only be applied if all samples are paired-end.' - } - } - } - // // SUBWORKFLOW: Read QC and trim adapters // @@ -240,7 +219,6 @@ workflow ATACSEQ { [] ) ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam - ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats @@ -379,29 +357,11 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) } - // - // SUBWORKFLOW: Shift paired-end reads - // - ch_merged_library_filter_bam = MERGED_LIBRARY_FILTER_BAM.out.bam - ch_merged_library_filter_bai = MERGED_LIBRARY_FILTER_BAM.out.bai - ch_merged_library_filter_flagstat = MERGED_LIBRARY_FILTER_BAM.out.flagstat - - if (params.shift_reads && params.aligner != 'chromap' ) { - MERGED_LIBRARY_BAM_SHIFT_READS ( - ch_merged_library_filter_bam.join(ch_merged_library_filter_bai, by: [0]), - ) - ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_SHIFT_READS.out.versions) - - ch_merged_library_filter_bam = MERGED_LIBRARY_BAM_SHIFT_READS.out.bam - ch_merged_library_filter_bai = MERGED_LIBRARY_BAM_SHIFT_READS.out.bai - ch_merged_library_filter_flagstat = MERGED_LIBRARY_BAM_SHIFT_READS.out.flagstat - } - // // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_LIBRARY_BAM_TO_BIGWIG ( - ch_merged_library_filter_bam.join(ch_merged_library_filter_flagstat, by: [0]), + MERGED_LIBRARY_FILTER_BAM.out.bam.join(MERGED_LIBRARY_FILTER_BAM.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_versions = ch_versions.mix(MERGED_LIBRARY_BAM_TO_BIGWIG.out.versions) @@ -632,29 +592,10 @@ workflow ATACSEQ { ch_markduplicates_replicate_metrics = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.metrics ch_versions = ch_versions.mix(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.versions) - // - // SUBWORKFLOW: Shift paired-end reads - // Shift again, as ch_merged_library_replicate_bam is generated out of unshifted reads - // - ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam - ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bai - ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat - - if (params.shift_reads && params.aligner != 'chromap' ) { - MERGED_REPLICATE_BAM_SHIFT_READS ( - ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_bai, by: [0]), - ) - ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_SHIFT_READS.out.versions) - - ch_merged_replicate_markduplicate_bam = MERGED_REPLICATE_BAM_SHIFT_READS.out.bam - ch_merged_replicate_markduplicate_bai = MERGED_REPLICATE_BAM_SHIFT_READS.out.bai - ch_merged_replicate_markduplicate_flagstat = MERGED_REPLICATE_BAM_SHIFT_READS.out.flagstat - } - // SUBWORKFLOW: Normalised bigWig coverage tracks // MERGED_REPLICATE_BAM_TO_BIGWIG ( - ch_merged_replicate_markduplicate_bam.join(ch_merged_replicate_markduplicate_flagstat, by: [0]), + MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.bam.join(MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat, by: [0]), PREPARE_GENOME.out.chrom_sizes ) ch_ucsc_bedgraphtobigwig_replicate_bigwig = MERGED_REPLICATE_BAM_TO_BIGWIG.out.bigwig @@ -662,14 +603,18 @@ workflow ATACSEQ { // Create channels: [ meta, bam, ([] for control_bam) ] if (params.with_control) { - ch_merged_replicate_markduplicate_bam + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam .map { meta, bam -> meta.control ? null : [ meta.id, bam ] } .set { ch_bam_merged_control } - ch_merged_replicate_markduplicate_bam + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam .map { meta, bam -> meta.control ? [ meta.control, meta, bam ] : null @@ -678,14 +623,15 @@ workflow ATACSEQ { .map { it -> [ it[1] , it[2], it[3] ] } .set { ch_bam_replicate } } else { - ch_merged_replicate_markduplicate_bam + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam .map { meta, bam -> [ meta , bam, [] ] } .set { ch_bam_replicate } } - // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC //