diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb45b687..cff4fb4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,10 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added a second BEDTools intersect step to allow filtering and overlapping in the same workflow.
+
 ### Changed
 
 ### Fixed
 
+- Updated PINTS version from 1.1.6 to 1.1.8 ([Fixes an issue where PINTS fails if one of the predictions was empty](https://github.com/hyulab/PINTS/issues/12))
+
 ### Deprecated
 
 ### Removed
diff --git a/conf/modules.config b/conf/modules.config
index c7460ed7..20774971 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -175,7 +175,12 @@ process {
 
     withName: BEDTOOLS_INTERSECT_FILTER {
         ext.prefix = { "${meta.id}_filtered" }
-        ext.args = { params.no_overlap ? "-v": "" }
+        ext.args = "-v"
+    }
+
+    withName: BEDTOOLS_INTERSECT {
+        ext.prefix = { "${meta.id}_intersect" }
+        ext.args = "-wa"
     }
 }
 
diff --git a/conf/test.config b/conf/test.config
index a2b50210..08602f53 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -29,12 +29,6 @@ params {
     assay_type = "GROseq"
     skip_grohmm = true // FIXME Fails due to higher memory requirements
     tuning_file = "${projectDir}/tests/subworkflows/local/grohmm/tuningparams_small.csv"
-    filter_bed = "${projectDir}/tests/samplesheets/region.bed"
-}
-
-process {
-    // FIXME https://github.com/hyulab/PINTS/issues/12
-    withName: PINTS_CALLER {
-        ext.when = false
-    }
+    filter_bed = "${projectDir}/tests/samplesheets/unwanted_region.bed"
+    intersect_bed = "${projectDir}/tests/samplesheets/wanted_region.bed"
 }
diff --git a/modules.json b/modules.json
index fef13e53..11019be3 100644
--- a/modules.json
+++ b/modules.json
@@ -107,7 +107,7 @@
                 },
                 "pints/caller": {
                     "branch": "master",
-                    "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    "git_sha": "a8c6d338a76df3736215c93c27d9ccc7b88e9cf4"
                 },
                 "preseq/ccurve": {
                     "branch": "master",
diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf
index c7ab2ae0..ddbbd8ae 100644
--- a/modules/nf-core/pints/caller/main.nf
+++ b/modules/nf-core/pints/caller/main.nf
@@ -2,19 +2,20 @@ process PINTS_CALLER {
     tag "$meta.id"
     label 'process_medium'
 
-    conda (params.enable_conda ? "bioconda::pypints=1.1.6" : null)
+    conda (params.enable_conda ? "bioconda::pypints=1.1.8" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/pypints:1.1.6--pyh5e36f6f_1' :
-        'quay.io/biocontainers/pypints:1.1.6--pyh5e36f6f_1' }"
+        'https://depot.galaxyproject.org/singularity/pypints:1.1.8--pyh7cba7a3_0' :
+        'quay.io/biocontainers/pypints:1.1.8--pyh7cba7a3_0' }"
 
     input:
     tuple val(meta), path(bams)
 
     output:
-    tuple val(meta), path("*_divergent_peaks.bed")     , emit: divergent_TREs
-    tuple val(meta), path("*_bidirectional_peaks.bed") , emit: bidirectional_TREs
-    tuple val(meta), path("*_unidirectional_peaks.bed"), emit: unidirectional_TREs
-    path "versions.yml"                                , emit: versions
+    tuple val(meta), path("*_divergent_peaks.bed")     , optional:true, emit: divergent_TREs
+    tuple val(meta), path("*_bidirectional_peaks.bed") , optional:true, emit: bidirectional_TREs
+    tuple val(meta), path("*_unidirectional_peaks.bed"), optional:true, emit: unidirectional_TREs
+    tuple val(meta), path("peakcalling_*.log")         , emit: peakcalling_log
+    path "versions.yml"                                , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/nf-core/pints/caller/meta.yml b/modules/nf-core/pints/caller/meta.yml
index 647e5b89..fd649816 100644
--- a/modules/nf-core/pints/caller/meta.yml
+++ b/modules/nf-core/pints/caller/meta.yml
@@ -46,14 +46,21 @@ output:
       type: file
       description: Divergent TREs
       pattern: "*_divergent_peaks.bed"
+      optional: true
   - bidirectional_TREs:
       type: file
       description: Divergent TREs and convergent TREs
       pattern: "*_bidirectional_peaks.bed"
+      optional: true
   - unidirectional_TREs:
       type: file
       description: Unidirectional TREs, maybe lncRNAs transcribed from enhancers (e-lncRNAs)
       pattern: "*_unidirectional_peaks.bed"
+      optional: true
+  - peakcalling_log:
+      type: file
+      description: Peakcalling log for debugging purposes
+      pattern: "peakcalling_*.log"
 
 authors:
   - "@Emiller88"
diff --git a/nextflow.config b/nextflow.config
index 8772b703..c676b716 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -36,7 +36,7 @@ params {
     tuning_file = null
 
     filter_bed = null
-    no_overlap = true
+    intersect_bed = null
 
     // MultiQC options
     multiqc_config = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6cd74350..9bb9bd2e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -137,13 +137,16 @@
                 "filter_bed": {
                     "type": "string",
                     "fa_icon": "fas fa-filter",
-                    "pattern": "^\\S+\\.bed(\\.gz)?$"
+                    "pattern": "^\\S+\\.bed(\\.gz)?$",
+                    "description": "Undesired regions, that transcripts should not overlap with",
+                    "help_text": "Promoter regions, or gene regions are a good example"
                 },
-                "no_overlap": {
-                    "type": "boolean",
-                    "default": true,
-                    "fa_icon": "far fa-chart-bar",
-                    "description": "Only report those entries in A that have no overlap in B"
+                "intersect_bed": {
+                    "type": "string",
+                    "fa_icon": "fas fa-sort-amount-desc",
+                    "pattern": "^\\S+\\.bed(\\.gz)?$",
+                    "description": "Desired regions, that transcripts should overlap with",
+                    "help_text": "Histone Modifications(H3K4me1 and H3K27ac), or known TREs from the PINTS element matrix are examples"
                 }
             },
             "required": ["assay_type"],
diff --git a/subworkflows/local/transcript_identification.nf b/subworkflows/local/transcript_identification.nf
index d7c5d422..ddc418ed 100644
--- a/subworkflows/local/transcript_identification.nf
+++ b/subworkflows/local/transcript_identification.nf
@@ -9,6 +9,7 @@ include { CAT_CAT } from '../../modules/nf-core/cat/cat/main'
 include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main'
 include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main'
 include { BEDTOOLS_INTERSECT as BEDTOOLS_INTERSECT_FILTER } from '../../modules/nf-core/bedtools/intersect/main'
+include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main'
 
 include { HOMER_GROSEQ } from '../nf-core/homer/groseq/main'
 
@@ -58,12 +59,16 @@ workflow TRANSCRIPT_INDENTIFICATION {
     if(params.filter_bed) {
         ch_filter_bed = Channel.from(params.filter_bed)
         BEDTOOLS_INTERSECT_FILTER ( ch_identification_bed.combine(ch_filter_bed), "bed" )
+        ch_identification_bed = BEDTOOLS_INTERSECT_FILTER.out.intersect
         ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_FILTER.out.versions.first())
     }
-
-    // Use non-filtered bed files if we skip filtering
-    if(!params.filter_bed) {
-        ch_identification_bed = BEDTOOLS_INTERSECT_FILTER.out.intersect
+    // Optionally keep only the transcripts that overlap the desired regions (params.intersect_bed)
+    if(params.intersect_bed) {
+        ch_intersect_bed = Channel.from(params.intersect_bed)
+        BEDTOOLS_INTERSECT ( ch_identification_bed.combine(ch_intersect_bed), "bed" )
+        ch_identification_bed = BEDTOOLS_INTERSECT.out.intersect
+        // Report versions from the step run in this branch; BEDTOOLS_INTERSECT_FILTER only runs when filter_bed is set
+        ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions.first())
     }
 
     ch_identification_bed
diff --git a/tests/samplesheets/region.bed b/tests/samplesheets/unwanted_region.bed
similarity index 100%
rename from tests/samplesheets/region.bed
rename to tests/samplesheets/unwanted_region.bed
diff --git a/tests/samplesheets/wanted_region.bed b/tests/samplesheets/wanted_region.bed
new file mode 100644
index 00000000..dffd3e73
--- /dev/null
+++ b/tests/samplesheets/wanted_region.bed
@@ -0,0 +1,2 @@
+chr21 37267780 37267800
+chr21 41362070 41362130