From e754472a2d715defe322ff6ada39800390b1a547 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 30 Oct 2022 18:02:51 -0500 Subject: [PATCH 1/6] feat(filter): Add intersect step So first, users can filter the identified transcripts and remove any that intersect with the undesired regions, then only find transcripts that intersect with the desired regions. --- conf/modules.config | 7 ++++++- nextflow.config | 2 +- nextflow_schema.json | 15 +++++++++------ subworkflows/local/transcript_identification.nf | 11 +++++++---- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c7460ed7..20774971 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -175,7 +175,12 @@ process { withName: BEDTOOLS_INTERSECT_FILTER { ext.prefix = { "${meta.id}_filtered" } - ext.args = { params.no_overlap ? "-v": "" } + ext.args = "-v" + } + + withName: BEDTOOLS_INTERSECT { + ext.prefix = { "${meta.id}_intersect" } + ext.args = "-wa" } } diff --git a/nextflow.config b/nextflow.config index 8772b703..c676b716 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,7 +36,7 @@ params { tuning_file = null filter_bed = null - no_overlap = true + intersect_bed = null // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 6cd74350..9bb9bd2e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -137,13 +137,16 @@ "filter_bed": { "type": "string", "fa_icon": "fas fa-filter", - "pattern": "^\\S+\\.bed(\\.gz)?$" + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "Undesired regions, that transcripts should not overlap with", + "help_text": "Promoter regions, or gene regions are a good example" }, - "no_overlap": { - "type": "boolean", - "default": true, - "fa_icon": "far fa-chart-bar", - "description": "Only report those entries in A that have no overlap in B" + "intersect_bed": { + "type": "string", + "fa_icon": "fas fa-sort-amount-desc", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "Desired regions, that transcripts should overlap with", + "help_text": "Histone Modifications(H3K4me1 and H3K27ac), or known TREs from the PINTS element matrix are examples" } }, "required": ["assay_type"], diff --git a/subworkflows/local/transcript_identification.nf b/subworkflows/local/transcript_identification.nf index d7c5d422..ddc418ed 100644 --- a/subworkflows/local/transcript_identification.nf +++ b/subworkflows/local/transcript_identification.nf @@ -9,6 +9,7 @@ include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main' include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/sort/main' include { BEDTOOLS_INTERSECT as BEDTOOLS_INTERSECT_FILTER } from '../../modules/nf-core/bedtools/intersect/main' +include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main' include { HOMER_GROSEQ } from '../nf-core/homer/groseq/main' @@ -58,12 +59,14 @@ workflow TRANSCRIPT_INDENTIFICATION { if(params.filter_bed) { ch_filter_bed = Channel.from(params.filter_bed) BEDTOOLS_INTERSECT_FILTER ( ch_identification_bed.combine(ch_filter_bed), "bed" ) + ch_identification_bed = BEDTOOLS_INTERSECT_FILTER.out.intersect ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_FILTER.out.versions.first()) } - - // Use non-filtered bed files if we skip filtering - if(!params.filter_bed) { - ch_identification_bed = BEDTOOLS_INTERSECT_FILTER.out.intersect + if(params.intersect_bed) { + ch_intersect_bed = Channel.from(params.intersect_bed) + BEDTOOLS_INTERSECT ( ch_identification_bed.combine(ch_intersect_bed), "bed" ) + ch_identification_bed = BEDTOOLS_INTERSECT.out.intersect + ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT_FILTER.out.versions.first()) } ch_identification_bed From a6136fd00f3124594d0a4aa9c7630d5d96a91ccb Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 31 Oct 2022 11:32:39 -0500 Subject: [PATCH 2/6] test: Add in wanted region for bed files --- conf/test.config | 3 ++- tests/samplesheets/{region.bed => unwanted_region.bed} | 0 tests/samplesheets/wanted_region.bed | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) rename tests/samplesheets/{region.bed => unwanted_region.bed} (100%) create mode 100644 tests/samplesheets/wanted_region.bed diff --git a/conf/test.config b/conf/test.config index a2b50210..3e7bb451 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,7 +29,8 @@ params { assay_type = "GROseq" skip_grohmm = true // FIXME Fails due to higher memory requirements tuning_file = "${projectDir}/tests/subworkflows/local/grohmm/tuningparams_small.csv" - filter_bed = "${projectDir}/tests/samplesheets/region.bed" + filter_bed = "${projectDir}/tests/samplesheets/unwanted_region.bed" + intersect_bed = "${projectDir}/tests/samplesheets/wanted_region.bed" } process { diff --git a/tests/samplesheets/region.bed b/tests/samplesheets/unwanted_region.bed similarity index 100% rename from tests/samplesheets/region.bed rename to tests/samplesheets/unwanted_region.bed diff --git a/tests/samplesheets/wanted_region.bed b/tests/samplesheets/wanted_region.bed new file mode 100644 index 00000000..dffd3e73 --- /dev/null +++ b/tests/samplesheets/wanted_region.bed @@ -0,0 +1,2 @@ +chr21 37267780 37267800 +chr21 41362070 41362130 From 841ae6240af8ebd19338cdd19204056cadbc3437 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 31 Oct 2022 10:39:50 -0500 Subject: [PATCH 3/6] chore: nf-core modules update pints/caller https://github.com/hyulab/PINTS/issues/12 --- CHANGELOG.md | 2 ++ conf/test.config | 7 ------- modules.json | 2 +- modules/nf-core/pints/caller/main.nf | 6 +++--- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb45b687..09ba4244 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Updated PINTS version from 1.1.6 to 1.1.8 ([Fixes an issue where PINTS fails if one of the predictions was empty](https://github.com/hyulab/PINTS/issues/12)) + ### Deprecated ### Removed diff --git a/conf/test.config b/conf/test.config index 3e7bb451..08602f53 100644 --- a/conf/test.config +++ b/conf/test.config @@ -32,10 +32,3 @@ params { filter_bed = "${projectDir}/tests/samplesheets/unwanted_region.bed" intersect_bed = "${projectDir}/tests/samplesheets/wanted_region.bed" } - -process { - // FIXME https://github.com/hyulab/PINTS/issues/12 - withName: PINTS_CALLER { - ext.when = false - } -} diff --git a/modules.json b/modules.json index fef13e53..e61d1cfa 100644 --- a/modules.json +++ b/modules.json @@ -107,7 +107,7 @@ }, "pints/caller": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + "git_sha": "ad1e966b5123a2af6be786f35628c40f64771097" }, "preseq/ccurve": { "branch": "master", diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf index c7ab2ae0..bfc0a166 100644 --- a/modules/nf-core/pints/caller/main.nf +++ b/modules/nf-core/pints/caller/main.nf @@ -2,10 +2,10 @@ process PINTS_CALLER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::pypints=1.1.6" : null) + conda (params.enable_conda ? "bioconda::pypints=1.1.8" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pypints:1.1.6--pyh5e36f6f_1' : - 'quay.io/biocontainers/pypints:1.1.6--pyh5e36f6f_1' }" + 'https://depot.galaxyproject.org/singularity/pypints:1.1.8--pyh7cba7a3_0' : + 'quay.io/biocontainers/pypints:1.1.8--pyh7cba7a3_0' }" input: tuple val(meta), path(bams) From b11c50f89c5d3a4307a94a8a7a4dc68b4d6646d9 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 31 Oct 2022 10:50:41 -0500 Subject: [PATCH 4/6] fix(pints): Patch module to allow all bed files to be optional Added the log as well to ensure it ran. --- modules.json | 3 ++- modules/nf-core/pints/caller/main.nf | 7 ++++--- .../nf-core/pints/caller/pints-caller.diff | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/pints/caller/pints-caller.diff diff --git a/modules.json b/modules.json index e61d1cfa..41c03f3e 100644 --- a/modules.json +++ b/modules.json @@ -107,7 +107,8 @@ }, "pints/caller": { "branch": "master", - "git_sha": "ad1e966b5123a2af6be786f35628c40f64771097" + "git_sha": "ad1e966b5123a2af6be786f35628c40f64771097", + "patch": "modules/nf-core/pints/caller/pints-caller.diff" }, "preseq/ccurve": { "branch": "master", diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf index bfc0a166..c06c5dad 100644 --- a/modules/nf-core/pints/caller/main.nf +++ b/modules/nf-core/pints/caller/main.nf @@ -11,9 +11,10 @@ process PINTS_CALLER { tuple val(meta), path(bams) output: - tuple val(meta), path("*_divergent_peaks.bed") , emit: divergent_TREs - tuple val(meta), path("*_bidirectional_peaks.bed") , emit: bidirectional_TREs - tuple val(meta), path("*_unidirectional_peaks.bed"), emit: unidirectional_TREs + tuple val(meta), path("*_divergent_peaks.bed") , optional:true, emit: divergent_TREs + tuple val(meta), path("*_bidirectional_peaks.bed") , optional:true, emit: bidirectional_TREs + tuple val(meta), path("*_unidirectional_peaks.bed"), optional:true, emit: unidirectional_TREs + tuple val(meta), path("peakcalling_*.log") , emit: peakcalling_log path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/pints/caller/pints-caller.diff b/modules/nf-core/pints/caller/pints-caller.diff new file mode 100644 index 00000000..eba13264 --- /dev/null +++ b/modules/nf-core/pints/caller/pints-caller.diff @@ -0,0 +1,19 @@ +Changes in module 'nf-core/pints/caller' +--- modules/nf-core/pints/caller/main.nf ++++ modules/nf-core/pints/caller/main.nf +@@ -11,9 +11,10 @@ + tuple val(meta), path(bams) + + output: +- tuple val(meta), path("*_divergent_peaks.bed") , emit: divergent_TREs +- tuple val(meta), path("*_bidirectional_peaks.bed") , emit: bidirectional_TREs +- tuple val(meta), path("*_unidirectional_peaks.bed"), emit: unidirectional_TREs ++ tuple val(meta), path("*_divergent_peaks.bed") , optional:true, emit: divergent_TREs ++ tuple val(meta), path("*_bidirectional_peaks.bed") , optional:true, emit: bidirectional_TREs ++ tuple val(meta), path("*_unidirectional_peaks.bed"), optional:true, emit: unidirectional_TREs ++ tuple val(meta), path("peakcalling_*.log") , emit: peakcalling_log + path "versions.yml" , emit: versions + + when: + +************************************************************ From 534b5dae3f175f1f9f690401ab9e07f9c26864e2 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 31 Oct 2022 11:35:32 -0500 Subject: [PATCH 5/6] chore: Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09ba4244..cff4fb4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added a second BEDTools intersect step to allow filtering and overlapping in the same workflow. + ### Changed ### Fixed From edf04d37094ee72581a9da171216bac71a259688 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 31 Oct 2022 11:54:42 -0500 Subject: [PATCH 6/6] chore(pints): nf-core modules update pints/caller --- modules.json | 3 +-- modules/nf-core/pints/caller/main.nf | 2 +- modules/nf-core/pints/caller/meta.yml | 7 +++++++ .../nf-core/pints/caller/pints-caller.diff | 19 ------------------- 4 files changed, 9 insertions(+), 22 deletions(-) delete mode 100644 modules/nf-core/pints/caller/pints-caller.diff diff --git a/modules.json b/modules.json index 41c03f3e..11019be3 100644 --- a/modules.json +++ b/modules.json @@ -107,8 +107,7 @@ }, "pints/caller": { "branch": "master", - "git_sha": "ad1e966b5123a2af6be786f35628c40f64771097", - "patch": "modules/nf-core/pints/caller/pints-caller.diff" + "git_sha": "a8c6d338a76df3736215c93c27d9ccc7b88e9cf4" }, "preseq/ccurve": { "branch": "master", diff --git a/modules/nf-core/pints/caller/main.nf b/modules/nf-core/pints/caller/main.nf index c06c5dad..ddbbd8ae 100644 --- a/modules/nf-core/pints/caller/main.nf +++ b/modules/nf-core/pints/caller/main.nf @@ -15,7 +15,7 @@ process PINTS_CALLER { tuple val(meta), path("*_bidirectional_peaks.bed") , optional:true, emit: bidirectional_TREs tuple val(meta), path("*_unidirectional_peaks.bed"), optional:true, emit: unidirectional_TREs tuple val(meta), path("peakcalling_*.log") , emit: peakcalling_log - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/pints/caller/meta.yml b/modules/nf-core/pints/caller/meta.yml index 647e5b89..fd649816 100644 --- a/modules/nf-core/pints/caller/meta.yml +++ b/modules/nf-core/pints/caller/meta.yml @@ -46,14 +46,21 @@ output: type: file description: Divergent TREs pattern: "*_divergent_peaks.bed" + optional: true - bidirectional_TREs: type: file description: Divergent TREs and convergent TREs pattern: "*_bidirectional_peaks.bed" + optional: true - unidirectional_TREs: type: file description: Unidirectional TREs, maybe lncRNAs transcribed from enhancers (e-lncRNAs) pattern: "*_unidirectional_peaks.bed" + optional: true + - peakcalling_log: + type: file + description: Peakcalling log for debugging purposes + pattern: "peakcalling_*.log" authors: - "@Emiller88" diff --git a/modules/nf-core/pints/caller/pints-caller.diff b/modules/nf-core/pints/caller/pints-caller.diff deleted file mode 100644 index eba13264..00000000 --- a/modules/nf-core/pints/caller/pints-caller.diff +++ /dev/null @@ -1,19 +0,0 @@ -Changes in module 'nf-core/pints/caller' ---- modules/nf-core/pints/caller/main.nf -+++ modules/nf-core/pints/caller/main.nf -@@ -11,9 +11,10 @@ - tuple val(meta), path(bams) - - output: -- tuple val(meta), path("*_divergent_peaks.bed") , emit: divergent_TREs -- tuple val(meta), path("*_bidirectional_peaks.bed") , emit: bidirectional_TREs -- tuple val(meta), path("*_unidirectional_peaks.bed"), emit: unidirectional_TREs -+ tuple val(meta), path("*_divergent_peaks.bed") , optional:true, emit: divergent_TREs -+ tuple val(meta), path("*_bidirectional_peaks.bed") , optional:true, emit: bidirectional_TREs -+ tuple val(meta), path("*_unidirectional_peaks.bed"), optional:true, emit: unidirectional_TREs -+ tuple val(meta), path("peakcalling_*.log") , emit: peakcalling_log - path "versions.yml" , emit: versions - - when: - -************************************************************