From b99882b7d89eacf5112cae13d98585fe2f356005 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 29 May 2024 10:48:58 -0400 Subject: [PATCH 1/3] feat: add parameter to make go_enrichment optional --- .test/config_lint.yaml | 7 ++++--- config/config.yaml | 9 +++++---- docs/user-guide/preparing-files.md | 4 ++-- workflow/Snakefile | 4 ++-- workflow/rules/annotations.smk | 2 +- workflow/rules/init.smk | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.test/config_lint.yaml b/.test/config_lint.yaml index d0e1e57..e043334 100644 --- a/.test/config_lint.yaml +++ b/.test/config_lint.yaml @@ -11,10 +11,11 @@ samplemanifest: "/opt2/.test/samples.test_lintr.tsv" # User parameters ##################################################################################### # run sample contrasts -run_contrasts: "Y" # Y or N +run_contrasts: true contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" -contrasts_fdr_cutoff: "0.05" -contrasts_lfc_cutoff: "0.59" # FC of 1.5 +contrasts_fdr_cutoff: 0.05 +contrasts_lfc_cutoff: 0.59 # FC of 1.5 +run_go_enrichment: true # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. diff --git a/config/config.yaml b/config/config.yaml index 0bf0d34..7e85729 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -17,10 +17,11 @@ samplemanifest: "WORKDIR/config/samples.tsv" # User parameters ##################################################################################### # run sample contrasts -run_contrasts: "Y" # Y or N -contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be "Y" -contrasts_fdr_cutoff: "0.05" -contrasts_lfc_cutoff: "0.59" # FC of 1.5 +run_contrasts: true # true or false, no quotes +contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be `true` +contrasts_fdr_cutoff: 0.05 +contrasts_lfc_cutoff: 0.59 # FC of 1.5 +run_go_enrichment: false # this step is long-running. use `true` if you would like to run it. # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. diff --git a/docs/user-guide/preparing-files.md b/docs/user-guide/preparing-files.md index 18f3d7c..547738a 100644 --- a/docs/user-guide/preparing-files.md +++ b/docs/user-guide/preparing-files.md @@ -30,7 +30,7 @@ The pipeline allows for the use of a species specific spike-in control, or the u For example for ecoli spike-in: ``` -run_contrasts: "Y" +run_contrasts: true norm_method: "spikein" spikein_genome: "ecoli" spikein_reference: @@ -41,7 +41,7 @@ spikein_reference: For example for drosophila spike-in: ``` -run_contrasts: "Y" +run_contrasts: true norm_method: "spikein" spikein_genome: "drosophila" spikein_reference: diff --git a/workflow/Snakefile b/workflow/Snakefile index 011e479..3d808ba 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -97,7 +97,7 @@ def run_qc(wildcards): def run_contrasts(wildcards): files=[] - if config["run_contrasts"] == "Y": + if config["run_contrasts"]: files.append(join(RESULTSDIR,"replicate_sample.tsv")) # inputs for matrix @@ -159,7 +159,7 @@ def get_rose(wildcards): def get_enrichment(wildcards): files=[] - if config["run_contrasts"] == "Y": + if config["run_contrasts"] and config['run_go_enrichment']: if (GENOME == "hg19") or (GENOME == "hg38"): if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) diff --git a/workflow/rules/annotations.smk b/workflow/rules/annotations.smk index e88d864..389b18c 100644 --- a/workflow/rules/annotations.smk +++ b/workflow/rules/annotations.smk @@ -256,7 +256,7 @@ rule rose: echo "Less than 5 usable peaks detected (N=${{num_of_peaks}})" > {output.super_summit} fi """ -if config["run_contrasts"] == "Y": +if config["run_contrasts"]: rule create_contrast_peakcaller_files: """ Reads in all of the output from Rules create_contrast_data_files which match the same peaktype and merges them together diff --git a/workflow/rules/init.smk b/workflow/rules/init.smk index 8aa464b..e4e729a 100644 --- a/workflow/rules/init.smk +++ b/workflow/rules/init.smk @@ -185,7 +185,7 @@ QTRESHOLDS=config["quality_thresholds"] QTRESHOLDS=list(map(lambda x:x.strip(),QTRESHOLDS.split(","))) # set contrast settings -if config["run_contrasts"] == "Y": +if config["run_contrasts"]: print("#"*100) print("# Checking constrasts to run...") contrasts_table = config["contrasts"] From 3ad679827995626d2037d2c5bf1dd83e4bed06e3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 29 May 2024 11:02:53 -0400 Subject: [PATCH 2/3] docs: update changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2819547..9d6e9ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,9 @@ - Fixes FDR cutoff misassigned to log2FC cutoff. - Fixes `no_dedup` variable names in library normalization scripts. - Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool) - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. + - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. +- GO enrichment is now optional, with a new parameter `run_go_enrichment` (default: `false`) in the config file to control whether GO enrichment runs. (#133, @kelly-sovacool) + ## CARLISLE v2.5.0 - Refactors R packages to a common source location (#118, @slsevilla) From 8d9a3bef4cacf0e720f5b208524af2d94ee4fb76 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Wed, 29 May 2024 12:44:52 -0400 Subject: [PATCH 3/3] feat: make the rose rule optional by request of @epehrsson related to #134 --- .test/config_lint.yaml | 1 + CHANGELOG.md | 4 +++- config/config.yaml | 5 ++++- docs/user-guide/output.md | 4 ++-- workflow/Snakefile | 19 ++++++++++--------- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/.test/config_lint.yaml b/.test/config_lint.yaml index e043334..5f7c9e5 100644 --- a/.test/config_lint.yaml +++ b/.test/config_lint.yaml @@ -16,6 +16,7 @@ contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" contrasts_fdr_cutoff: 0.05 contrasts_lfc_cutoff: 0.59 # FC of 1.5 run_go_enrichment: true +run_rose: true # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d6e9ca..2535cce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,9 @@ - Fixes `no_dedup` variable names in library normalization scripts. - Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool) - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. -- GO enrichment is now optional, with a new parameter `run_go_enrichment` (default: `false`) in the config file to control whether GO enrichment runs. (#133, @kelly-sovacool) +- New parameters in the config file to make certain rules optional: (#133, @kelly-sovacool) + - GO enrichment is controlled by `run_go_enrichment` (default: `false`) + - rose is controlled by `run_rose` (default: `false`) ## CARLISLE v2.5.0 diff --git a/config/config.yaml b/config/config.yaml index 7e85729..dfff518 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -21,7 +21,10 @@ run_contrasts: true # true or false, no quotes contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be `true` contrasts_fdr_cutoff: 0.05 contrasts_lfc_cutoff: 0.59 # FC of 1.5 -run_go_enrichment: false # this step is long-running. use `true` if you would like to run it. + +# these steps are long-running. use `true` if you would like to run them +run_go_enrichment: false +run_rose: false # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. diff --git a/docs/user-guide/output.md b/docs/user-guide/output.md index ecf07d0..0f8de72 100644 --- a/docs/user-guide/output.md +++ b/docs/user-guide/output.md @@ -10,9 +10,9 @@ The following directories are created under the WORKDIR/results directory: - contrasts: this directory includes the contrasts for each line listed in the contrast manifest - peak_caller: this directory includes all peak calls from each peak_caller (SEACR, MACS2, GOPEAKS) for each sample - annotation - - go_enrichment: this directory includes gene set enrichment pathway predictions + - go_enrichment: this directory includes gene set enrichment pathway predictions when `run_go_enrichment` is set to `true` in the config file. - homer: this directory includes the annotation output from HOMER - - rose: this directory includes the annotation output from ROSE + - rose: this directory includes the annotation output from ROSE when `run_rose` is set to `true` in the config file. - qc: this directory includes MULTIQC reports and spike-in control reports (when applicable) ``` diff --git a/workflow/Snakefile b/workflow/Snakefile index 3d808ba..aa7fd79 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -146,15 +146,16 @@ def get_motifs(wildcards): def get_rose(wildcards): files=[] - if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): - anno_m=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="macs2",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_M,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_M,s_dist=S_DISTANCE), - files.extend(anno_m) - if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): - anno_g=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_G,s_dist=S_DISTANCE), - files.extend(anno_g) - if ("seacr_stringent" in PEAKTYPE) or ("seacr_relaxed" in PEAKTYPE): - anno_s=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="seacr",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_S,s_dist=S_DISTANCE), - files.extend(anno_s) + if config['run_rose']: + if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): + anno_m=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="macs2",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_M,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_M,s_dist=S_DISTANCE), + files.extend(anno_m) + if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): + anno_g=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_G,s_dist=S_DISTANCE), + files.extend(anno_g) + if ("seacr_stringent" in PEAKTYPE) or ("seacr_relaxed" in PEAKTYPE): + anno_s=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="seacr",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_S,s_dist=S_DISTANCE), + files.extend(anno_s) return files def get_enrichment(wildcards):