diff --git a/.test/config_lint.yaml b/.test/config_lint.yaml index d0e1e57..5f7c9e5 100644 --- a/.test/config_lint.yaml +++ b/.test/config_lint.yaml @@ -11,10 +11,12 @@ samplemanifest: "/opt2/.test/samples.test_lintr.tsv" # User parameters ##################################################################################### # run sample contrasts -run_contrasts: "Y" # Y or N +run_contrasts: true -contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" +contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be `true` -contrasts_fdr_cutoff: "0.05" -contrasts_lfc_cutoff: "0.59" # FC of 1.5 +contrasts_fdr_cutoff: 0.05 +contrasts_lfc_cutoff: 0.59 # FC of 1.5 +run_go_enrichment: true +run_rose: true # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. diff --git a/CHANGELOG.md b/CHANGELOG.md index c675cb0..05dfeed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,20 @@ ## CARLISLE development version -- Bug fixes (#127, @epehrsson) +- Bug fixes: (#127, @epehrsson) - Removes single-sample group check for DESeq. - Increases memory for DESeq. - Ensures control replicate number is an integer. - Fixes FDR cutoff misassigned to log2FC cutoff. - Fixes `no_dedup` variable names in library normalization scripts. - Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool) - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. -- New visualization features (#132, @epehrsson) + - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. 
+- New visualizations: (#132, @epehrsson) - New rules `cov_correlation`, `homer_enrich`, `combine_homer`, `count_peaks` - Add peak caller to MACS2 peak xls filename - +- New parameters in the config file to make certain rules optional: (#133, @kelly-sovacool) + - GO enrichment is controlled by `run_go_enrichment` (default: `false`) + - ROSE is controlled by `run_rose` (default: `false`) + ## CARLISLE v2.5.0 - Refactors R packages to a common source location (#118, @slsevilla) - Adds a --force flag to allow for re-initialization of a workdir (#97, @slsevilla) diff --git a/config/config.yaml b/config/config.yaml index 88a974b..f2c5bee 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -17,10 +17,14 @@ samplemanifest: "WORKDIR/config/samples.tsv" # User parameters ##################################################################################### # run sample contrasts -run_contrasts: "Y" # Y or N -contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be "Y" -contrasts_fdr_cutoff: "0.05" -contrasts_lfc_cutoff: "0.59" # FC of 1.5 +run_contrasts: true # true or false, no quotes +contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be `true` +contrasts_fdr_cutoff: 0.05 +contrasts_lfc_cutoff: 0.59 # FC of 1.5 + +# These steps are long-running and disabled by default; set to `true` to run them. +run_go_enrichment: false +run_rose: false # reference genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. 
diff --git a/docs/user-guide/output.md b/docs/user-guide/output.md index ecf07d0..0f8de72 100644 --- a/docs/user-guide/output.md +++ b/docs/user-guide/output.md @@ -10,9 +10,9 @@ The following directories are created under the WORKDIR/results directory: - contrasts: this directory includes the contrasts for each line listed in the contrast manifest - peak_caller: this directory includes all peak calls from each peak_caller (SEACR, MACS2, GOPEAKS) for each sample - annotation - - go_enrichment: this directory includes gene set enrichment pathway predictions + - go_enrichment: this directory includes gene set enrichment pathway predictions when `run_go_enrichment` is set to `true` in the config file. - homer: this directory includes the annotation output from HOMER - - rose: this directory includes the annotation output from ROSE + - rose: this directory includes the annotation output from ROSE when `run_rose` is set to `true` in the config file. - qc: this directory includes MULTIQC reports and spike-in control reports (when applicable) ``` diff --git a/docs/user-guide/preparing-files.md b/docs/user-guide/preparing-files.md index 18f3d7c..547738a 100644 --- a/docs/user-guide/preparing-files.md +++ b/docs/user-guide/preparing-files.md @@ -30,7 +30,7 @@ The pipeline allows for the use of a species specific spike-in control, or the u For example for ecoli spike-in: ``` -run_contrasts: "Y" +run_contrasts: true norm_method: "spikein" spikein_genome: "ecoli" spikein_reference: @@ -41,7 +41,7 @@ spikein_reference: For example for drosophila spike-in: ``` -run_contrasts: "Y" +run_contrasts: true norm_method: "spikein" spikein_genome: "drosophila" spikein_reference: diff --git a/workflow/Snakefile b/workflow/Snakefile index 69fe83b..d03f307 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -97,7 +97,7 @@ def run_qc(wildcards): def run_contrasts(wildcards): files=[] - if config["run_contrasts"] == "Y": + if config["run_contrasts"]: 
files.append(join(RESULTSDIR,"replicate_sample.tsv")) # inputs for matrix @@ -166,20 +166,21 @@ def get_combined(wildcards): def get_rose(wildcards): files=[] - if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): - anno_m=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="macs2",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_M,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_M,s_dist=S_DISTANCE), - files.extend(anno_m) - if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): - anno_g=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_G,s_dist=S_DISTANCE), - files.extend(anno_g) - if ("seacr_stringent" in PEAKTYPE) or ("seacr_relaxed" in PEAKTYPE): - anno_s=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="seacr",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_S,s_dist=S_DISTANCE), - files.extend(anno_s) + if config['run_rose']: + if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): + 
anno_m=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="macs2",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_M,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_M,s_dist=S_DISTANCE), + files.extend(anno_m) + if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): + anno_g=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="gopeaks",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_G,s_dist=S_DISTANCE), + files.extend(anno_g) + if ("seacr_stringent" in PEAKTYPE) or ("seacr_relaxed" in PEAKTYPE): + anno_s=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","rose","{treatment_control_list}.{dupstatus}.{peak_caller_type}.{s_dist}","{treatment_control_list}_AllStitched.table.super.summits.bed"),peak_caller="seacr",qthresholds=QTRESHOLDS,treatment_control_list=TREATMENT_LIST_SG,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_S,s_dist=S_DISTANCE), + files.extend(anno_s) return files def get_enrichment(wildcards): files=[] - if config["run_contrasts"] == "Y": + if config["run_contrasts"] and config['run_go_enrichment']: if (GENOME == "hg19") or (GENOME == "hg38"): if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): t=expand(join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.txt"),peak_caller="macs2",qthresholds=QTRESHOLDS,contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS) diff --git a/workflow/rules/annotations.smk b/workflow/rules/annotations.smk index 21bdf57..dd62112 100644 --- a/workflow/rules/annotations.smk +++ b/workflow/rules/annotations.smk @@ 
-292,7 +292,7 @@ rule rose: echo "Less than 5 usable peaks detected (N=${{num_of_peaks}})" > {output.super_summit} fi """ -if config["run_contrasts"] == "Y": +if config["run_contrasts"]: rule create_contrast_peakcaller_files: """ Reads in all of the output from Rules create_contrast_data_files which match the same peaktype and merges them together diff --git a/workflow/rules/init.smk b/workflow/rules/init.smk index 8aa464b..e4e729a 100644 --- a/workflow/rules/init.smk +++ b/workflow/rules/init.smk @@ -185,7 +185,7 @@ QTRESHOLDS=config["quality_thresholds"] QTRESHOLDS=list(map(lambda x:x.strip(),QTRESHOLDS.split(","))) # set contrast settings -if config["run_contrasts"] == "Y": +if config["run_contrasts"]: print("#"*100) print("# Checking constrasts to run...") contrasts_table = config["contrasts"]