diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 528a6e7..c349a9e 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -26,7 +26,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \ /opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \ /opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \ - --output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tn_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \ @@ -34,7 +34,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \ /opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \ /opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \ - --output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tn_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun - name: Tumor-only FastQ Dry Run @@ -44,7 +44,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \ /opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \ /opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \ - --output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tonly_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --genome hg38 --mode local --ffpe --runmode init docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \ @@ -52,7 +52,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \ /opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \ /opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \ - --output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tonly_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --genome hg38 --mode local --ffpe --runmode dryrun - name: Tumor-normal BAM Dry Run @@ -62,7 +62,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.recal.bam \ /opt2/.tests/Sample11_ACI_158_S38.recal.bam \ /opt2/.tests/Sample4_CRL1622_S31.recal.bam \ - --output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tn_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \ @@ -70,7 +70,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.recal.bam \ /opt2/.tests/Sample11_ACI_158_S38.recal.bam \ /opt2/.tests/Sample4_CRL1622_S31.recal.bam \ - --output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tn_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun - name: Tumor-only BAM Dry Run @@ -80,7 +80,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.recal.bam \ /opt2/.tests/Sample11_ACI_158_S38.recal.bam \ /opt2/.tests/Sample4_CRL1622_S31.recal.bam \ - --output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tonly_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --genome hg38 --mode local --ffpe --runmode init docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \ @@ -88,7 +88,7 @@ jobs: /opt2/.tests/Sample10_ARK1_S37.recal.bam \ /opt2/.tests/Sample11_ACI_158_S38.recal.bam \ /opt2/.tests/Sample4_CRL1622_S31.recal.bam \ - --output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \ + --output /opt2/output_tonly_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \ --genome hg38 --mode local --ffpe --runmode dryrun - name: Lint diff --git a/.tests/Agilent_SSv7_allExons_hg38.bed b/.tests/Agilent_SSv7_allExons_hg38.bed deleted file mode 100644 index e69de29..0000000 diff --git a/CHANGELOG.md b/CHANGELOG.md index b692942..aae5dfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - The docs website now has a dropdown menu to select which version to view. The latest release is shown by default. (#150, @kelly-sovacool) - Add `xavier gui` subcommand to launch the graphical user interface. (#99, @kelly-sovacool) - Previously, `xavier_gui` (with an underscore) was a command in the `ccbrpipeliner` module. +- Provide default exome targets for hg38 and mm10, which can be overridden by the optional `--targets` argument. (#102, @kelly-sovacool) + - Previously, the `--targets` argument was required with no defaults. ## XAVIER 3.0.3 diff --git a/README.md b/README.md index ff6d72c..7949d4f 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ xavier run \ --output /data/$USER/xavier_hg38 \ --genome hg38 \ --pairs pairs.txt \ ---targets Targets_hg38.bed \ +--targets resources/Agilent_SSv7_allExons_hg38.bed \ --mode slurm \ --runmode init @@ -71,7 +71,7 @@ xavier run \ --output /data/$USER/xavier_hg38 \ --genome hg38 \ --pairs pairs.txt \ ---targets Targets_hg38.bed \ +--targets resources/Agilent_SSv7_allExons_hg38.bed \ --mode slurm \ --runmode dryrun @@ -81,7 +81,7 @@ xavier run \ --output /data/$USER/xavier_hg38 \ --genome hg38 \ --pairs pairs.txt \ ---targets Targets_hg38.bed \ +--targets resources/Agilent_SSv7_allExons_hg38.bed \ --mode slurm \ --runmode run ``` @@ -109,7 +109,7 @@ xavier run \ --sif-cache $SIFCACHE \ --tmp-dir $TMPDIR \ --pairs pairs.txt \ ---targets Targets_hg38.bed \ +--targets resources/Agilent_SSv7_allExons_hg38.bed \ --mode slurm \ --runmode init # run diff --git a/config/genomes/biowulf/hg38.json b/config/genomes/biowulf/hg38.json index 640bafb..a104359 100644 --- a/config/genomes/biowulf/hg38.json +++ b/config/genomes/biowulf/hg38.json @@ -1,6 +1,7 @@ { "references": { "FASTQ_SCREEN_CONFIG": "resources/fastq_screen.biowulf.conf", + "exome_targets": "resources/Agilent_SSv7_allExons_hg38.bed", "KRAKENBACDB": "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2", "trimmomatic.adapters": "resources/adapters.fa", "SNPEFF_GENOME": "GRCh38.86", diff --git a/config/genomes/biowulf/mm10.json b/config/genomes/biowulf/mm10.json index 91cc0aa..cbb60a5 100644 --- a/config/genomes/biowulf/mm10.json +++ b/config/genomes/biowulf/mm10.json @@ -1,6 +1,7 @@ { "references": { "FASTQ_SCREEN_CONFIG": "resources/fastq_screen.biowulf.conf", + "exome_targets": "resources/SureSelect_mm10_sorted.bed", "KRAKENBACDB": "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2", "trimmomatic.adapters": "resources/adapters.fa", "SNPEFF_GENOME": "GRCm38.86", diff --git a/config/genomes/frce/hg38.json b/config/genomes/frce/hg38.json index 46e2805..afd456b 100644 --- a/config/genomes/frce/hg38.json +++ b/config/genomes/frce/hg38.json @@ -1,6 +1,7 @@ { "references": { "FASTQ_SCREEN_CONFIG": "resources/fastq_screen.frce.conf", + "exome_targets": "resources/Agilent_SSv7_allExons_hg38.bed", "KRAKENBACDB": "/mnt/projects/CCBR-Pipelines/pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2", "trimmomatic.adapters": "resources/adapters.fa", "SNPEFF_GENOME": "GRCh38.86", diff --git a/config/genomes/frce/mm10.json b/config/genomes/frce/mm10.json index f6b5a9d..cfa77fe 100644 --- a/config/genomes/frce/mm10.json +++ b/config/genomes/frce/mm10.json @@ -1,6 +1,7 @@ { "references": { "FASTQ_SCREEN_CONFIG": "resources/fastq_screen.frce.conf", + "exome_targets": "resources/SureSelect_mm10_sorted.bed", "KRAKENBACDB": "/mnt/projects/CCBR-Pipelines/pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2", "trimmomatic.adapters": "resources/adapters.fa", "SNPEFF_GENOME": "GRCm38.86", diff --git a/docs/usage/run.md b/docs/usage/run.md index 9141570..3486355 100644 --- a/docs/usage/run.md +++ b/docs/usage/run.md @@ -82,7 +82,7 @@ Each of the following arguments are required. Failure to provide a required argu > This option defines the reference genome for your set of samples. On Biowulf, xavier does comes bundled with pre built reference files for human samples; however, it is worth noting that the pipeline does accept a pre-built resource bundle pulled with the cache sub command (coming soon). Currently, the pipeline only supports the human reference hg38; however, support for mouse reference mm10 will be added soon. > > **_Pre built Option_** -> Here is a list of available pre built genomes on Biowulf: hg38. +> Here is a list of available pre built genomes on Biowulf: hg38, mm10. > > **_Custom Option_** > For users running the pipeline outside of Biowulf, a pre-built resource bundle can be pulled with the cache sub command (coming soon). Please supply the custom reference JSON file that was generated by the cache sub command. @@ -98,7 +98,9 @@ Each of the following arguments are required. Failure to provide a required argu > > This file can be obtained from the manufacturer of the target capture kit that was used. > -> **_Example:_** `--targets /data/$USER/Agilent_SSv7_allExons_hg38.bed` +> If not provided, the default targets file from the genome config is used +> +> **_Example:_** `--targets resources/Agilent_SSv7_allExons_hg38.bed` > **_Example:_** `--targets resources/SureSelect_mm10_sorted.bed` ### 2.2 Options diff --git a/resources/Agilent_SSv7_allExons_hg38.bed b/resources/Agilent_SSv7_allExons_hg38.bed old mode 100755 new mode 100644 diff --git a/resources/fastq_screen.frce.conf b/resources/fastq_screen.frce.conf old mode 100755 new mode 100644 diff --git a/src/xavier/__main__.py b/src/xavier/__main__.py index 5d8bceb..5df5326 100755 --- a/src/xavier/__main__.py +++ b/src/xavier/__main__.py @@ -245,6 +245,9 @@ def parsed_arguments(): Path to exome targets BED file. This file can be obtained from the manufacturer of the target capture kit that was used. + If not provided, the default targets file is used from the genome config file. + Example: --targets resources/Agilent_SSv7_allExons_hg38.bed + Example: --targets resources/SureSelect_mm10_sorted.bed """ ) @@ -264,7 +267,7 @@ def parsed_arguments(): --input .tests/*.R?.fastq.gz \\ --output /data/$USER/xavier_hg38 \\ --genome hg38 \\ - --targets .tests/Agilent_SSv7_allExons_hg38.bed + --targets resources/Agilent_SSv7_allExons_hg38.bed # Step 2B.) Dry-run the pipeline xavier run \\ @@ -272,7 +275,7 @@ def parsed_arguments(): --input .tests/*.R?.fastq.gz \\ --output /data/$USER/xavier_hg38 \\ --genome hg38 \\ - --targets Agilent_SSv7_allExons_hg38.bed \\ + --targets resources/Agilent_SSv7_allExons_hg38.bed \\ --mode slurm \\ # Step 2C.) Run the XAVIER pipeline @@ -283,7 +286,7 @@ def parsed_arguments(): --input .tests/*.R?.fastq.gz \\ --output /data/$USER/xavier_hg38 \\ --genome hg38 \\ - --targets .tests/Agilent_SSv7_allExons_hg38.bed \\ + --targets resources/Agilent_SSv7_allExons_hg38.bed \\ --mode slurm version: @@ -354,8 +357,9 @@ def parsed_arguments(): "--targets", # Check if the file exists and if it is readable type=lambda file: permissions(parser, file, os.R_OK), - required=True, + required=False, help=argparse.SUPPRESS, + default=None, ) # Optional Arguments diff --git a/src/xavier/run.py b/src/xavier/run.py index 0ee67bd..516c0bf 100644 --- a/src/xavier/run.py +++ b/src/xavier/run.py @@ -322,7 +322,7 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ f"{shorthostname} unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config" ) else: - print(f"Thank you for running XAVIER on {shorthostname.upper()}") + print(f"Thank you for running XAVIER on {shorthostname.upper()}") genome_config = os.path.join( repo_path, "config", "genomes", get_hpcname(), sub_args.genome + ".json" @@ -370,7 +370,17 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[ # Add optional cli workflow steps config["input_params"]["CNV_CALLING"] = str(sub_args.cnv).lower() config["input_params"]["FFPE_FILTER"] = str(sub_args.ffpe).lower() - config["input_params"]["EXOME_TARGETS"] = str(sub_args.targets) + config["input_params"]["EXOME_TARGETS"] = ( + str(sub_args.targets) + if sub_args.targets + else os.path.join( + config["project"]["workpath"], config["references"]["exome_targets"] + ) + ) + if not os.path.exists(config["input_params"]["EXOME_TARGETS"]): + raise FileNotFoundError( + f"Exome targets file does not exist: {config['input_params']['EXOME_TARGETS']}" + ) config["input_params"]["VARIANT_CALLERS"] = sub_args.callers config["input_params"]["PAIRS_FILE"] = str(sub_args.pairs) config["input_params"]["BASE_OUTDIR"] = str(sub_args.output) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0dc37df..878b6dc 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,34 @@ +import json +import os import subprocess +import tempfile from xavier.src.xavier.__main__ import main +from xavier.src.xavier.util import get_hpcname + +xavier_run = ( + "xavier run " + "--input .tests/*.fastq.gz " + "--pairs .tests/pairs.tsv " + "--mode local " +) + + +def run_in_temp(command_str): + with tempfile.TemporaryDirectory() as tmp_dir: + outdir = os.path.join(tmp_dir, "testout") + run_command = f"{command_str} --output {outdir}" + output = subprocess.run( + f"{run_command} --runmode init && {run_command} --runmode dryrun", + capture_output=True, + shell=True, + text=True, + ) + if os.path.exists(os.path.join(outdir, "config.json")): + with open(os.path.join(outdir, "config.json"), "r") as infile: + config = json.load(infile) + else: + config = None + return output, config def test_help(): @@ -9,3 +38,37 @@ def test_help(): "./bin/xavier --help", capture_output=True, shell=True, text=True ).stdout ) + + +def test_dryrun_targets(): + if get_hpcname() == "biowulf": + output_human, config_human = run_in_temp(f"{xavier_run} --genome hg38") + output_mouse, config_mouse = run_in_temp(f"{xavier_run} --genome mm10") + output_custom, config_custom = run_in_temp( + f"{xavier_run} --genome mm10 --targets resources/Agilent_SSv7_allExons_hg38.bed" + ) + output_invalid, config_invalid = run_in_temp( + f"{xavier_run} --genome hg38 --target not/a/file.txt" + ) + assert all( + [ + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." + in output_human.stdout, + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." + in output_mouse.stdout, + "This was a dry-run (flag -n). The order of jobs does not reflect the order of execution." + in output_custom.stdout, + "error: Path 'not/a/file.txt' does not exists! Failed to provide valid input." + in output_invalid.stderr, + config_human["input_params"]["EXOME_TARGETS"].endswith( + "resources/Agilent_SSv7_allExons_hg38.bed" + ), + config_mouse["input_params"]["EXOME_TARGETS"].endswith( + "resources/SureSelect_mm10_sorted.bed" + ), + config_custom["input_params"]["EXOME_TARGETS"].endswith( + "resources/Agilent_SSv7_allExons_hg38.bed" + ), + not config_invalid, + ] + ) diff --git a/tests/test_run.py b/tests/test_run.py index 43d9f35..8fd28d6 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -16,7 +16,7 @@ def test_dryrun(): input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))), output=tmp_dir, genome="hg38", - targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"), + targets=xavier_base("resources/Agilent_SSv7_allExons_hg38.bed"), mode="local", job_name="pl:xavier", callers=["mutect2", "mutect", "strelka", "vardict", "varscan"],