From b4d1ddd2421c4166af66dfc61788ea01fe3e8f1f Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 13 Jan 2018 20:49:45 -0500 Subject: [PATCH] Clean m2 wdl. Closes #4072. (#4132) --- .../m2_cromwell_tests/test_m2_wdl_multi.json | 9 +- scripts/mutect2_wdl/mutect2.wdl | 879 +++++++++--------- scripts/mutect2_wdl/mutect2_multi_sample.wdl | 175 ++-- scripts/mutect2_wdl/mutect2_pon.wdl | 102 +- scripts/mutect2_wdl/mutect2_template.json | 16 +- scripts/mutect2_wdl/unsupported/README.md | 29 +- .../unsupported/hapmap_sensitivity.wdl | 151 ++- .../hapmap_sensitivity_all_plexes.wdl | 64 +- .../unsupported/m2_basic_validation.wdl | 123 ++- .../mutect2-replicate-validation.wdl | 177 ++-- .../unsupported/mutect2_compare_tumors.wdl | 189 ++-- .../mutect2_multi_sample_concordance.wdl | 157 ++-- .../unsupported/preprocess_hapmap.wdl | 60 +- 13 files changed, 1090 insertions(+), 1041 deletions(-) diff --git a/scripts/m2_cromwell_tests/test_m2_wdl_multi.json b/scripts/m2_cromwell_tests/test_m2_wdl_multi.json index c3a24fd0c83..f917d0a23c8 100644 --- a/scripts/m2_cromwell_tests/test_m2_wdl_multi.json +++ b/scripts/m2_cromwell_tests/test_m2_wdl_multi.json @@ -1,8 +1,10 @@ { - "Mutect2_Multi.gatk4_jar": "/root/gatk.jar", + "Mutect2_Multi.picard": "/home/travis/picard.jar", + "Mutect2_Multi.gatk_docker": "__GATK_DOCKER__", + "Mutect2_Multi.oncotator_docker": "broadinstitute/oncotator:1.9.6.1", "Mutect2_Multi.intervals": "/home/travis/build/broadinstitute/gatk/scripts/m2_cromwell_tests/interval_list.interval_list", "Mutect2_Multi.ref_fasta": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/human_g1k_v37.20.21.fasta", - "Mutect2_Multi.ref_fasta_index": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/human_g1k_v37.20.21.fasta.fai", + "Mutect2_Multi.ref_fai": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/human_g1k_v37.20.21.fasta.fai", "Mutect2_Multi.ref_dict": 
"/home/travis/build/broadinstitute/gatk/src/test/resources/large/human_g1k_v37.20.21.dict", "Mutect2_Multi.pair_list": "/home/travis/build/broadinstitute/gatk/scripts/m2_cromwell_tests/pair_list", "Mutect2_Multi.scatter_count": 2, @@ -10,10 +12,7 @@ "Mutect2_Multi.dbsnp_index": "/home/travis/build/broadinstitute/gatk/src/test/resources/large/dbsnp_138.b37.20.21.vcf.idx", "Mutect2_Multi.is_run_orientation_bias_filter": true, "Mutect2_Multi.is_run_oncotator": true, - "Mutect2_Multi.gatk_docker": "__GATK_DOCKER__", - "Mutect2_Multi.oncotator_docker": "broadinstitute/oncotator:1.9.3.0", "Mutect2_Multi.preemptible_attempts": 2, "Mutect2_Multi.artifact_modes": ["G/T", "C/T"], - "Mutect2_Multi.picard_jar": "/home/travis/picard.jar", "Mutect2_Multi.onco_ds_local_db_dir": "/root/" } \ No newline at end of file diff --git a/scripts/mutect2_wdl/mutect2.wdl b/scripts/mutect2_wdl/mutect2.wdl index 75f302b49de..fdc9034b531 100755 --- a/scripts/mutect2_wdl/mutect2.wdl +++ b/scripts/mutect2_wdl/mutect2.wdl @@ -1,11 +1,11 @@ # Run Mutect 2 on a single tumor-normal pair or on a single tumor sample. # # Description of inputs -# gatk4_jar: java jar file containing gatk 4 +# gatk: java jar file containing gatk 4 # intervals: genomic intervals -# ref_fasta, ref_fasta_index, ref_dict: reference genome, index, and dictionary -# tumor_bam, tumor_bam_index: self-explanatory -# normal_bam, normal_bam_index: self-explanatory +# ref_fasta, ref_fai, ref_dict: reference genome, index, and dictionary +# tumor_bam, tumor_bai: self-explanatory +# normal_bam, normal_bai: self-explanatory # pon, pon_index: optional panel of normals and index in vcf format containing known false positves # scatter_count: number of parallel jobs when scattering over intervals # gnomad, gnomad_index: optional database of known germline variants, obtainable from http://gnomad.broadinstitute.org/downloads @@ -16,164 +16,160 @@ # hub is also required, since the task will download a public docker image. 
# # -# This WDL needs to decide whether to use the ``gatk_jar`` or ``gatk_jar_override`` for the jar location. As of cromwell-0.24, +# This WDL needs to decide whether to use the ``gatk`` or ``gatk_override`` for the jar location. As of cromwell-0.24, # this logic *must* go into each task. Therefore, there is a lot of duplicated code. This allows users to specify a jar file # independent of what is in the docker file. See the README.md for more info. # workflow Mutect2 { - # gatk4_jar needs to be a String input to the workflow in order to work in a Docker image - String gatk4_jar - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict - File tumor_bam - File tumor_bam_index - File? normal_bam - File? normal_bam_index - String output_vcf_name = basename(tumor_bam, ".bam") + ".vcf" - File? pon - File? pon_index - Int scatter_count - File? gnomad - File? gnomad_index - File? variants_for_contamination - File? variants_for_contamination_index - Boolean is_run_orientation_bias_filter - Boolean is_run_oncotator - - File? gatk4_jar_override - File? onco_ds_tar_gz - String? onco_ds_local_db_dir - Array[String] artifact_modes - File picard_jar - String? m2_extra_args - String? m2_extra_filtering_args - String? sequencing_center - String? sequence_source - File? default_config_file - Boolean is_bamOut = false - - Int? 
preemptible_attempts - String gatk_docker - String basic_bash_docker = "ubuntu:16.04" - String oncotator_docker - - call SplitIntervals { - input: - gatk4_jar = gatk4_jar, - scatter_count = scatter_count, - intervals = intervals, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, - gatk_docker = gatk_docker - } - - scatter (subintervals in SplitIntervals.interval_files ) { - - call M2 { - input: - gatk4_jar = gatk4_jar, - intervals = subintervals, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - tumor_bam = tumor_bam, - tumor_bam_index = tumor_bam_index, - normal_bam = normal_bam, - normal_bam_index = normal_bam_index, - pon = pon, - pon_index = pon_index, - gnomad = gnomad, - gnomad_index = gnomad_index, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, - gatk_docker = gatk_docker, - m2_extra_args = m2_extra_args, - is_bamOut = is_bamOut, - preemptible_attempts = preemptible_attempts, - gatk_docker = gatk_docker - } - } - - call MergeVCFs { - input: - gatk4_jar = gatk4_jar, - input_vcfs = M2.output_vcf, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, - gatk_docker = gatk_docker, - output_vcf_name = output_vcf_name - } - - if (is_bamOut) { - call MergeBamOuts { + # Mutect2 inputs + File picard + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File tumor_bam + File tumor_bai + File? normal_bam + File? normal_bai + String output_vcf_name = basename(tumor_bam, ".bam") + ".vcf" + File? pon + File? pon_index + Int scatter_count + File? gnomad + File? gnomad_index + File? variants_for_contamination + File? variants_for_contamination_index + Boolean is_run_orientation_bias_filter + Array[String] artifact_modes + String? m2_extra_args + String? 
m2_extra_filtering_args + Boolean is_bamOut = false + + # oncotator inputs + Boolean is_run_oncotator + File? onco_ds_tar_gz + String? onco_ds_local_db_dir + String? sequencing_center + String? sequence_source + File? default_config_file + + File? gatk_override + + # runtime + String gatk_docker + String basic_bash_docker = "ubuntu:16.04" + String oncotator_docker + Int? preemptible_attempts + + + call SplitIntervals { input: - bam_outs = M2.output_bamOut, - picard_jar = picard_jar, + intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, + scatter_count = scatter_count, + gatk_override = gatk_override, gatk_docker = gatk_docker, - output_vcf_name = basename(MergeVCFs.output_vcf, ".vcf") + preemptible_attempts = preemptible_attempts + } + + scatter (subintervals in SplitIntervals.interval_files ) { + call M2 { + input: + intervals = subintervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + tumor_bam = tumor_bam, + tumor_bai = tumor_bai, + normal_bam = normal_bam, + normal_bai = normal_bai, + pon = pon, + pon_index = pon_index, + gnomad = gnomad, + gnomad_index = gnomad_index, + preemptible_attempts = preemptible_attempts, + m2_extra_args = m2_extra_args, + is_bamOut = is_bamOut, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + preemptible_attempts = preemptible_attempts + } } - } - if (is_run_orientation_bias_filter) { - call CollectSequencingArtifactMetrics { + call MergeVCFs { input: - preemptible_attempts = preemptible_attempts, + input_vcfs = M2.output_vcf, + output_vcf_name = output_vcf_name, + gatk_override = gatk_override, gatk_docker = gatk_docker, - tumor_bam = tumor_bam, - tumor_bam_index = tumor_bam_index, + preemptible_attempts = preemptible_attempts + } + + if (is_bamOut) { + call MergeBamOuts { + input: + picard = picard, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + 
ref_dict = ref_dict, + bam_outs = M2.output_bamOut, + output_vcf_name = basename(MergeVCFs.output_vcf, ".vcf"), + gatk_override = gatk_override, + gatk_docker = gatk_docker + } + } + + if (is_run_orientation_bias_filter) { + call CollectSequencingArtifactMetrics { + input: + picard = picard, + gatk_docker = gatk_docker, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + preemptible_attempts = preemptible_attempts, + tumor_bam = tumor_bam, + tumor_bai = tumor_bai + } + } + + call Filter { + input: + gatk_override = gatk_override, + gatk_docker = gatk_docker, + intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - picard_jar = picard_jar - } - } - - call Filter { - input: - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, - unfiltered_vcf = MergeVCFs.output_vcf, - intervals = intervals, - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts, - pre_adapter_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics, - tumor_bam = tumor_bam, - tumor_bam_index = tumor_bam_index, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - artifact_modes = artifact_modes, - variants_for_contamination = variants_for_contamination, - variants_for_contamination_index = variants_for_contamination_index, - m2_extra_filtering_args = m2_extra_filtering_args - } - - - if (is_run_oncotator) { + ref_fai = ref_fai, + unfiltered_vcf = MergeVCFs.output_vcf, + preemptible_attempts = preemptible_attempts, + pre_adapter_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics, + tumor_bam = tumor_bam, + tumor_bai = tumor_bai, + artifact_modes = artifact_modes, + variants_for_contamination = variants_for_contamination, + variants_for_contamination_index = variants_for_contamination_index, + m2_extra_filtering_args = m2_extra_filtering_args + } + + + if (is_run_oncotator) { call oncotate_m2 { input: m2_vcf = Filter.filtered_vcf, - preemptible_attempts = preemptible_attempts, - oncotator_docker = oncotator_docker, 
onco_ds_tar_gz = onco_ds_tar_gz, onco_ds_local_db_dir = onco_ds_local_db_dir, sequencing_center = sequencing_center, sequence_source = sequence_source, default_config_file = default_config_file, case_id = M2.tumor_sample[0], - control_id = M2.normal_sample[0] + control_id = M2.normal_sample[0], + oncotator_docker = oncotator_docker, + preemptible_attempts = preemptible_attempts } - } + } - output { + output { File unfiltered_vcf = MergeVCFs.output_vcf File unfiltered_vcf_index = MergeVCFs.output_vcf_index File filtered_vcf = Filter.filtered_vcf @@ -185,289 +181,316 @@ workflow Mutect2 { File? preadapter_detail_metrics = select_first([CollectSequencingArtifactMetrics.pre_adapter_metrics, "null"]) File? bamout = select_first([MergeBamOuts.merged_bam_out, "null"]) File? bamout_index = select_first([MergeBamOuts.merged_bam_out_index, "null"]) - } + } } task M2 { - String gatk4_jar - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict - File tumor_bam - File tumor_bam_index - File? normal_bam - File? normal_bam_index - File? pon - File? pon_index - File? gnomad - File? gnomad_index - File? gatk4_jar_override - String? m2_extra_args - Boolean? is_bamOut - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? 
disk_space_gb - - String gatk_local_jar = select_first([gatk4_jar_override, gatk4_jar]) - - command <<< - - # Use GATK Jar override if specified - export GATK_LOCAL_JAR=${gatk_local_jar} - - # We need to create these files regardless, even if they stay empty - touch bamout.bam - echo "" > normal_name.txt - - gatk --java-options "-Xmx4g" GetSampleName -I ${tumor_bam} -O tumor_name.txt - tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`" - - if [[ "_${normal_bam}" == *.bam ]]; then - gatk --java-options "-Xmx4g" GetSampleName -I ${normal_bam} -O normal_name.txt - normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`" - fi - - gatk --java-options "-Xmx4g" Mutect2 \ - -R ${ref_fasta} \ - $tumor_command_line \ - $normal_command_line \ - ${"--germline-resource " + gnomad} \ - ${"-pon " + pon} \ - ${"-L " + intervals} \ - -O "output.vcf" \ - ${true='--bam-output bamout.bam' false='' is_bamOut} \ - ${m2_extra_args} - >>> - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 5]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File output_vcf = "output.vcf" - File output_bamOut = "bamout.bam" - String tumor_sample = read_string("tumor_name.txt") - String normal_sample = read_string("normal_name.txt") - } + # inputs + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File tumor_bam + File tumor_bai + File? normal_bam + File? normal_bai + File? pon + File? pon_index + File? gnomad + File? gnomad_index + String? m2_extra_args + Boolean? is_bamOut + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? 
cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + command <<< + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + # We need to create these files regardless, even if they stay empty + touch bamout.bam + echo "" > normal_name.txt + + gatk --java-options "-Xmx${command_mem}g" GetSampleName -I ${tumor_bam} -O tumor_name.txt + tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`" + + if [[ "_${normal_bam}" == *.bam ]]; then + gatk --java-options "-Xmx${command_mem}g" GetSampleName -I ${normal_bam} -O normal_name.txt + normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`" + fi + + gatk --java-options "-Xmx${command_mem}g" Mutect2 \ + -R ${ref_fasta} \ + $tumor_command_line \ + $normal_command_line \ + ${"--germline-resource " + gnomad} \ + ${"-pon " + pon} \ + ${"-L " + intervals} \ + -O "output.vcf" \ + ${true='--bam-output bamout.bam' false='' is_bamOut} \ + ${m2_extra_args} + >>> + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + File output_vcf = "output.vcf" + File output_bamOut = "bamout.bam" + String tumor_sample = read_string("tumor_name.txt") + String normal_sample = read_string("normal_name.txt") + } } task MergeVCFs { - String gatk4_jar - Array[File] input_vcfs - File? gatk4_jar_override - String output_vcf_name - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? disk_space_gb - - String gatk_local_jar = select_first([gatk4_jar_override, gatk4_jar]) - - # using MergeVcfs instead of GatherVcfs so we can create indices - # WARNING 2015-10-28 15:01:48 GatherVcfs Index creation not currently supported when gathering block compressed VCFs. 
- command { - export GATK_LOCAL_JAR=${gatk_local_jar} - - gatk --java-options "-Xmx2g" MergeVcfs -I ${sep=' -I ' input_vcfs} -O ${output_vcf_name} - } - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 5]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File output_vcf = "${output_vcf_name}" - File output_vcf_index = "${output_vcf_name}.idx" - } + # inputs + Array[File] input_vcfs + String output_vcf_name + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + # using MergeVcfs instead of GatherVcfs so we can create indices + # WARNING 2015-10-28 15:01:48 GatherVcfs Index creation not currently supported when gathering block compressed VCFs. + command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx${command_mem}g" MergeVcfs -I ${sep=' -I ' input_vcfs} -O ${output_vcf_name} + } + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + File output_vcf = "${output_vcf_name}" + File output_vcf_index = "${output_vcf_name}.idx" + } } task CollectSequencingArtifactMetrics { - File tumor_bam - File tumor_bam_index - File ref_fasta - File ref_fasta_index - File picard_jar - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? disk_space_gb - - command { + # inputs + File picard + File ref_fasta + File ref_fai + File tumor_bam + File tumor_bai + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? 
cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + command { set -e - java -Xmx4G -jar ${picard_jar} CollectSequencingArtifactMetrics I=${tumor_bam} O="gatk" R=${ref_fasta} VALIDATION_STRINGENCY=LENIENT - } - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 5]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File pre_adapter_metrics = "gatk.pre_adapter_detail_metrics" - } + java -Xmx4G -jar ${picard} CollectSequencingArtifactMetrics I=${tumor_bam} O="gatk" R=${ref_fasta} VALIDATION_STRINGENCY=LENIENT + } + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + File pre_adapter_metrics = "gatk.pre_adapter_detail_metrics" + } } task Filter { - String gatk4_jar - File? gatk4_jar_override - File unfiltered_vcf - String filtered_vcf_name = basename(unfiltered_vcf, ".vcf") + "-filtered.vcf" - File? intervals - File? pre_adapter_metrics - File? tumor_bam - File? tumor_bam_index - File? ref_fasta - File? ref_fasta_index - Array[String]? artifact_modes - File? variants_for_contamination - File? variants_for_contamination_index - String? m2_extra_filtering_args - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? 
disk_space_gb - - String gatk_local_jar = select_first([gatk4_jar_override, gatk4_jar]) - - command { - set -e - - # Use GATK Jar override if specified - export GATK_LOCAL_JAR=${gatk_local_jar} - - touch contamination.table - if [[ "${variants_for_contamination}" == *.vcf ]]; then - gatk --java-options "-Xmx4g" GetPileupSummaries -I ${tumor_bam} ${"-L " + intervals} -V ${variants_for_contamination} -O pileups.table - gatk --java-options "-Xmx4g" CalculateContamination -I pileups.table -O contamination.table - contamination_cmd="--contamination-table contamination.table" - fi - - gatk --java-options "-Xmx4g" FilterMutectCalls -V ${unfiltered_vcf} \ + # inputs + File? intervals + File? ref_fasta + File? ref_fai + File unfiltered_vcf + String filtered_vcf_name = basename(unfiltered_vcf, ".vcf") + "-filtered.vcf" + File? pre_adapter_metrics + File? tumor_bam + File? tumor_bai + Array[String]? artifact_modes + File? variants_for_contamination + File? variants_for_contamination_index + String? m2_extra_filtering_args + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + command { + set -e + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + touch contamination.table + if [[ "${variants_for_contamination}" == *.vcf ]]; then + gatk --java-options "-Xmx${command_mem}g" GetPileupSummaries -I ${tumor_bam} ${"-L " + intervals} -V ${variants_for_contamination} -O pileups.table + gatk --java-options "-Xmx${command_mem}g" CalculateContamination -I pileups.table -O contamination.table + contamination_cmd="--contamination-table contamination.table" + fi + + gatk --java-options "-Xmx${command_mem}g" FilterMutectCalls -V ${unfiltered_vcf} \ -O filtered.vcf $contamination_cmd \ ${m2_extra_filtering_args} - # FilterByOrientationBias must come after all of the other filtering. - if [[ ! 
-z "${pre_adapter_metrics}" ]]; then - gatk --java-options "-Xmx4g" FilterByOrientationBias -AM ${sep=" -AM " artifact_modes} \ - -V filtered.vcf -P ${pre_adapter_metrics} --output ${filtered_vcf_name} - else - mv filtered.vcf ${filtered_vcf_name} - mv filtered.vcf.idx "${filtered_vcf_name}.idx" - fi - } - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 5]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File filtered_vcf = "${filtered_vcf_name}" - File filtered_vcf_index = "${filtered_vcf_name}.idx" - File contamination_table = "contamination.table" - } + # FilterByOrientationBias must come after all of the other filtering. + if [[ ! -z "${pre_adapter_metrics}" ]]; then + gatk --java-options "-Xmx${command_mem}g" FilterByOrientationBias -AM ${sep=" -AM " artifact_modes} \ + -V filtered.vcf -P ${pre_adapter_metrics} --output ${filtered_vcf_name} + else + mv filtered.vcf ${filtered_vcf_name} + mv filtered.vcf.idx "${filtered_vcf_name}.idx" + fi + } + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + File filtered_vcf = "${filtered_vcf_name}" + File filtered_vcf_index = "${filtered_vcf_name}.idx" + File contamination_table = "contamination.table" + } } task SplitIntervals { - String gatk4_jar - Int scatter_count - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict - File? gatk4_jar_override - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? 
disk_space_gb - - String gatk_local_jar = select_first([gatk4_jar_override, gatk4_jar]) - - command { - # fail if *any* command below (not just the last) doesn't return 0, in particular if GATK SplitIntervals fails - set -e - - # Use GATK Jar override if specified - export GATK_LOCAL_JAR=${gatk_local_jar} - - mkdir interval-files - gatk SplitIntervals -R ${ref_fasta} ${"-L " + intervals} -scatter ${scatter_count} -O interval-files - cp interval-files/*.intervals . - } - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 3]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - Array[File] interval_files = glob("*.intervals") - } + # inputs + File? intervals + File ref_fasta + File ref_fai + File ref_dict + Int scatter_count + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + + command { + set -e + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + mkdir interval-files + gatk --java-options "-Xmx${command_mem}g" SplitIntervals -R ${ref_fasta} ${"-L " + intervals} -scatter ${scatter_count} -O interval-files + cp interval-files/*.intervals . + } + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + Array[File] interval_files = glob("*.intervals") + } } task MergeBamOuts { - String gatk4_jar - Array[File]+ bam_outs - File picard_jar - File ref_fasta - File ref_fasta_index - File ref_dict - File? gatk4_jar_override - String output_vcf_name - - # Runtime parameters - Int? mem - String gatk_docker - Int? preemptible_attempts - Int? 
disk_space_gb - - command <<< - # This command block assumes that there is at least one file in bam_outs. - # Do not call this task if len(bam_outs) == 0 - set -e - java -Xmx4G -jar ${picard_jar} GatherBamFiles I=${sep=" I=" bam_outs} O=${output_vcf_name}.out.bam R=${ref_fasta} - - samtools index ${output_vcf_name}.out.bam ${output_vcf_name}.out.bam.bai - >>> - - runtime { - docker: "${gatk_docker}" - memory: select_first([mem, 3]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File merged_bam_out = "${output_vcf_name}.out.bam" - File merged_bam_out_index = "${output_vcf_name}.out.bam.bai" - } - } + # inputs + File picard + File ref_fasta + File ref_fai + File ref_dict + Array[File]+ bam_outs + String output_vcf_name + + File? gatk_override + + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? cpu + Boolean use_ssd = false + + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 + + command <<< + # This command block assumes that there is at least one file in bam_outs. + # Do not call this task if len(bam_outs) == 0 + set -e + java -Xmx4G -jar ${picard} GatherBamFiles I=${sep=" I=" bam_outs} O=${output_vcf_name}.out.bam R=${ref_fasta} + samtools index ${output_vcf_name}.out.bam ${output_vcf_name}.out.bam.bai + >>> + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } + + output { + File merged_bam_out = "${output_vcf_name}.out.bam" + File merged_bam_out_index = "${output_vcf_name}.out.bam.bai" + } +} task oncotate_m2 { + # inputs File m2_vcf File? onco_ds_tar_gz String? onco_ds_local_db_dir @@ -478,35 +501,38 @@ task oncotate_m2 { String case_id String? control_id - # Runtime parameters - Int? 
mem - String oncotator_docker - Int? preemptible_attempts - Int? disk_space_gb - - command <<< + # runtime + String oncotator_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? cpu + Boolean use_ssd = false - # fail if *any* command below (not just the last) doesn't return 0, in particular if wget fails - set -e + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 - # local db dir is a directory and has been specified - if [[ -d "${onco_ds_local_db_dir}" ]]; then - echo "Using local db-dir: ${onco_ds_local_db_dir}" - echo "THIS ONLY WORKS WITHOUT DOCKER!" - ln -s ${onco_ds_local_db_dir} onco_dbdir + command <<< - elif [[ "${onco_ds_tar_gz}" == *.tar.gz ]]; then - echo "Using given tar file: ${onco_ds_tar_gz}" - mkdir onco_dbdir - tar zxvf ${onco_ds_tar_gz} -C onco_dbdir --strip-components 1 + # fail if *any* command below (not just the last) doesn't return 0, in particular if wget fails + set -e - else - echo "Downloading and installing oncotator datasources from Broad FTP site..." - # Download and untar the db-dir - wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/oncotator/oncotator_v1_ds_April052016.tar.gz - tar zxvf oncotator_v1_ds_April052016.tar.gz - ln -s oncotator_v1_ds_April052016 onco_dbdir - fi + # local db dir is a directory and has been specified + if [[ -d "${onco_ds_local_db_dir}" ]]; then + echo "Using local db-dir: ${onco_ds_local_db_dir}" + echo "THIS ONLY WORKS WITHOUT DOCKER!" + ln -s ${onco_ds_local_db_dir} onco_dbdir + elif [[ "${onco_ds_tar_gz}" == *.tar.gz ]]; then + echo "Using given tar file: ${onco_ds_tar_gz}" + mkdir onco_dbdir + tar zxvf ${onco_ds_tar_gz} -C onco_dbdir --strip-components 1 + else + echo "Downloading and installing oncotator datasources from Broad FTP site..." 
+ # Download and untar the db-dir + wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/oncotator/oncotator_v1_ds_April052016.tar.gz + tar zxvf oncotator_v1_ds_April052016.tar.gz + ln -s oncotator_v1_ds_April052016 onco_dbdir + fi ${default="/root/oncotator_venv/bin/oncotator" oncotator_exe} --db-dir onco_dbdir/ -c $HOME/tx_exact_uniprot_matches.AKT1_CRLF2_FGFR1.txt \ -v ${m2_vcf} ${case_id}.maf.annotated hg19 -i VCF -o TCGAMAF --skip-no-alt --infer-onps --collapse-number-annotations --log_name oncotator.log \ @@ -518,11 +544,12 @@ task oncotate_m2 { >>> runtime { - docker: "${oncotator_docker}" - memory: select_first([mem, 3]) + " GB" + docker: oncotator_docker + memory: machine_mem + " GB" bootDiskSizeGb: 12 - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) } output { diff --git a/scripts/mutect2_wdl/mutect2_multi_sample.wdl b/scripts/mutect2_wdl/mutect2_multi_sample.wdl index b0d506c7ea7..083de339ec4 100644 --- a/scripts/mutect2_wdl/mutect2_multi_sample.wdl +++ b/scripts/mutect2_wdl/mutect2_multi_sample.wdl @@ -1,69 +1,32 @@ # Run Mutect 2 on a list of tumors or tumor-normal pairs # # Description of inputs -# gatk4_jar: java jar file containing gatk 4 (protected) # intervals: genomic intervals -# ref_fasta, ref_fasta_index, ref_dict: reference genome, index, and dictionary +# ref_fasta, ref_fai, ref_dict: reference genome, index, and dictionary # pon, pon_index: optional panel of normals and index in vcf format containing known false positves # scatter_count: number of parallel jobs when scattering over intervals # gnomad, gnomad_index: optional database of known germline variants, obtainable from http://gnomad.broadinstitute.org/downloads # variants_for_contamination, variants_for_contamination_index: 
vcf of common variants with allele frequencies fo calculating contamination # is_run_orientation_bias_filter: if true, run the orientation bias filter post-processing step # pair_list: a tab-separated table with no header in the following format: -# TUMOR_1_BAMTUMOR_1_BAM_INDEXNORMAL_1_BAMNORMAL_1_BAM_INDEX -# TUMOR_2_BAMTUMOR_2_BAM_INDEXNORMAL_2_BAMNORMAL_2_BAM_INDEX +# TUMOR_1_BAMTUMOR_1_baiNORMAL_1_BAMNORMAL_1_bai +# TUMOR_2_BAMTUMOR_2_baiNORMAL_2_BAMNORMAL_2_bai # . . . # Tumor-only input is the same but without the columns for the normal: -# TUMOR_1_BAMTUMOR_1_BAM_INDEX -# TUMOR_2_BAMTUMOR_2_BAM_INDEX +# TUMOR_1_BAMTUMOR_1_bai +# TUMOR_2_BAMTUMOR_2_bai # . . . import "mutect2.wdl" as m2 - -# -# IMPORTANT: This task will not generate useful results for any backends using docker (incl. JES/cloud). -# -task CreateOutputList { - String output_name - Array[String] vcfs - - - # Runtime parameters - Int? mem - Int? preemptible_attempts - Int? disk_space_gb - - command { - for vcf in ${sep=" " vcfs}; do - echo $vcf - echo $vcf >> ${output_name}.list - done - } - - runtime { - docker: "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282" - memory: select_first([mem, 1]) + " GB" - disks: "local-disk " + select_first([disk_space_gb, 100]) + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } - - output { - File vcf_list = "${output_name}.list" - } -} - - workflow Mutect2_Multi { - # gatk4_jar needs to be a String input to the workflow in order to work in a Docker image - String gatk4_jar - Int scatter_count - File pair_list - Array[Array[String]] pairs = read_tsv(pair_list) + # Mutect2 inputs File? intervals File ref_fasta - File ref_fasta_index + File ref_fai File ref_dict + File pair_list + Array[Array[String]] pairs = read_tsv(pair_list) File? pon File? pon_index File? gnomad @@ -71,91 +34,77 @@ workflow Mutect2_Multi { File? variants_for_contamination File? 
variants_for_contamination_index Boolean is_run_orientation_bias_filter - Boolean is_run_oncotator - File? gatk4_jar_override + Int scatter_count + Array[String] artifact_modes + File picard + String? m2_extra_args + String? m2_extra_filtering_args + Boolean? is_bamOut + # Oncotator inputs + Boolean is_run_oncotator File? onco_ds_tar_gz String? onco_ds_local_db_dir - Array[String] artifact_modes - File picard_jar - String? m2_extra_args - String? m2_extra_filtering_args String? sequencing_center String? sequence_source File? default_config_file - Boolean? is_bamOut - String gatk_docker - String oncotator_docker - Int? preemptible_attempts + File? gatk_override + + # runtime + String gatk_docker + String oncotator_docker + Int? preemptible_attempts scatter( row in pairs ) { # If the condition is true, variables inside the 'if' block retain their values outside the block. # Otherwise they are treated as null, which in WDL is equivalent to an empty optional if(length(row) == 4) { File normal_bam = row[2] - File normal_bam_index = row[3] + File normal_bai = row[3] } - call m2.Mutect2 { - input: - gatk4_jar = gatk4_jar, - intervals = intervals, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - tumor_bam = row[0], - tumor_bam_index = row[1], - normal_bam = normal_bam, - normal_bam_index = normal_bam_index, - pon = pon, - pon_index = pon_index, - scatter_count = scatter_count, - gnomad = gnomad, - gnomad_index = gnomad_index, - variants_for_contamination = variants_for_contamination, - variants_for_contamination_index = variants_for_contamination_index, - is_run_orientation_bias_filter = is_run_orientation_bias_filter, - is_run_oncotator = is_run_oncotator, - oncotator_docker = oncotator_docker, - gatk_docker = gatk_docker, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, - onco_ds_tar_gz = onco_ds_tar_gz, - onco_ds_local_db_dir = onco_ds_local_db_dir, - artifact_modes = artifact_modes, - 
picard_jar = picard_jar, - m2_extra_args = m2_extra_args, - m2_extra_filtering_args = m2_extra_filtering_args, - sequencing_center = sequencing_center, - sequence_source = sequence_source, - default_config_file = default_config_file, - is_bamOut = select_first([is_bamOut, false]) - } - } - - - call CreateOutputList as unfilteredOutputList { - input: - output_name = "unfiltered", - vcfs = Mutect2.unfiltered_vcf, - preemptible_attempts = preemptible_attempts - } - - call CreateOutputList as filteredOutputList { - input: - output_name = "filtered", - vcfs = Mutect2.filtered_vcf, - preemptible_attempts = preemptible_attempts + call m2.Mutect2 { + input: + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + tumor_bam = row[0], + tumor_bai = row[1], + normal_bam = normal_bam, + normal_bai = normal_bai, + pon = pon, + pon_index = pon_index, + scatter_count = scatter_count, + gnomad = gnomad, + gnomad_index = gnomad_index, + variants_for_contamination = variants_for_contamination, + variants_for_contamination_index = variants_for_contamination_index, + is_run_orientation_bias_filter = is_run_orientation_bias_filter, + artifact_modes = artifact_modes, + picard = picard, + m2_extra_args = m2_extra_args, + m2_extra_filtering_args = m2_extra_filtering_args, + is_run_oncotator = is_run_oncotator, + onco_ds_tar_gz = onco_ds_tar_gz, + onco_ds_local_db_dir = onco_ds_local_db_dir, + sequencing_center = sequencing_center, + sequence_source = sequence_source, + default_config_file = default_config_file, + is_bamOut = select_first([is_bamOut, false]), + gatk_override = gatk_override, + gatk_docker = gatk_docker, + oncotator_docker = oncotator_docker, + preemptible_attempts = preemptible_attempts + } } output { - File unfiltered_vcfs = unfilteredOutputList.vcf_list - File filtered_vcfs = filteredOutputList.vcf_list - Array[File] unfiltered_vcf_files = Mutect2.unfiltered_vcf - Array[File] unfiltered_vcf_index_files = Mutect2.unfiltered_vcf_index 
- Array[File] filtered_vcf_files = Mutect2.filtered_vcf - Array[File] filtered_vcf_index_files = Mutect2.filtered_vcf_index + Array[File] unfiltered_vcf = Mutect2.unfiltered_vcf + Array[File] unfiltered_vcf_idx = Mutect2.unfiltered_vcf_index + Array[File] filtered_vcf = Mutect2.filtered_vcf + Array[File] filtered_vcf_idx = Mutect2.filtered_vcf_index Array[File] contamination_tables = Mutect2.contamination_table Array[File?] oncotated_m2_mafs = Mutect2.oncotated_m2_maf diff --git a/scripts/mutect2_wdl/mutect2_pon.wdl b/scripts/mutect2_wdl/mutect2_pon.wdl index 99bdc73ad24..c3f30fce442 100644 --- a/scripts/mutect2_wdl/mutect2_pon.wdl +++ b/scripts/mutect2_wdl/mutect2_pon.wdl @@ -1,10 +1,10 @@ # Create a Mutect2 panel of normals # # Description of inputs -# gatk4_jar: java jar file containing gatk 4 -# picard_jar: java jar file containing picard +# gatk: java jar file containing gatk 4 +# picard: java jar file containing picard # intervals: genomic intervals -# ref_fasta, ref_fasta_index, ref_dict: reference genome, index, and dictionary +# ref_fasta, ref_fai, ref_dict: reference genome, index, and dictionary # normal_bams, normal_bais: arrays of normal bams and bam indices # scatter_count: number of parallel jobs when scattering over intervals # pon_name: the resulting panel of normals is {pon_name}.vcf @@ -18,23 +18,25 @@ import "mutect2.wdl" as m2 workflow Mutect2_Panel { - # gatk4_jar needs to be a String input to the workflow in order to work in a Docker image - String gatk4_jar - Int scatter_count - Array[File] normal_bams - Array[File] normal_bais + # inputs + File picard File? intervals File ref_fasta - File ref_fasta_index + File ref_fai File ref_dict - String gatk_docker - File? gatk4_jar_override - Int? preemptible_attempts - File picard_jar + Int scatter_count + Array[File] normal_bams + Array[File] normal_bais String? m2_extra_args String? duplicate_sample_strategy String pon_name + File? gatk_override + + # runtime + String gatk_docker + Int? 
preemptible_attempts + Array[Pair[File,File]] normal_bam_pairs = zip(normal_bams, normal_bais) scatter (normal_bam_pair in normal_bam_pairs) { @@ -43,34 +45,32 @@ workflow Mutect2_Panel { call m2.Mutect2 { input: - gatk4_jar = gatk4_jar, intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, tumor_bam = normal_bam, - tumor_bam_index = normal_bai, + tumor_bai = normal_bai, scatter_count = scatter_count, - gatk_docker = gatk_docker, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, m2_extra_args = m2_extra_args, is_run_orientation_bias_filter = false, is_run_oncotator = false, - picard_jar = picard_jar, + picard = picard, oncotator_docker = gatk_docker, - artifact_modes = [""] + artifact_modes = [""], + gatk_override = gatk_override, + gatk_docker = gatk_docker, + preemptible_attempts = preemptible_attempts } } call CreatePanel { input: - gatk4_jar = gatk4_jar, input_vcfs = Mutect2.unfiltered_vcf, input_vcfs_idx = Mutect2.unfiltered_vcf_index, duplicate_sample_strategy = duplicate_sample_strategy, output_vcf_name = pon_name, - gatk4_jar_override = gatk4_jar_override, + gatk_override = gatk_override, preemptible_attempts = preemptible_attempts, gatk_docker = gatk_docker } @@ -84,33 +84,41 @@ workflow Mutect2_Panel { } task CreatePanel { - String gatk4_jar - Array[File] input_vcfs - Array[File] input_vcfs_idx - String? duplicate_sample_strategy - String output_vcf_name - File? gatk4_jar_override - Int? preemptible_attempts - String gatk_docker + # inputs + Array[File] input_vcfs + Array[File] input_vcfs_idx + String? duplicate_sample_strategy + String output_vcf_name - String gatk_local_jar = select_first([gatk4_jar_override, gatk4_jar]) + File? gatk_override - command { - # Use GATK Jar override if specified - export GATK_LOCAL_JAR=${gatk_local_jar} + # runtime + String gatk_docker + Int? mem + Int? preemptible_attempts + Int? disk_space + Int? 
cpu + Boolean use_ssd = false - gatk --java-options "-Xmx2g" CreateSomaticPanelOfNormals -vcfs ${sep=' -vcfs ' input_vcfs} ${"-duplicate-sample-strategy " + duplicate_sample_strategy} -O ${output_vcf_name}.vcf - } + Int machine_mem = select_first([mem, 3]) + Int command_mem = machine_mem - 1 - runtime { - docker: "${gatk_docker}" - memory: "5 GB" - disks: "local-disk " + 300 + " HDD" - preemptible: select_first([preemptible_attempts, 2]) - } + command { + set -e + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx${command_mem}g" CreateSomaticPanelOfNormals -vcfs ${sep=' -vcfs ' input_vcfs} ${"-duplicate-sample-strategy " + duplicate_sample_strategy} -O ${output_vcf_name}.vcf + } + + runtime { + docker: gatk_docker + memory: machine_mem + " GB" + disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD" + preemptible: select_first([preemptible_attempts, 3]) + cpu: select_first([cpu, 1]) + } - output { - File output_vcf = "${output_vcf_name}.vcf" - File output_vcf_index = "${output_vcf_name}.vcf.idx" - } + output { + File output_vcf = "${output_vcf_name}.vcf" + File output_vcf_index = "${output_vcf_name}.vcf.idx" + } } \ No newline at end of file diff --git a/scripts/mutect2_wdl/mutect2_template.json b/scripts/mutect2_wdl/mutect2_template.json index 833ac2b4454..23e7ac882ee 100755 --- a/scripts/mutect2_wdl/mutect2_template.json +++ b/scripts/mutect2_wdl/mutect2_template.json @@ -1,13 +1,16 @@ { - "Mutect2.gatk4_jar": "/root/gatk.jar", + "Mutect2.gatk": "/root/gatk.jar", + "Mutect2.picard": "$__picard_jar__", + "Mutect2.gatk_docker": "broadinstitute/gatk:4.beta.5", + "Mutect2.oncotator_docker": "broadinstitute/oncotator:1.9.6.1", "Mutect2.intervals": "$__intervals__", "Mutect2.ref_fasta": "$__ref_fasta__", - "Mutect2.ref_fasta_index": "$__ref_fasta_index__", + "Mutect2.ref_fai": "$__ref_fai__", "Mutect2.ref_dict": "$__ref_dict__", "Mutect2.tumor_bam": "$__tumor_bam__", - 
"Mutect2.tumor_bam_index": "$__tumor_bam_index__", + "Mutect2.tumor_bai": "$__tumor_bai__", "Mutect2.normal_bam": "$__normal_bam__", - "Mutect2.normal_bam_index": "$__normal_bam_index__", + "Mutect2.normal_bai": "$__normal_bai__", "Mutect2.pon": "$__pon__", "Mutect2.pon_index": "$__pon_index__", "Mutect2.scatter_count": "$__scatter_count__", @@ -17,8 +20,5 @@ "Mutect2.variants_for_contamination_index": "$__variants_for_contamination_index__", "Mutect2.is_run_orientation_bias_filter": "$__is_run_orientation_bias_filter__", "Mutect2.is_run_oncotator": "$__is_run_oncotator__", - "Mutect2.gatk_docker": "broadinstitute/gatk:4.beta.5", - "Mutect2.oncotator_docker": "broadinstitute/oncotator:1.9.6.1", - "Mutect2.artifact_modes": ["G/T", "C/T"], - "Mutect2.picard_jar": "$__picard_jar__" + "Mutect2.artifact_modes": ["G/T", "C/T"] } diff --git a/scripts/mutect2_wdl/unsupported/README.md b/scripts/mutect2_wdl/unsupported/README.md index 9ae0230f8ba..e396ae386e3 100644 --- a/scripts/mutect2_wdl/unsupported/README.md +++ b/scripts/mutect2_wdl/unsupported/README.md @@ -17,6 +17,8 @@ The following files from a clone of the gatk git repository, copied into a singl * scripts/mutect2_wdl/unsupported/mutect2-replicate-validation.wdl * scripts/mutect2_wdl/unsupported/calculate_sensitivity.py +Additionally, the gatk git repository has a script called gatk (in the root directory of the repo) that is used to invoke the gatk. If running on the cloud this is in the gatk docker image and you don't have to do anything. If running on SGE, you must copy this script to a directory that is in your $PATH. + The following resources: * Three preprocessed Hapmap vcfs -- one each for the 5-plex, 10-plex and 20-plex mixtures. These are produced by preprocess_hapmap.wdl but as long as the sample composition of the mixtures remains the same they do not need to be generated again. That is, the proportions need not be the same, but the same 5, 10, and 20 Hapmap samples must be present. 
* A reference .fasta file, along with accompanying .fasta.fai and .dict files. @@ -33,19 +35,20 @@ In the same directory as your wdl scripts, fill in a file called sensitivity.jso ``` { - "HapmapSensitivityAllPlexes.max_depth": "The maximum depth to consider for sensitivity. 1000 is a reasonable default.", - "HapmapSensitivityAllPlexes.gatk": "[path to gatk .jar file]", + "HapmapSensitivityAllPlexes.gatk_override": "[Path to a gatk jar file. Omitting this line uses the gatk jar in the docker image.]", + "HapmapSensitivityAllPlexes.picard": "[path to Picard .jar file]", + "HapmapSensitivityAllPlexes.gatk_docker": "[gatk docker image eg broadinstitute/gatk:4.beta.3 -- this is not used in SGE but you still have to fill it in.]", + "HapmapSensitivityAllPlexes.intervals": "[path to intervals file]", "HapmapSensitivityAllPlexes.ref_fasta": "[path to reference .fasta file]", - "HapmapSensitivityAllPlexes.ref_fasta_index": "[path to reference .fasta.fai file]", + "HapmapSensitivityAllPlexes.ref_fai": "[path to reference .fasta.fai file]", "HapmapSensitivityAllPlexes.ref_dict": "[path to reference .dict file]", "HapmapSensitivityAllPlexes.five_plex_bam_list": "[path to 5-plex bams list]", "HapmapSensitivityAllPlexes.ten_plex_bam_list": "[path to 10-plex bams list]", "HapmapSensitivityAllPlexes.twenty_plex_bam_list": "[path to 20-plex bams list]", - "HapmapSensitivityAllPlexes.intervals": "[path to intervals file]", + "HapmapSensitivityAllPlexes.max_depth": "The maximum depth to consider for sensitivity. 
1000 is a reasonable default.", "HapmapSensitivityAllPlexes.scatter_count": "[How many ways to scatter runs on Mutect2 on each bam file]", "HapmapSensitivityAllPlexes.is_run_orientation_bias_filter": "true/false depending on whether you wish to run this filter", "HapmapSensitivityAllPlexes.artifact_modes": The artifact modes of the orientation bias filter eg: ["G/T", "C/T"], - "HapmapSensitivityAllPlexes.picard_jar": "[path to Picard .jar file]", "HapmapSensitivityAllPlexes.five_plex_preprocessed": "[path to preprocessed 5-plex vcf]", "HapmapSensitivityAllPlexes.five_plex_preprocessed_idx": "[path to preprocessed 5-plex vcf index]", "HapmapSensitivityAllPlexes.ten_plex_preprocessed": "[path to preprocessed 10-plex vcf]", @@ -64,10 +67,11 @@ In the same directory as your wdl scripts, fill in a file called specificity.jso ``` { - "Mutect2ReplicateValidation.gatk4_jar": "[path to gatk .jar file in the docker image if running on the cloud eg /root/gatk.jar]", - "Mutect2ReplicateValidation.gatk4_jar_override": "[path to local gatk .jar file when not running in the cloud]", + "Mutect2ReplicateValidation.gatk_override": "[Path to a gatk jar file. 
Omitting this line uses the gatk jar in the docker image.]", + "Mutect2ReplicateValidation.gatk_docker": "[gatk docker image eg broadinstitute/gatk:4.beta.3 -- this is not used in SGE but you still have to fill it in.]", + "Mutect2ReplicateValidation.picard": "[path to Picard .jar file]", "Mutect2ReplicateValidation.ref_fasta": "[path to reference .fasta file]", - "Mutect2ReplicateValidation.ref_fasta_index": "[path to reference .fasta.fai file]", + "Mutect2ReplicateValidation.ref_fai": "[path to reference .fasta.fai file]", "Mutect2ReplicateValidation.ref_dict": "[path to reference .dict file]", "Mutect2ReplicateValidation.replicate_pair_list": "[path to replicate bams list]", "Mutect2ReplicateValidation.intervals": "[path to intervals file]", @@ -79,14 +83,17 @@ In the same directory as your wdl scripts, fill in a file called specificity.jso "Mutect2ReplicateValidation.is_run_orientation_bias_filter": "true/false depending on whether you wish to run this filter", "Mutect2ReplicateValidation.artifact_modes": The artifact modes of the orientation bias filter eg: ["G/T", "C/T"], "Mutect2ReplicateValidation.preemptible_attempts": "2", - "Mutect2ReplicateValidation.m2_docker": "[gatk docker image eg broadinstitute/gatk:4.beta.3]", - "Mutect2ReplicateValidation.picard_jar": "[path to Picard .jar file]", "Mutect2ReplicateValidation.m2_extra_args": "optionally, any additional Mutect2 command line arguments", "Mutect2ReplicateValidation.m2_extra_filtering_args": "optionally, any additional Mutect2 command line arguments" } ``` -Note that the docker image path is not used when the validations are run locally. When running locally, a valid docker path must still be given or else cromwell will fail. When running locally, fill in gatk4_jar with some dummy value such as "OVERRIDDEN". +Note that the docker image path is not used when the validations are run on an SGE cluster. When running on SGE, a valid docker path must still be given or else cromwell will fail. 
+ +To summarize the differences between running in the cloud and on SGE: +* Your jsons must include a valid gatk_docker in both cases, however, when running on SGE this docker image is not actually used. +* When running in SGE you must put a gatk_override jar file in your jsons. When running in the cloud you may include one but if you omit this line from your jsons the gatk jar in the docker image will be used. +* When running in SGE you must make sure to copy the gatk script in the root directory of the gatk git repo into a folder that is in your bash $PATH variable. ## Running in Cromwell * Run hapmap_sensitivity_all_plexes.wdl with the parameters in sensitivity.json diff --git a/scripts/mutect2_wdl/unsupported/hapmap_sensitivity.wdl b/scripts/mutect2_wdl/unsupported/hapmap_sensitivity.wdl index 61984802161..cad2e6d376e 100755 --- a/scripts/mutect2_wdl/unsupported/hapmap_sensitivity.wdl +++ b/scripts/mutect2_wdl/unsupported/hapmap_sensitivity.wdl @@ -21,19 +21,18 @@ import "mutect2.wdl" as MutectSingleSample workflow HapmapSensitivity { - File gatk + File picard + File? intervals + File ref_fasta + File ref_fai + File ref_dict Int scatter_count File bam_list Array[Array[String]] replicates = read_tsv(bam_list) - File ref_fasta - File ref_fasta_index - File ref_dict - File? intervals File? pon File? pon_index Boolean is_run_orientation_bias_filter Array[String] artifact_modes - File picard_jar String? m2_extra_args String? m2_extra_filtering_args String prefix #a prefix string like "5plex" @@ -42,8 +41,11 @@ workflow HapmapSensitivity { File preprocessed_hapmap File preprocessed_hapmap_idx + File? 
gatk_override + String gatk_docker + call RestrictIntervals { - input: gatk = gatk, vcf = preprocessed_hapmap, vcf_idx = preprocessed_hapmap_idx, intervals = intervals + input: gatk_override = gatk_override, gatk_docker = gatk_docker, vcf = preprocessed_hapmap, vcf_idx = preprocessed_hapmap_idx, intervals = intervals } scatter (row in replicates) { @@ -51,51 +53,52 @@ workflow HapmapSensitivity { File index = row[1] call MixingFractions { - input: gatk = gatk, vcf = RestrictIntervals.output_vcf, vcf_idx = RestrictIntervals.output_vcf_idx, bam = bam, bam_idx = index + input: gatk_override = gatk_override, gatk_docker = gatk_docker, vcf = RestrictIntervals.output_vcf, vcf_idx = RestrictIntervals.output_vcf_idx, bam = bam, bam_idx = index } call ExpectedAlleleFraction { - input: gatk = gatk, vcf = RestrictIntervals.output_vcf, vcf_idx = RestrictIntervals.output_vcf_idx, mixing_fractions = MixingFractions.mixing + input: gatk_override = gatk_override, gatk_docker = gatk_docker, vcf = RestrictIntervals.output_vcf, vcf_idx = RestrictIntervals.output_vcf_idx, mixing_fractions = MixingFractions.mixing } call BamDepth { - input: gatk = gatk, vcf = ExpectedAlleleFraction.output_vcf, vcf_idx = ExpectedAlleleFraction.output_vcf_idx, + input: gatk_override = gatk_override, gatk_docker = gatk_docker, vcf = ExpectedAlleleFraction.output_vcf, vcf_idx = ExpectedAlleleFraction.output_vcf_idx, bam = bam, bam_idx = index, max_depth = max_depth } call MutectSingleSample.Mutect2 { input: - gatk4_jar = "OVERRIDDEN", - scatter_count = scatter_count, - tumor_bam = bam, - tumor_bam_index = index, + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + oncotator_docker = "ubuntu:16.04", intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, + scatter_count = scatter_count, + tumor_bam = bam, + tumor_bai = index, pon = pon, pon_index = pon_index, is_run_orientation_bias_filter = 
is_run_orientation_bias_filter, is_run_oncotator = false, - gatk_docker = "ubuntu:16.04", - oncotator_docker = "ubuntu:16.04", - gatk4_jar_override = gatk, artifact_modes = artifact_modes, - picard_jar = picard_jar, m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args } call Concordance { - input: gatk = gatk, intervals = intervals, - truth = BamDepth.output_vcf, - truth_idx = BamDepth.output_vcf_idx, - eval = Mutect2.filtered_vcf, - eval_idx = Mutect2.filtered_vcf_index + input: + gatk_override = gatk_override, intervals = intervals, + gatk_docker = gatk_docker, + truth = BamDepth.output_vcf, + truth_idx = BamDepth.output_vcf_idx, + eval = Mutect2.filtered_vcf, + eval_idx = Mutect2.filtered_vcf_index } call ConvertToTable { - input: gatk = gatk, input_vcf = Concordance.tpfn, input_vcf_idx = Concordance.tpfn_idx + input: gatk_override = gatk_override, gatk_docker = gatk_docker, input_vcf = Concordance.tpfn, input_vcf_idx = Concordance.tpfn_idx } } #done with scatter over replicates @@ -112,11 +115,11 @@ workflow HapmapSensitivity { } call Jaccard as JaccardSNP { - input: gatk = gatk, calls = Mutect2.filtered_vcf, calls_idx = Mutect2.filtered_vcf_index, prefix = prefix, type = "SNP" + input: gatk_override = gatk_override, gatk_docker = gatk_docker, calls = Mutect2.filtered_vcf, calls_idx = Mutect2.filtered_vcf_index, prefix = prefix, type = "SNP" } call Jaccard as JaccardINDEL { - input: gatk = gatk, calls = Mutect2.filtered_vcf, calls_idx = Mutect2.filtered_vcf_index, prefix = prefix, type = "INDEL" + input: gatk_override = gatk_override, gatk_docker = gatk_docker, calls = Mutect2.filtered_vcf, calls_idx = Mutect2.filtered_vcf_index, prefix = prefix, type = "INDEL" } output { @@ -137,15 +140,26 @@ workflow HapmapSensitivity { #### Tasks for making truth task RestrictIntervals { - File gatk + File? gatk_override + String gatk_docker File vcf File vcf_idx File? 
intervals command { - # subsampling and restriction to biallelics and intervals - java -jar ${gatk} SelectVariants -V ${vcf} -O restricted.vcf \ - ${"-L " + intervals} \ + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + # only restricting intervals here + # subsampling and restricting to biallelics done in preprocess_hapmap.wdl + gatk --java-options "-Xmx4g" SelectVariants \ + -V ${vcf} \ + -O restricted.vcf \ + ${"-L " + intervals} + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { @@ -155,7 +169,8 @@ task RestrictIntervals { } task BamDepth { - File gatk + File? gatk_override + String gatk_docker File vcf File vcf_idx File bam @@ -163,8 +178,14 @@ task BamDepth { Int max_depth #ignore sites with depth greater than this because they are alignment artifacts command { - java -jar ${gatk} AnnotateVcfWithBamDepth -V ${vcf} -I ${bam} -O "bam_depth.vcf" - java -jar ${gatk} SelectVariants -V bam_depth.vcf --select "BAM_DEPTH < ${max_depth}" -O truth.vcf + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" AnnotateVcfWithBamDepth -V ${vcf} -I ${bam} -O "bam_depth.vcf" + gatk --java-options "-Xmx4g" SelectVariants -V bam_depth.vcf --select "BAM_DEPTH < ${max_depth}" -O truth.vcf + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { @@ -174,27 +195,41 @@ task MixingFractions { - File gatk + File? gatk_override + String gatk_docker File vcf File vcf_idx File bam File bam_idx command { - java -jar ${gatk} CalculateMixingFractions -V ${vcf} -I ${bam} -O "mixing.table" + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" CalculateMixingFractions -V ${vcf} -I ${bam} -O "mixing.table" + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { File mixing = "mixing.table" } } task ExpectedAlleleFraction { - File gatk + File?
gatk_override + String gatk_docker File vcf File vcf_idx File mixing_fractions command { - java -jar ${gatk} AnnotateVcfWithExpectedAlleleFraction -V ${vcf} -O af_exp.vcf --mixing-fractions ${mixing_fractions} + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" AnnotateVcfWithExpectedAlleleFraction -V ${vcf} -O af_exp.vcf --mixing-fractions ${mixing_fractions} + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 } output { @@ -206,15 +241,20 @@ task ExpectedAlleleFraction { ### Tasks for analysing sensitivity task ConvertToTable { - File gatk + File? gatk_override + String gatk_docker File input_vcf File input_vcf_idx command { - java -jar ${gatk} VariantsToTable -V ${input_vcf} -F STATUS -F BAM_DEPTH -F AF_EXP -F TYPE -O "result.table" + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" VariantsToTable -V ${input_vcf} -F STATUS -F BAM_DEPTH -F AF_EXP -F TYPE -O "result.table" } - runtime { memory: "5 GB" } + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { File table = "result.table" } } @@ -253,8 +293,8 @@ task AnalyzeSensitivity { } runtime { - continueOnReturnCode: [0,1] - memory: "5 GB" + continueOnReturnCode: [0,1] + preemptible: 2 } output { @@ -267,20 +307,23 @@ task AnalyzeSensitivity { #Make Jaccard index table for SNVs or indels from an array of called vcfs task Jaccard { - File gatk + File? 
gatk_override + String gatk_docker Array[File] calls Array[File] calls_idx String prefix String type #SNP or INDEL command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + result="${prefix}_${type}_jaccard.txt" touch $result count=0 for vcf in ${sep = ' ' calls}; do ((count++)) - java -jar ${gatk} SelectVariants -V $vcf --select-type-to-include ${type} -O ${type}_only_$count.vcf + gatk --java-options "-Xmx4g" SelectVariants -V $vcf --select-type-to-include ${type} -O ${type}_only_$count.vcf done for file1 in ${type}_only*.vcf; do @@ -294,7 +337,7 @@ task Jaccard { if [ $file1 == $file2 ]; then printf 1.0000 >> $result else - java -jar ${gatk} SelectVariants -V $file1 --concordance $file2 -O overlap.vcf + gatk --java-options "-Xmx4g" SelectVariants -V $file1 --concordance $file2 -O overlap.vcf overlap=`grep -v '#' overlap.vcf | wc -l` num1=`grep -v '#' $file1 | wc -l` @@ -313,13 +356,17 @@ task Jaccard { } - runtime { memory: "5 GB" } + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { File table = "${prefix}_${type}_jaccard.txt" } } task Concordance { - File gatk + File? gatk_override + String gatk_docker File? 
intervals File truth File truth_idx @@ -327,14 +374,18 @@ task Concordance { File eval_idx command { - java -jar ${gatk} Concordance ${"-L " + intervals} \ + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" Concordance ${"-L " + intervals} \ -truth ${truth} -eval ${eval} \ -tpfn "tpfn.vcf" \ -ftnfn "ftnfn.vcf" \ -summary summary.tsv } - runtime { memory: "5 GB" } + runtime { + docker: "${gatk_docker}" + preemptible: 2 + } output { File tpfn = "tpfn.vcf" diff --git a/scripts/mutect2_wdl/unsupported/hapmap_sensitivity_all_plexes.wdl b/scripts/mutect2_wdl/unsupported/hapmap_sensitivity_all_plexes.wdl index f3ad31c8900..b249910d57d 100755 --- a/scripts/mutect2_wdl/unsupported/hapmap_sensitivity_all_plexes.wdl +++ b/scripts/mutect2_wdl/unsupported/hapmap_sensitivity_all_plexes.wdl @@ -20,6 +20,12 @@ import "hapmap_sensitivity.wdl" as single_plex workflow HapmapSensitivityAllPlexes { + File picard + File? intervals + File ref_fasta + File ref_fai + File ref_dict + Int max_depth Int scatter_count @@ -27,16 +33,10 @@ workflow HapmapSensitivityAllPlexes { File ten_plex_bam_list File twenty_plex_bam_list - File ref_fasta - File ref_fasta_index - File ref_dict File? pon File? pon_index Boolean is_run_orientation_bias_filter - File gatk Array[String] artifact_modes - File picard_jar - File five_plex_preprocessed File five_plex_preprocessed_idx File ten_plex_preprocessed @@ -46,78 +46,82 @@ workflow HapmapSensitivityAllPlexes { String? m2_extra_args String? m2_extra_filtering_args + File python_script - File? intervals + File? 
gatk_override - File python_script + String gatk_docker call single_plex.HapmapSensitivity as FivePlex { input: + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, max_depth = max_depth, scatter_count = scatter_count, bam_list = five_plex_bam_list, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, pon = pon, pon_index = pon_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, - gatk = gatk, artifact_modes = artifact_modes, - picard_jar = picard_jar, preprocessed_hapmap = five_plex_preprocessed, preprocessed_hapmap_idx = five_plex_preprocessed_idx, m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args, prefix = "5plex", - python_script = python_script, - intervals = intervals + python_script = python_script } call single_plex.HapmapSensitivity as TenPlex { input: + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, max_depth = max_depth, scatter_count = scatter_count, bam_list = ten_plex_bam_list, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, pon = pon, pon_index = pon_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, - gatk = gatk, artifact_modes = artifact_modes, - picard_jar = picard_jar, preprocessed_hapmap = ten_plex_preprocessed, preprocessed_hapmap_idx = ten_plex_preprocessed_idx, m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args, prefix = "10plex", - python_script = python_script, - intervals = intervals + python_script = python_script } call single_plex.HapmapSensitivity as TwentyPlex { input: + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + 
ref_dict = ref_dict, max_depth = max_depth, scatter_count = scatter_count, bam_list = twenty_plex_bam_list, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, pon = pon, pon_index = pon_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, - gatk = gatk, artifact_modes = artifact_modes, - picard_jar = picard_jar, preprocessed_hapmap = twenty_plex_preprocessed, preprocessed_hapmap_idx = twenty_plex_preprocessed_idx, m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args, prefix = "20plex", - python_script = python_script, - intervals = intervals + python_script = python_script } Array[File] all_plex_sensitivity_tables = [FivePlex.raw_table, TenPlex.raw_table, TwentyPlex.raw_table] @@ -125,7 +129,7 @@ workflow HapmapSensitivityAllPlexes { call single_plex.CombineTables as AllPlexTable { input: input_tables = all_plex_sensitivity_tables, prefix = "all_plex" } call single_plex.AnalyzeSensitivity as AllPlex { - input: input_table = AllPlexTable.table, python_script = python_script, prefix = "all_plex" + input: input_table = AllPlexTable.table, python_script = python_script, prefix = "all_plex" } output { diff --git a/scripts/mutect2_wdl/unsupported/m2_basic_validation.wdl b/scripts/mutect2_wdl/unsupported/m2_basic_validation.wdl index be71f6495ea..d8e3be9ecce 100644 --- a/scripts/mutect2_wdl/unsupported/m2_basic_validation.wdl +++ b/scripts/mutect2_wdl/unsupported/m2_basic_validation.wdl @@ -18,18 +18,16 @@ import "mutect2.wdl" as m2 # The output is a tar file of validation reports (tsv) for the # workflow m2_validation { - #### M2 parameters - String gatk4_jar File? intervals File ref_fasta - File ref_fasta_index + File ref_fai File ref_dict File tumor_bam - File tumor_bam_index + File tumor_bai String tumor_sample_name File? normal_bam - File? normal_bam_index + File? normal_bai String? normal_sample_name File? pon File? pon_index @@ -40,21 +38,23 @@ workflow m2_validation { File? 
variants_for_contamination_index Boolean is_run_orientation_bias_filter Boolean is_run_oncotator - String m2_docker - String basic_bash_docker = "ubuntu:16.04" - String oncotator_docker - File? gatk4_jar_override Int preemptible_attempts File? onco_ds_tar_gz String? onco_ds_local_db_dir Array[String] artifact_modes - File picard_jar + File picard String? m2_extra_args String? m2_extra_filtering_args String? sequencing_center String? sequence_source File? default_config_file Boolean is_bamOut = false + + File? gatk_override + + String gatk_docker + String basic_bash_docker = "ubuntu:16.04" + String oncotator_docker ##### ### parameter-fu @@ -96,16 +96,19 @@ workflow m2_validation { scatter (i in range(length(tumor_bam_files))) { call m2.Mutect2 as m2_tn { input: - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + basic_bash_docker = basic_bash_docker, + oncotator_docker = oncotator_docker, intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, tumor_bam = tumor_bam_files[i], - tumor_bam_index = tumor_bam_indices[i], + tumor_bai = tumor_bam_indices[i], normal_bam = normal_bam_files[i], - normal_bam_index = normal_bam_indices[i], + normal_bai = normal_bam_indices[i], scatter_count = scatter_count, pon = pon, pon_index = pon_index, @@ -113,15 +116,11 @@ workflow m2_validation { gnomad_index = gnomad_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, is_run_oncotator = is_run_oncotator, - oncotator_docker = oncotator_docker, - m2_docker = m2_docker, preemptible_attempts = select_first([preemptible_attempts, 2]), onco_ds_local_db_dir = onco_ds_local_db_dir, artifact_modes = artifact_modes, - picard_jar = picard_jar, variants_for_contamination = variants_for_contamination, variants_for_contamination_index = variants_for_contamination_index, - basic_bash_docker = basic_bash_docker, 
m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args, is_bamOut = true @@ -129,16 +128,19 @@ workflow m2_validation { call m2.Mutect2 as m2_validation_bamout { input: - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, + gatk_override = gatk_override, + picard = picard, + gatk_docker = gatk_docker, + basic_bash_docker = basic_bash_docker, + oncotator_docker = oncotator_docker, intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, tumor_bam = validation_tumor_bam_files[i], - tumor_bam_index = validation_tumor_bam_indices[i], + tumor_bai = validation_tumor_bam_indices[i], normal_bam = validation_normal_bam_files[i], - normal_bam_index = validation_normal_bam_indices[i], + normal_bai = validation_normal_bam_indices[i], scatter_count = scatter_count, pon = pon, pon_index = pon_index, @@ -146,15 +148,11 @@ workflow m2_validation { gnomad_index = gnomad_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, is_run_oncotator = is_run_oncotator, - oncotator_docker = oncotator_docker, - m2_docker = m2_docker, preemptible_attempts = preemptible_attempts, onco_ds_local_db_dir = onco_ds_local_db_dir, artifact_modes = artifact_modes, - picard_jar = picard_jar, variants_for_contamination = variants_for_contamination, variants_for_contamination_index = variants_for_contamination_index, - basic_bash_docker = basic_bash_docker, m2_extra_args = m2_extra_args, m2_extra_filtering_args = m2_extra_filtering_args, is_bamOut = true @@ -163,42 +161,42 @@ workflow m2_validation { # Delete the reads from the normal and HC sample from the bamout. 
call rewrite_bam_by_sample as m2_rewrite_bam_by_sample { input: + gatk_override = gatk_override, + gatk_docker = gatk_docker, bam = m2_validation_bamout.bamout, - gatk_docker = m2_docker, new_sample_name = m2_validation_bamout.tumor_bam_sample_name, - gatk4_jar_override = gatk4_jar_override, output_bam_basename = m2_validation_bamout.tumor_bam_sample_name } call basic_validator as m2_basic_validator { input: - gatk4_jar_override = gatk4_jar_override, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + call_intervals = m2_tn.filtered_vcf, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, discovery_tumor_sample_name = m2_tn.tumor_bam_sample_name, discovery_normal_sample_name = m2_tn.normal_bam_sample_name, validation_tumor_bam = m2_rewrite_bam_by_sample.sample_bam, - validation_tumor_bai = m2_rewrite_bam_by_sample.sample_bam_index, + validation_tumor_bai = m2_rewrite_bam_by_sample.sample_bai, validation_normal_bam = validation_normal_bam_files[i], validation_normal_bai = validation_normal_bam_indices[i], vcf_calls = m2_tn.filtered_vcf, vcf_calls_idx = m2_tn.filtered_vcf_index, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - call_intervals = m2_tn.filtered_vcf, entity_id = "m2_" + m2_tn.tumor_bam_sample_name, - gatk_docker = m2_docker, base_quality_cutoff = base_quality_cutoff } call m2.CollectSequencingArtifactMetrics as validation_normal_CollectSequencingArtifactMetrics { input: + picard = picard, + gatk_docker = gatk_docker, + ref_fasta = ref_fasta, + ref_fai = ref_fai, preemptible_attempts = preemptible_attempts, - m2_docker = m2_docker, tumor_bam = validation_normal_bam_files[i], - tumor_bam_index = validation_normal_bam_indices[i], - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - picard_jar = picard_jar + tumor_bai = validation_normal_bam_indices[i] } } @@ -216,7 +214,14 @@ workflow m2_validation { # Validation bams should *not* be RNA. task basic_validator { - File? 
gatk4_jar_override + File? gatk_override + String gatk_docker + # Same calls as what is in the VCF + File call_intervals + + File ref_fasta + File ref_fai + File ref_dict String discovery_tumor_sample_name String discovery_normal_sample_name @@ -230,13 +235,6 @@ task basic_validator { File vcf_calls File vcf_calls_idx - File ref_fasta - File ref_fasta_index - File ref_dict - - # Same calls as what is in the VCF - File call_intervals - # Unique name for the entity. Only used for naming output files. String entity_id @@ -244,16 +242,14 @@ task basic_validator { # Runtime parameters Int? mem - String gatk_docker Int? preemptible_attempts Int? disk_space_gb - Int final_mem=select_first([mem, 7]) + Int final_mem = select_first([mem, 7]) command <<< set -e - # Use GATK Jar override if specified - GATK_JAR=${default="/root/gatk.jar" gatk4_jar_override} + GATK_JAR=${default="/root/gatk.jar" gatk_override} echo "Getting sample names...." java -Xmx${final_mem-1}g -jar $GATK_JAR GetSampleName -I ${validation_normal_bam} -O validation_normal_name.txt @@ -298,7 +294,6 @@ task tar_results { Int preemptible_attempts=2 command <<< - set -e python <>> + runtime { docker: "${basic_python_docker}" preemptible: "${preemptible_attempts}" memory: "3 GB" disks: "local-disk " + 50 + " HDD" } + output { File tar_file = "${group_id}_results.tar.gz" } } task rewrite_bam_by_sample { + File? gatk_override + String gatk_docker # Also, removes samples not in the list from the header String new_sample_name File bam - - File? gatk4_jar_override String output_bam_basename # Runtime parameters Int? mem - String gatk_docker Int? preemptible_attempts Int? 
disk_space_gb - - Int final_mem=select_first([mem, 3]) + Int final_mem = select_first([mem, 3]) command <<< set -e - - # Use GATK Jar override if specified - GATK_JAR=${default="/root/gatk.jar" gatk4_jar_override} + GATK_JAR=${default="/root/gatk.jar" gatk_override} java -Xmx${final_mem-1}g -jar $GATK_JAR PrintReads -I ${bam} -O ${output_bam_basename}.tmp.bam -RF SampleReadFilter -sample ${sep=" -sample " new_sample_name} @@ -357,7 +349,6 @@ task rewrite_bam_by_sample { java -Xmx${final_mem-1}g -jar $GATK_JAR ReplaceSamHeader --HEADER new_header.txt -I ${output_bam_basename}.tmp.bam -O ${output_bam_basename}.bam samtools index ${output_bam_basename}.bam ${output_bam_basename}.bai - >>> runtime { @@ -369,6 +360,6 @@ task rewrite_bam_by_sample { output { File sample_bam = "${output_bam_basename}.bam" - File sample_bam_index = "${output_bam_basename}.bai" + File sample_bai = "${output_bam_basename}.bai" } } diff --git a/scripts/mutect2_wdl/unsupported/mutect2-replicate-validation.wdl b/scripts/mutect2_wdl/unsupported/mutect2-replicate-validation.wdl index 85a01ac1495..b09f042dee1 100755 --- a/scripts/mutect2_wdl/unsupported/mutect2-replicate-validation.wdl +++ b/scripts/mutect2_wdl/unsupported/mutect2-replicate-validation.wdl @@ -3,138 +3,73 @@ import "mutect2.wdl" as m2 -task GatherTables { - # we assume that each table consists of two lines: one header line and one record - Array[File] tables - - command { - # extract the header from one of the files - head -n 1 ${tables[0]} > summary.txt - - # then append the record from each table - for table in ${sep=" " tables}; do - tail -n +2 $table >> summary.txt - done - } - - runtime { - docker: "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282" - memory: "1 GB" - disks: "local-disk " + 100 + " HDD" - } - - output { - File summary = "summary.txt" - } -} - -task CountFalsePositives { - String gatk4_jar - File filtered_vcf - File filtered_vcf_index +workflow Mutect2ReplicateValidation { + File picard + File? 
intervals File ref_fasta - File ref_fasta_index + File ref_fai File ref_dict - File? intervals - File? gatk4_jar_override - - command { - # Use GATK Jar override if specified - GATK_JAR=${gatk4_jar} - if [[ "${gatk4_jar_override}" == *.jar ]]; then - GATK_JAR=${gatk4_jar_override} - fi - - java -jar $GATK_JAR CountFalsePositives \ - -V ${filtered_vcf} \ - -R ${ref_fasta} \ - ${"-L " + intervals} \ - -O false-positives.txt \ - } - - runtime { - docker: "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282" - memory: "5 GB" - disks: "local-disk " + 500 + " HDD" - } - - output { - File false_positive_counts = "false-positives.txt" - } -} - -workflow Mutect2ReplicateValidation { - File gatk4_jar Int scatter_count - # replicate_pair_list file is a tsv file with the following six columns in this order. - # tumor_bam, tumor_bam_index, tumor_sample_name, normal_bam, normal_bam_index, normal_sample_name + # replicate_pair_list file is a tsv file with the following four columns in this order. + # tumor_bam, tumor_bai, normal_bam, normal_bai File replicate_pair_list Array[Array[String]] pairs = read_tsv(replicate_pair_list) - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict File? pon File? pon_index File? gnomad File? gnomad_index Boolean is_run_orientation_bias_filter - String gatk_docker - File? gatk4_jar_override - Int? preemptible_attempts Array[String] artifact_modes - File picard_jar String? m2_extra_args String? m2_extra_filtering_args + File? gatk_override + + String gatk_docker + Int? 
preemptible_attempts + scatter(pair in pairs) { call m2.Mutect2 { input: - gatk4_jar = gatk4_jar, intervals = intervals, ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, + ref_fai = ref_fai, ref_dict = ref_dict, tumor_bam = pair[0], - tumor_bam_index = pair[1], - tumor_sample_name = pair[2], - normal_bam = pair[3], - normal_bam_index = pair[4], - normal_sample_name = pair[5], + tumor_bai = pair[1], + normal_bam = pair[2], + normal_bai = pair[3], pon = pon, pon_index = pon_index, scatter_count = scatter_count, gnomad = gnomad, gnomad_index = gnomad_index, - picard_jar = picard_jar, + picard = picard, is_run_orientation_bias_filter = is_run_orientation_bias_filter, is_run_oncotator = false, - oncotator_docker = gatk_docker, - gatk_docker = gatk_docker, - gatk4_jar_override = gatk4_jar_override, preemptible_attempts = preemptible_attempts, artifact_modes = artifact_modes, m2_extra_args = m2_extra_args, - m2_extra_filtering_args = m2_extra_filtering_args + m2_extra_filtering_args = m2_extra_filtering_args, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + oncotator_docker = gatk_docker } call CountFalsePositives { input: - gatk4_jar = gatk4_jar, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, filtered_vcf = Mutect2.filtered_vcf, filtered_vcf_index = Mutect2.filtered_vcf_index, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - intervals = intervals, - gatk4_jar_override = gatk4_jar_override + gatk_override = gatk_override, + gatk_docker = gatk_docker } } - call GatherTables { - input: - tables = CountFalsePositives.false_positive_counts - } + call GatherTables { input: tables = CountFalsePositives.false_positive_counts } output { File summary = GatherTables.summary @@ -142,3 +77,61 @@ workflow Mutect2ReplicateValidation { Array[File] unfiltered_vcfs = Mutect2.unfiltered_vcf } } + +task GatherTables { + # we assume that each table consists of two lines: one header 
line and one record + Array[File] tables + + command { + # extract the header from one of the files + head -n 1 ${tables[0]} > summary.txt + + # then append the record from each table + for table in ${sep=" " tables}; do + tail -n +2 $table >> summary.txt + done + } + + runtime { + docker: "broadinstitute/genomes-in-the-cloud:2.2.4-1469632282" + memory: "1 GB" + disks: "local-disk " + 100 + " HDD" + } + + output { + File summary = "summary.txt" + } +} + +task CountFalsePositives { + File? intervals + File ref_fasta + File ref_fai + File ref_dict + File filtered_vcf + File filtered_vcf_index + + File? gatk_override + + String gatk_docker + + command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + gatk --java-options "-Xmx4g" CountFalsePositives \ + -V ${filtered_vcf} \ + -R ${ref_fasta} \ + ${"-L " + intervals} \ + -O false-positives.txt \ + } + + runtime { + docker: gatk_docker + memory: "5 GB" + disks: "local-disk " + 500 + " HDD" + } + + output { + File false_positive_counts = "false-positives.txt" + } +} diff --git a/scripts/mutect2_wdl/unsupported/mutect2_compare_tumors.wdl b/scripts/mutect2_wdl/unsupported/mutect2_compare_tumors.wdl index 118f244e9bc..9e8225c0d1b 100644 --- a/scripts/mutect2_wdl/unsupported/mutect2_compare_tumors.wdl +++ b/scripts/mutect2_wdl/unsupported/mutect2_compare_tumors.wdl @@ -7,131 +7,89 @@ # mutect2_multi_sample.wdl import "mutect2.wdl" as m2 - -task Concordance { - String gatk4_jar - File? gatk4_jar_override - File? 
intervals - File truth_vcf - File truth_vcf_idx - File eval_vcf - File eval_vcf_idx - - command { - - GATK_JAR=${gatk4_jar} - if [[ "${gatk4_jar_override}" == *.jar ]]; then - GATK_JAR=${gatk4_jar_override} - fi - - java -jar $GATK_JAR Concordance ${"-L " + intervals} \ - -truth ${truth_vcf} -eval ${eval_vcf} -tpfn "true_positives_and_false_negatives.vcf" \ - -tpfp "true_positives_and_false_positives.vcf" \ - -summary summary.tsv - } - - runtime { - memory: "5 GB" - } - - output { - File tpfn = "true_positives_and_false_negatives.vcf" - File tpfn_idx = "true_positives_and_false_negatives.vcf.idx" - File tpfp = "true_positives_and_false_positives.vcf" - File tpfp_idx = "true_positives_and_false_positives.vcf.idx" - File summary = "summary.tsv" - } -} - workflow Mutect2Trio { - String gatk4_jar + File picard + File? intervals + File ref_fasta + File ref_fai + File ref_dict Int scatter_count # trio_list file is a tsv file with the following nine columns in this order. - # normal_bam, normal_bam_index, normal_sample_name, good_tumor_bam, good_tumor_bam_index, good_tumor_sample_name, bad_tumor_bam, bad_tumor_bam_index, bad_tumor_sample_name, + # normal_bam, normal_bai, good_tumor_bam, good_tumor_bai, bad_tumor_bam, bad_tumor_bai File trio_list Array[Array[String]] trios = read_tsv(trio_list) - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict File? pon File? pon_index File? gnomad File? gnomad_index Boolean is_run_orientation_bias_filter - Boolean is_run_oncotator - String oncotator_docker - String m2_docker - File? gatk4_jar_override - Int preemptible_attempts Array[String] artifact_modes + File? gatk_override + + # runtime + String gatk_docker + Int? 
preemptible_attempts + scatter(trio in trios) { call m2.Mutect2 as GoodTumor { input: - gatk4_jar=gatk4_jar, - intervals=intervals, - ref_fasta=ref_fasta, - ref_fasta_index=ref_fasta_index, - ref_dict=ref_dict, - tumor_bam=trio[3], - tumor_bam_index=trio[4], - tumor_sample_name=trio[5], - normal_bam=trio[0], - normal_bam_index=trio[1], - normal_sample_name=trio[2], - pon=pon, - pon_index=pon_index, - scatter_count=scatter_count, - gnomad=gnomad, - gnomad_index=gnomad_index, - picard_jar = picard_jar, + picard = picard, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + tumor_bam = trio[2], + tumor_bai = trio[3], + normal_bam = trio[0], + normal_bai = trio[1], + pon = pon, + pon_index = pon_index, + scatter_count = scatter_count, + gnomad = gnomad, + gnomad_index = gnomad_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, - is_run_oncotator=is_run_oncotator, - oncotator_docker=oncotator_docker, - m2_docker = m2_docker, - gatk4_jar_override = gatk4_jar_override, - preemptible_attempts = preemptible_attempts, - artifact_modes = artifact_modes + is_run_oncotator = false, + artifact_modes = artifact_modes, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + oncotator_docker = "NO_ONCOTATOR", + preemptible_attempts = preemptible_attempts } call m2.Mutect2 as BadTumor { - input: - gatk4_jar=gatk4_jar, - intervals=intervals, - ref_fasta=ref_fasta, - ref_fasta_index=ref_fasta_index, - ref_dict=ref_dict, - tumor_bam=trio[6], - tumor_bam_index=trio[7], - tumor_sample_name=trio[8], - normal_bam=trio[0], - normal_bam_index=trio[1], - normal_sample_name=trio[2], - pon=pon, - pon_index=pon_index, - scatter_count=scatter_count, - gnomad=gnomad, - gnomad_index=gnomad_index, - picard_jar = picard_jar, - is_run_orientation_bias_filter = is_run_orientation_bias_filter, - is_run_oncotator=is_run_oncotator, - oncotator_docker=oncotator_docker, - m2_docker = m2_docker, - gatk4_jar_override = gatk4_jar_override, 
- preemptible_attempts = preemptible_attempts, - artifact_modes = artifact_modes - } + input: + picard = picard, + intervals = intervals, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, + tumor_bam = trio[4], + tumor_bai = trio[5], + normal_bam = trio[0], + normal_bai=trio[1], + pon = pon, + pon_index = pon_index, + scatter_count = scatter_count, + gnomad = gnomad, + gnomad_index = gnomad_index, + is_run_orientation_bias_filter = is_run_orientation_bias_filter, + is_run_oncotator = false, + artifact_modes = artifact_modes, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + oncotator_docker = "NO_ONCOTATOR", + preemptible_attempts = preemptible_attempts + } call Concordance { input: - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, intervals = intervals, truth_vcf = GoodTumor.filtered_vcf, #note, no orientation bias since it's optional output truth_vcf_idx = GoodTumor.filtered_vcf_index, eval_vcf = BadTumor.filtered_vcf, eval_vcf_idx = BadTumor.filtered_vcf_index, + gatk_override = gatk_override } } @@ -142,4 +100,39 @@ workflow Mutect2Trio { Array[File] tpf_idx = Concordance.tpfp_idx Array[File] summary = Concordance.summary } +} + +task Concordance { + File? intervals + File truth_vcf + File truth_vcf_idx + File eval_vcf + File eval_vcf_idx + + File? 
gatk_override + + String gatk_docker + + command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + + gatk --java-options "-Xmx2g" Concordance \ + ${"-L " + intervals} \ + -truth ${truth_vcf} -eval ${eval_vcf} -tpfn "true_positives_and_false_negatives.vcf" \ + -tpfp "true_positives_and_false_positives.vcf" \ + -summary summary.tsv + } + + runtime { + docker: gatk_docker + memory: "5 GB" + } + + output { + File tpfn = "true_positives_and_false_negatives.vcf" + File tpfn_idx = "true_positives_and_false_negatives.vcf.idx" + File tpfp = "true_positives_and_false_positives.vcf" + File tpfp_idx = "true_positives_and_false_positives.vcf.idx" + File summary = "summary.tsv" + } } \ No newline at end of file diff --git a/scripts/mutect2_wdl/unsupported/mutect2_multi_sample_concordance.wdl b/scripts/mutect2_wdl/unsupported/mutect2_multi_sample_concordance.wdl index a0a90f03eb3..615537d3db2 100644 --- a/scripts/mutect2_wdl/unsupported/mutect2_multi_sample_concordance.wdl +++ b/scripts/mutect2_wdl/unsupported/mutect2_multi_sample_concordance.wdl @@ -7,14 +7,14 @@ import "mutect2_multi_sample.wdl" as m2_multi workflow Mutect2_Multi_Concordance { - # gatk4_jar needs to be a String input to the workflow in order to work in a Docker image - String gatk4_jar + # inputs + File picard + File? intervals + File ref_fasta + File ref_fai + File ref_dict Int scatter_count File pair_list - File? intervals - File ref_fasta - File ref_fasta_index - File ref_dict File? pon File? pon_index File? gnomad @@ -22,26 +22,27 @@ workflow Mutect2_Multi_Concordance { File? variants_for_contamination File? variants_for_contamination_index Boolean is_run_orientation_bias_filter - String gatk_docker - File? gatk4_jar_override - Int? preemptible_attempts - Array[String] artifact_modes - File picard_jar - String? m2_args - String? m2_filtering_args - + Array[String] artifact_modes + String? m2_extra_args + String? 
m2_extra_filtering_args File truth_list Array[Array[String]] truth = read_tsv(truth_list) + File? gatk_override + + # runtime + String gatk_docker + Int? preemptible_attempts + call m2_multi.Mutect2_Multi { - input: - gatk4_jar = gatk4_jar, + input: + picard = picard, + ref_fasta = ref_fasta, + ref_fai = ref_fai, + ref_dict = ref_dict, scatter_count = scatter_count, pair_list = pair_list, intervals = intervals, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, pon = pon, pon_index = pon_index, gnomad = gnomad, @@ -50,82 +51,80 @@ workflow Mutect2_Multi_Concordance { variants_for_contamination_index = variants_for_contamination_index, is_run_orientation_bias_filter = is_run_orientation_bias_filter, is_run_oncotator = false, - gatk_docker = gatk_docker, - oncotator_docker = "NO_ONCOTATOR", - gatk4_jar_override = gatk4_jar_override, preemptible_attempts = preemptible_attempts, artifact_modes = artifact_modes, - picard_jar = picard_jar, - m2_args = m2_args, - m2_filtering_args = m2_filtering_args + m2_extra_args = m2_extra_args, + m2_extra_filtering_args = m2_extra_filtering_args, + gatk_override = gatk_override, + gatk_docker = gatk_docker, + oncotator_docker = "NO_ONCOTATOR" } - scatter (n in range(length(truth))) { - call Concordance { - input: - gatk4_jar = gatk4_jar, - gatk4_jar_override = gatk4_jar_override, - intervals = intervals, - truth_vcf = truth[n][0], - truth_vcf_idx = truth[n][1], - eval_vcf = Mutect2_Multi.filtered_vcf_files[n], - eval_vcf_idx = Mutect2_Multi.filtered_vcf_index_files[n], - gatk_docker = gatk_docker, - preemptible_attempts = preemptible_attempts - } - } + scatter (n in range(length(truth))) { + call Concordance { + input: + intervals = intervals, + truth_vcf = truth[n][0], + truth_vcf_idx = truth[n][1], + eval_vcf = Mutect2_Multi.filtered_vcf[n], + eval_vcf_idx = Mutect2_Multi.filtered_vcf_idx[n], + preemptible_attempts = preemptible_attempts, + gatk_override = gatk_override, + gatk_docker = gatk_docker 
+ } + } output { - Array[File] tpfn = Concordance.tpfn - Array[File] tpfn_idx = Concordance.tpfn_idx - Array[File] tpfp = Concordance.tpfp - Array[File] tpfp_idx = Concordance.tpfp_idx - Array[File] ftnfn = Concordance.ftnfn - Array[File] ftnfn_idx = Concordance.ftnfn_idx - Array[File] summary = Concordance.summary + Array[File] tpfn = Concordance.tpfn + Array[File] tpfn_idx = Concordance.tpfn_idx + Array[File] tpfp = Concordance.tpfp + Array[File] tpfp_idx = Concordance.tpfp_idx + Array[File] ftnfn = Concordance.ftnfn + Array[File] ftnfn_idx = Concordance.ftnfn_idx + Array[File] summary = Concordance.summary } } - task Concordance { - String gatk4_jar - File? gatk4_jar_override - File? intervals - File truth_vcf - File truth_vcf_idx - File eval_vcf - File eval_vcf_idx - String gatk_docker - Int preemptible_attempts +task Concordance { + # inputs + File? intervals + File truth_vcf + File truth_vcf_idx + File eval_vcf + File eval_vcf_idx + + File? gatk_override + + # runtime + String gatk_docker + Int? 
preemptible_attempts - command { - # Use GATK Jar override if specified - GATK_JAR=${gatk4_jar} - if [[ "${gatk4_jar_override}" == *.jar ]]; then - GATK_JAR=${gatk4_jar_override} - fi + command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - java -jar $GATK_JAR Concordance ${"-L " + intervals} \ + gatk --java-options "-Xmx2g" Concordance \ + ${"-L " + intervals} \ -truth ${truth_vcf} -eval ${eval_vcf} \ -tpfn "tpfn.vcf" \ -tpfp "tpfp.vcf" \ -ftnfn "ftnfn.vcf" \ -summary summary.tsv - } + } - runtime { - memory: "5 GB" - docker: "${gatk_docker}" - disks: "local-disk " + 400 + " HDD" - preemptible: "${preemptible_attempts}" - } + runtime { + memory: "5 GB" + docker: "${gatk_docker}" + disks: "local-disk " + 100 + " HDD" + preemptible: select_first([preemptible_attempts, 2]) + } - output { - File tpfn = "tpfn.vcf" - File tpfn_idx = "tpfn.vcf.idx" - File tpfp = "tpfp.vcf" - File tpfp_idx = "tpfp.vcf.idx" - File ftnfn = "ftnfn.vcf" - File ftnfn_idx = "ftnfn.vcf.idx" - File summary = "summary.tsv" - } + output { + File tpfn = "tpfn.vcf" + File tpfn_idx = "tpfn.vcf.idx" + File tpfp = "tpfp.vcf" + File tpfp_idx = "tpfp.vcf.idx" + File ftnfn = "ftnfn.vcf" + File ftnfn_idx = "ftnfn.vcf.idx" + File summary = "summary.tsv" + } } \ No newline at end of file diff --git a/scripts/mutect2_wdl/unsupported/preprocess_hapmap.wdl b/scripts/mutect2_wdl/unsupported/preprocess_hapmap.wdl index 860f7ca7eff..929ce5f952e 100644 --- a/scripts/mutect2_wdl/unsupported/preprocess_hapmap.wdl +++ b/scripts/mutect2_wdl/unsupported/preprocess_hapmap.wdl @@ -17,7 +17,7 @@ # neighboring indels. workflow PreprocessHapmap { - File gatk + # inputs File hapmap File hapmap_idx File five_plex_samples @@ -27,30 +27,35 @@ workflow PreprocessHapmap { File gnomad # common variants eg gnomad variants with AF > 0.001 File gnomad_idx + File? 
gatk_override + + # runtime + String gatk_docker + call Subsample as SubsampleFive { - input: gatk = gatk, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = five_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx + input: gatk_override = gatk_override, gatk_docker = gatk_docker, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = five_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx } call Subsample as SubsampleTen { - input: gatk = gatk, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = ten_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx + input: gatk_override = gatk_override, gatk_docker = gatk_docker, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = ten_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx } call Subsample as SubsampleTwenty { - input: gatk = gatk, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = twenty_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx + input: gatk_override = gatk_override, gatk_docker = gatk_docker, hapmap = hapmap, hapmap_idx = hapmap_idx, samples = twenty_plex_samples, gnomad = gnomad, gnomad_idx = gnomad_idx } call RemoveNearbyIndels as RemoveFive { - input: gatk = gatk, input_vcf = SubsampleFive.output_vcf, input_vcf_idx = SubsampleFive.output_vcf_idx, + input: gatk_override = gatk_override, gatk_docker = gatk_docker, input_vcf = SubsampleFive.output_vcf, input_vcf_idx = SubsampleFive.output_vcf_idx, min_indel_spacing = min_indel_spacing, name = "five_plex" } call RemoveNearbyIndels as RemoveTen { - input: gatk = gatk, input_vcf = SubsampleTen.output_vcf, input_vcf_idx = SubsampleTen.output_vcf_idx, + input: gatk_override = gatk_override, gatk_docker = gatk_docker, input_vcf = SubsampleTen.output_vcf, input_vcf_idx = SubsampleTen.output_vcf_idx, min_indel_spacing = min_indel_spacing, name = "ten_plex" } call RemoveNearbyIndels as RemoveTwenty { - input: gatk = gatk, input_vcf = SubsampleTwenty.output_vcf, input_vcf_idx = SubsampleTwenty.output_vcf_idx, + input: gatk_override = 
gatk_override, gatk_docker = gatk_docker, input_vcf = SubsampleTwenty.output_vcf, input_vcf_idx = SubsampleTwenty.output_vcf_idx, min_indel_spacing = min_indel_spacing, name = "twenty_plex" } @@ -65,25 +70,37 @@ workflow PreprocessHapmap { } task Subsample { - File gatk + # inputs File hapmap File hapmap_idx File samples File gnomad # common variants, to reduce false positives eg mapping artifacts in the original Hapmap vcf File gnomad_idx + File? gatk_override + + # runtime + String gatk_docker + command { + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + # subsampling and restriction to biallelics - java -jar ${gatk} SelectVariants -V ${hapmap} -O sub.vcf \ - -restrictAllelesTo BIALLELIC \ - --sample_name ${samples} \ + gatk --java-options "-Xmx4g" SelectVariants -V ${hapmap} -O sub.vcf \ + -restrict-alleles-to BIALLELIC \ + --sample-name ${samples} \ -L ${gnomad} \ - -maxIndelSize 10 \ - --excludeNonVariants + -max-indel-size 10 \ + --exclude-non-variants #remove NEGATIVE_TRAIN_SITE variants and re-index grep -v NEGATIVE_TRAIN_SITE sub.vcf > subsampled.vcf - java -jar ${gatk} IndexFeatureFile -F subsampled.vcf + gatk --java-options "-Xmx4g" IndexFeatureFile -F subsampled.vcf + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 } output { @@ -93,14 +110,25 @@ task Subsample { } task RemoveNearbyIndels { - File gatk + # inputs File input_vcf File input_vcf_idx Int min_indel_spacing String name + File? gatk_override + + # runtime + String gatk_docker + command { - java -jar ${gatk} RemoveNearbyIndels -V ${input_vcf} -O ${name}.vcf -minIndelSpacing ${min_indel_spacing} + export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} + gatk --java-options "-Xmx4g" RemoveNearbyIndels -V ${input_vcf} -O ${name}.vcf -min-indel-spacing ${min_indel_spacing} + } + + runtime { + docker: "${gatk_docker}" + preemptible: 2 } output {