Skip to content

Commit

Permalink
bring Funcotator changes to M2 NIO WDL (#5742)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored Mar 13, 2019
1 parent d8d06cd commit 5c46cb8
Showing 1 changed file with 91 additions and 51 deletions.
142 changes: 91 additions & 51 deletions scripts/mutect2_wdl/mutect2_nio.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,17 @@
##
## Funcotator parameters (see Funcotator help for more details).
## funco_reference_version: "hg19" for hg19 or b37. "hg38" for hg38. Default: "hg19"
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
## funco_output_format: "MAF" to produce a MAF file, "VCF" to produce a VCF file. Default: "MAF"
## funco_compress: (Only valid if funco_output_format == "VCF" ) If true, will compress the output of Funcotator. If false, produces an uncompressed output file. Default: false
## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance). If false, will not annotate with gnomAD. Default: false
## funco_transcript_selection_mode: How to select transcripts in Funcotator. ALL, CANONICAL, or BEST_EFFECT
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
## funco_data_sources_tar_gz: Funcotator datasources tar gz file. Bucket location is recommended when running on the cloud.
## funco_annotation_defaults: Default values for annotations, when values are unspecified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad"
## funco_annotation_overrides: Values for annotations that replace any existing values, even when values are already specified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad"
## funcotator_excluded_fields: Annotations that should not appear in the output (VCF or MAF). Specified as <ANNOTATION>. For example: "ClinVar_ALLELEID"
## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column). If false, will annotate all variants in the input file. Default: true
## funcotator_extra_args: Any additional arguments to pass to Funcotator. Default: ""
##
## Outputs :
## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
Expand Down Expand Up @@ -119,22 +124,28 @@ workflow Mutect2 {
File? default_config_file
String? oncotator_extra_args

# funcotator inputs
# Funcotator inputs
Boolean? run_funcotator
Boolean run_funcotator_or_default = select_first([run_funcotator, false])
String? funco_reference_version
String? funco_output_format
Boolean? funco_compress
Boolean? funco_use_gnomad_AF
File? funco_data_sources_tar_gz
String? funco_transcript_selection_mode
File? funco_transcript_selection_list
Array[String]? funco_annotation_defaults
Array[String]? funco_annotation_overrides
Array[String]? funcotator_excluded_fields
Boolean? funco_filter_funcotations
String? funcotator_extra_args

File? gatk_override
String funco_default_output_format = "MAF"


# runtime
String gatk_docker
File? gatk_override
String basic_bash_docker = "ubuntu:16.04"
String? oncotator_docker
String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"])
Expand Down Expand Up @@ -446,28 +457,33 @@ workflow Mutect2 {
if (run_funcotator_or_default) {
File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
call FuncotateMaf {
call Funcotate {
input:
ref_fasta = ref_fasta,
input_vcf = funcotate_vcf_input,
input_vcf_idx = funcotate_vcf_input_index,
ref_fasta = ref_fasta,
reference_version = select_first([funco_reference_version, "hg19"]),
output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated",
output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format,
compress = if defined(funco_compress) then funco_compress else false,
use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false,
data_sources_tar_gz = funco_data_sources_tar_gz,
case_id = M2.tumor_sample[0],
control_id = M2.normal_sample[0],
sequencing_center = sequencing_center,
sequence_source = sequence_source,
transcript_selection_mode = funco_transcript_selection_mode,
transcript_selection_list = funco_transcript_selection_list,
annotation_defaults = funco_annotation_defaults,
annotation_overrides = funco_annotation_overrides,
funcotator_excluded_fields = funcotator_excluded_fields,
filter_funcotations = filter_funcotations_or_default,
extra_args = funcotator_extra_args,
gatk_docker = gatk_docker,
gatk_override = gatk_override,
filter_funcotations = filter_funcotations_or_default,
funcotator_excluded_fields = funcotator_excluded_fields,
sequencing_center = sequencing_center,
sequence_source = sequence_source,
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + funco_tar_size + disk_pad,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries,
extra_args = funcotator_extra_args
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad
}
}

Expand All @@ -479,7 +495,8 @@ workflow Mutect2 {
File? contamination_table = CalculateContamination.contamination_table

File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
File? funcotated_maf = FuncotateMaf.funcotated_output
File? funcotated_file = Funcotate.funcotated_output_file
File? funcotated_file_index = Funcotate.funcotated_output_file_index
File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics
File? bamout = MergeBamOuts.merged_bam_out
File? bamout_index = MergeBamOuts.merged_bam_out_index
Expand Down Expand Up @@ -1289,40 +1306,52 @@ task SumFloats {
}
}

task FuncotateMaf {
# inputs
task Funcotate {
# ==============
# Inputs
String ref_fasta
String input_vcf
String input_vcf_idx
String reference_version
String output_format = "MAF"
String output_file_base_name
String output_format
Boolean compress
Boolean use_gnomad
# This should be updated when a new version of the data sources is released
# TODO: Make this dynamically chosen in the command.
File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz"
String? control_id
String? case_id
String? sequencing_center
String? sequence_source
String case_id
String? control_id

File? data_sources_tar_gz
String? transcript_selection_mode
File? transcript_selection_list
Array[String]? annotation_defaults
Array[String]? annotation_overrides
Array[String]? funcotator_excluded_fields
Boolean filter_funcotations
Boolean? filter_funcotations
File? interval_list

String? extra_args

# ==============
# Process input args:
String output_maf = output_file_base_name + ".maf"
String output_maf_index = output_maf + ".idx"
String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf"
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
String output_file = if output_format == "MAF" then output_maf else output_vcf
String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_index
String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else ""
String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else ""
String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else ""
String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else ""
String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else ""
String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else ""
String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated"
# ==============
# runtime
String interval_list_arg = if defined(interval_list) then " -L " else ""
String extra_args_arg = select_first([extra_args, ""])

# ==============
# Runtime options:
String gatk_docker
File? gatk_override
Int? mem
Expand All @@ -1333,56 +1362,66 @@ task FuncotateMaf {

Boolean use_ssd = false

# This should be updated when a new version of the data sources is released
String default_datasources_version = "funcotator_dataSources.v1.4.20180615"

# You may have to change the following two parameter values depending on the task requirements
Int default_ram_mb = 3000
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
Int default_disk_space_gb = 100

# Mem is in units of GB but our command and memory runtime values are in MB
Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb
Int command_mem = machine_mem - 1000

String dollar = "$"

command <<<
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

DATA_SOURCES_TAR_GZ=${data_sources_tar_gz}
if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then
# We have to download the data sources:
echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ"
echo "Downloading default data sources..."
wget ftp://[email protected]/bundle/funcotator/${default_datasources_version}.tar.gz
tar -zxf ${default_datasources_version}.tar.gz
DATA_SOURCES_FOLDER=${default_datasources_version}
else
# Extract the tar.gz:
mkdir datasources_dir
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
DATA_SOURCES_FOLDER="$PWD/datasources_dir"
# Extract our data sources:
echo "Extracting data sources zip file..."
mkdir datasources_dir
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
DATA_SOURCES_FOLDER="$PWD/datasources_dir"

# Handle gnomAD:
if ${use_gnomad} ; then
echo "Enabling gnomAD..."
for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do
if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then
cd ${dollar}{DATA_SOURCES_FOLDER}
tar -zvxf ${dollar}{potential_gnomad_gz}
cd -
else
echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2
false
fi
done
fi

# Run Funcotator:
gatk --java-options "-Xmx${command_mem}m" Funcotator \
--data-sources-path $DATA_SOURCES_FOLDER \
--ref-version ${reference_version} \
--output-file-format ${output_format} \
-R ${ref_fasta} \
-V ${input_vcf} \
-O ${final_output_filename} \
${"-L " + interval_list} \
-O ${output_file} \
${interval_list_arg} ${default="" interval_list} \
--annotation-default normal_barcode:${default="Unknown" control_id} \
--annotation-default tumor_barcode:${default="Unknown" case_id} \
--annotation-default Center:${default="Unknown" sequencing_center} \
--annotation-default source:${default="Unknown" sequence_source} \
${"--transcript-selection-mode " + transcript_selection_mode} \
${"--transcript-list " + transcript_selection_list} \
--annotation-default normal_barcode:${control_id} \
--annotation-default tumor_barcode:${case_id} \
--annotation-default Center:${default="Unknown" sequencing_center} \
--annotation-default source:${default="Unknown" sequence_source} \
${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \
${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \
${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \
${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \
${filter_funcotations_args} \
${extra_args}
${extra_args_arg}
# Make sure we have a placeholder index for MAF files so this workflow doesn't fail:
if [[ "${output_format}" == "MAF" ]] ; then
touch ${output_maf_index}
fi
>>>

runtime {
Expand All @@ -1396,6 +1435,7 @@ task FuncotateMaf {
}

output {
File funcotated_output = "${final_output_filename}"
File funcotated_output_file = "${output_file}"
File funcotated_output_file_index = "${output_file_index}"
}
}

0 comments on commit 5c46cb8

Please sign in to comment.