From f9398588ad5e335729e9fb4b45e9480d940bd4bf Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Tue, 28 Nov 2023 11:00:50 -0500 Subject: [PATCH 1/2] feat: rename all vcf samples for continuity --- workflow/modules/variant_calling.nf | 63 +++++++++++++++------- workflow/modules/variant_calling_tonly.nf | 30 +++++++---- workflow/modules/workflows.nf | 23 ++++---- workflow/modules/workflows_tonly.nf | 2 +- workflow/scripts/add_gt_lofreq.sh | 65 +++++++++++++++++++++++ 5 files changed, 140 insertions(+), 43 deletions(-) create mode 100755 workflow/scripts/add_gt_lofreq.sh diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index f0cae08..ce6d21c 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -304,10 +304,19 @@ process strelka_tn { --runDir=wd \ --callRegions ${bed}.gz ./wd/runWorkflow.py -m local -j $task.cpus - mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz - mv wd/results/variants/somatic.snvs.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi - mv wd/results/variants/somatic.indels.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi + mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz + mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname + + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \ + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz 
+ bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \ + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + """ stub: @@ -329,7 +338,7 @@ process vardict_tn { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf") + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz") //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: @@ -351,12 +360,18 @@ process vardict_tn { -S \ -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf + printf "${normal.Name}\t${normalname}\t${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ + | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz + + """ stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz """ @@ -376,7 +391,7 @@ process varscan_tn { output: tuple val(tumorname), val(normalname), - path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf") + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz") shell: ''' @@ -388,23 +403,25 @@ process varscan_tn { eval "$varscan_cmd" awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' 
!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ - | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ - | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp + | sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz - java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \ - -R !{GENOMEREF} \ - --assumeIdenticalSamples \ - --filteredrecordsmergetype KEEP_UNCONDITIONAL \ - --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \ - --variant!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \ - -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf + gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \ + -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \ + -R !{GENOMEREF} -SD !{GENOMEDICT} \ + -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf + + printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \ + | bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz ''' stub: """ - touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz """ } @@ -514,7 +531,13 @@ process muse_tn { MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ 
-O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G - bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}.vcf.gz + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz + + printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz + """ stub: @@ -596,8 +619,8 @@ process combineVariants_alternative { """ mkdir ${vc} bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz - bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf.gz - bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf + bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 227dab4..3d67e26 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -241,7 +241,7 @@ process varscan_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf") + path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz") shell: @@ -251,13 +251,17 @@ process varscan_tonly { varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" + + printf "TUMOR\t!{tumorname}\n" > sampname + + bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \ + | bcftools view -Oz -o 
!{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz + ''' stub: - """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf - + touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz """ } @@ -270,19 +274,20 @@ process vardict_tonly { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf") + path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz") script: """ bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed} + VarDict -G $GENOMEREF \ - -f 0.05 \ + -f 0.01 \ -x 500 \ --nosv \ -b ${tumor} --fisher \ -t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \ - -R temp_${bed} | var2vcf_valid.pl \ + temp_${bed} | var2vcf_valid.pl \ -N ${tumor} \ -Q 20 \ -d 10 \ @@ -291,12 +296,17 @@ process vardict_tonly { -E \ -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf + printf "${tumor.Name}\t${tumorname}\n" > sampname + + bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \ + | bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz + """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf + touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz """ @@ -357,9 +367,9 @@ process somaticcombine_tonly { java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ -R $GENOMEREF \ --genotypeMergeOption PRIORITIZE \ - --priority_list mutect2,octopus,vardict,varscan \ + --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ - -O ${tumorsample}_combined.vcf.gz \ + -O ${tumorsample}_combined_tonly.vcf.gz \ $vcfin2 """ diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 79521c7..42e126c 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -179,7 +179,7 @@ workflow VC { pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout) contamination_paired(pileup_paired_all) - + //Mutect2 TN 
mutect2.out.groupTuple(by:[0,1]) | multiMap { tumor,normal,vcfs,f1r2,stats -> mut2out_lor: tuple("${tumor}_vs_${normal}", @@ -205,7 +205,7 @@ workflow VC { annotvep_tn_mut2(mutect2_in) - //Tumor Only Calling + //Mutect2 Tumor Only bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} mutect2_t_tonly(bambyinterval_t) @@ -231,7 +231,7 @@ workflow VC { | join(contamination_tumoronly.out) | mutect2filter_tonly | join(sample_sheet) - | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} annotvep_tonly_mut2(mutect2_in_tonly) //Strelka TN @@ -243,14 +243,14 @@ workflow VC { | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} annotvep_tn_strelka(strelka_in) - //Vardict + //Vardict TN vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} | combineVariants_vardict | join(sample_sheet_paired) | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} annotvep_tn_vardict(vardict_in) - //VarDict_tonly + //VarDict TOnly vardict_in_tonly=bambyinterval | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> tuple(tumorname,tumorbam,tumorbai,bed)} @@ -263,12 +263,12 @@ workflow VC { //VarScan TN varscan_in=bambyinterval.combine(contamination_paired.out) | varscan_tn | groupTuple(by:[0,1]) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf/)[0][1].toInteger()},"varscan")} + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ 
/${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} | combineVariants_varscan | join(sample_sheet_paired) | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) - //VarScan_TOnly + //VarScan TOnly varscan_in_tonly=bambyinterval.combine(contamination_paired.out) | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() @@ -292,7 +292,7 @@ workflow VC { | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} annotvep_tn_muse(muse_in) - //Octopus_TN + //Octopus TN octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} @@ -301,7 +301,7 @@ workflow VC { tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} annotvep_tn_octopus(octopus_in) - //Octopus_TOnly + //Octopus TOnly octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly | groupTuple() @@ -324,7 +324,6 @@ workflow VC { | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} | annotvep_tonly_combined - //Implement PCGR Annotator/CivIC Next emit: @@ -467,8 +466,8 @@ workflow QC_GL { fastqin fastpout applybqsr - glnexusout //GLnexus germline output - bcfout //DV germline output + glnexusout + bcfout main: //QC Steps diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index ebcaf72..9b844e5 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -164,7 +164,7 @@ workflow VC_TONLY { mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) | join(sample_sheet) - | 
map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2_tonly",finalvcf,finalindex)} annotvep_tonly_mut2(mutect2_tonly_in) diff --git a/workflow/scripts/add_gt_lofreq.sh b/workflow/scripts/add_gt_lofreq.sh new file mode 100755 index 0000000..ac2b373 --- /dev/null +++ b/workflow/scripts/add_gt_lofreq.sh @@ -0,0 +1,65 @@
+#!/bin/bash
+#Author: Dr Charles Foster http://github.com/charlesfoster
+#
+# Append a FORMAT column and one sample column holding a constant GT value
+# to every record of a bgzipped VCF, then bgzip and tabix-index the result.
+# NOTE(review): assumes the input has no FORMAT/sample columns yet (the
+# #CHROM rewrite keys on "FILTER<TAB>INFO") and that line 6 is still header.
+#   -i in.vcf.gz   -g genotype (default 1)   -n sample name   -o out.vcf.gz
+
+print_usage() {
+  printf "Usage: bash add_gt_lofreq.sh -i in.vcf.gz [-g genotype] [-n sample_name] -o out.vcf.gz\n"
+  printf "Genotype defaults to 1 if not specified\n"
+  printf "Sample name guessed from infile name if not specified\n"
+}
+
+if [[ $# -eq 0 ]] ; then
+    print_usage
+    exit 1
+fi
+
+while getopts 'i:g:n:ho:' flag; do
+  case "${flag}" in
+    i) IN="${OPTARG}" ;;
+    n) NAME="${OPTARG}" ;;
+    g) GENOTYPE="${OPTARG}" ;;
+    h) print_usage
+       exit 1 ;;
+    o) OUT="${OPTARG}" ;;
+    *) print_usage
+       exit 1 ;;
+  esac
+done
+
+# Quoted: an unset/empty IN must fail here rather than collapse to `[ ! -f ]`.
+if [ ! -f "${IN}" ]; then
+    printf "\nError: input file not found\n"
+    print_usage
+    exit 1
+fi
+
+if [ -z "${GENOTYPE}" ]; then
+    printf "\nNo genotype specified: setting to 1"
+    GENOTYPE=1
+fi
+
+if [ -z "${NAME}" ]; then
+    NAME=$(basename "${IN}" | cut -f1 -d ".")
+    printf "\nNo name specified: guessed it to be %s" "${NAME}"
+fi
+
+if [ -z "${OUT}" ]; then
+    # Strip only a literal trailing ".vcf.gz"; OUT can never equal IN.
+    OUT="${IN%.vcf.gz}_withGT.vcf.gz"
+    printf "\nNo outfile specified: setting to %s\n" "${OUT}"
+fi
+
+# Declare GT before line 6, extend the #CHROM line, then fill columns 9/10.
+gunzip -kc "${IN}" | \
+sed -e '6i##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">' \
+-e "s|FILTER\tINFO|FILTER\tINFO\tFORMAT\t${NAME}|g" | \
+awk -F'\t' -v genotype="${GENOTYPE}" -v OFS="\t" '/^[^#]/{ $9 = "GT"; $10 = genotype }1' | \
+bgzip -c > "${OUT}"
+tabix -p vcf "${OUT}"
+printf "VCF with artificial genotype written to %s\n" "${OUT}"
+exit 0
From fc25c2d2d59ed9ae2d6fd662bba19d92aa9612a7 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 29 Nov 2023 09:36:15 -0500 Subject: [PATCH 2/2] fix: change back to gatk3 --- docker/logan_base/Dockerfile | 16 +++++++++------ nextflow.config | 6 +++++- workflow/modules/trim_align.nf | 2 +- workflow/modules/variant_calling.nf | 25 +++++++++++++++-------- workflow/modules/variant_calling_tonly.nf | 4 ++-- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 9b04b2c..5182c64 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -17,12 +17,10 @@ WORKDIR /opt2 # This section installs system packages required for your project # If you need extra system packages add them here.
# python/3.8.0 and python/2.7.16 (strelka and manta) -# JDK 17 for DISCVRSeq RUN apt-get update \ && apt-get -y upgrade \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - bc \ - openjdk-17-jdk + bc # Common bioinformatics tools # bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1 @@ -54,9 +52,15 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4 && /opt2/gatk-4.3.0.0/gatk --list ENV PATH="/opt2/gatk-4.3.0.0:$PATH" -# Use DISCVRSeq For CombineVariants Replacement -RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar -ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar" +# Install last release of GATK3 (GATK/3.8-1) +# Only being used for the CombineVariants +# command that is not available in GATK4 +# Available via env variable: $GATK_JAR +# Requires Java8 or 1.8 +RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ + && tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \ + && rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 +ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar" # Install dependencies needed to add a new repository over HTTPS RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \ diff --git a/nextflow.config b/nextflow.config index b9b3b00..c179409 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,7 +23,7 @@ params { script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl" freec_significance = "${projectDir}/workflow/scripts/assess_significance.R" freec_plot = "${projectDir}/workflow/scripts/makeGraph.R" - lofreq_convert = "${projectDir}/workflow/scripts/lofreq_convert.sh" + lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh" vep_cache = "/fdb/VEP/102/cache" //Biowulf @@ -84,6 +84,10 @@ profiles { withLabel: process_somaticcaller { container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } + //Name Based + withName:bwamem2 { + container = 
'docker://dnousome/ccbr_logan_base:v0.3.3' + } withName:fastq_screen { container = 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' } diff --git a/workflow/modules/trim_align.nf b/workflow/modules/trim_align.nf index badac4c..fefe243 100644 --- a/workflow/modules/trim_align.nf +++ b/workflow/modules/trim_align.nf @@ -63,7 +63,7 @@ process bwamem2 { ${GENOMEREF} \ ${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \ samblaster -M | \ - samtools sort -@$task.cpus -m 4G - -o ${samplename}.bam + samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index ce6d21c..1e6bb0a 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -360,7 +360,7 @@ process vardict_tn { -S \ -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf - printf "${normal.Name}\t${normalname}\t${tumor.Name}\t${tumorname}\n" > sampname + printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \ | bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz @@ -493,8 +493,15 @@ process lofreq_tn { ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz - $LOFREQ_CONVERT ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz ${tumorname} \ - | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + $LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \ + -n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz + + bcftools view -h 
${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead + + sed 's/^##FORMAT=/##FORMAT=/' temphead > temphead1 + bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\ + bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz """ @@ -738,12 +745,12 @@ process somaticcombine { vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - """ - java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ - -R $GENOMEREF \ - --genotypeMergeOption PRIORITIZE \ - --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ - --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ + """ + java -jar \$GATK_JAR -T CombineVariants \ + -nt $task.cpus \ + --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \ + --genotypemergeoption PRIORITIZE \ + --rod_priority_list mutect2,strelka,muse,lofreq,vardict,varscan \ -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ $vcfin2 """ diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 3d67e26..81a35c7 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -364,8 +364,8 @@ process somaticcombine_tonly { vcfin2="-V:" + vcfin1.join(" -V:") """ - java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ - -R $GENOMEREF \ + java -jar \$GATK_JAR -T CombineVariants \ + -nt $task.cpus \ --genotypeMergeOption PRIORITIZE \ --priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \ --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \