Skip to content

Commit

Permalink
Merge pull request #20 from CCBR/dev/feature-SVCNV
Browse files Browse the repository at this point in the history
Dev/feature svcnv
  • Loading branch information
dnousome authored Nov 29, 2023
2 parents 21d7526 + fc25c2d commit 2beaaa8
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 61 deletions.
16 changes: 10 additions & 6 deletions docker/logan_base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ WORKDIR /opt2
# This section installs system packages required for your project
# If you need extra system packages add them here.
# python/3.8.0 and python/2.7.16 (strelka and manta)
# JDK 17 for DISCVRSeq
RUN apt-get update \
&& apt-get -y upgrade \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y \
bc \
openjdk-17-jdk
bc

# Common bioinformatics tools
# bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1
Expand Down Expand Up @@ -54,9 +52,15 @@ RUN wget https://github.com/broadinstitute/gatk/releases/download/4.3.0.0/gatk-4
&& /opt2/gatk-4.3.0.0/gatk --list
ENV PATH="/opt2/gatk-4.3.0.0:$PATH"

# Use DISCVRSeq For CombineVariants Replacement
RUN wget https://github.com/BimberLab/DISCVRSeq/releases/download/1.3.61/DISCVRSeq-1.3.61.jar
ENV DISCVRSeq_JAR="/opt2/DISCVRSeq-1.3.61.jar"
# Install last release of GATK3 (GATK/3.8-1)
# Only being used for the CombineVariants
# command that is not available in GATK4
# Available via env variable: $GATK_JAR
# Requires Java 8 (also known as Java 1.8)
RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& tar -xvjf /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2 \
&& rm /opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2
ENV GATK_JAR="/opt2/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar"

# Install dependencies needed to add a new repository over HTTPS
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y \
Expand Down
6 changes: 5 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ params {
script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl"
freec_significance = "${projectDir}/workflow/scripts/assess_significance.R"
freec_plot = "${projectDir}/workflow/scripts/makeGraph.R"
lofreq_convert = "${projectDir}/workflow/scripts/lofreq_convert.sh"
lofreq_convert = "${projectDir}/workflow/scripts/add_gt_lofreq.sh"
vep_cache = "/fdb/VEP/102/cache"

//Biowulf
Expand Down Expand Up @@ -84,6 +84,10 @@ profiles {
withLabel: process_somaticcaller {
container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
}
//Name Based
withName:bwamem2 {
container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
}
withName:fastq_screen {
container = 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0'
}
Expand Down
2 changes: 1 addition & 1 deletion workflow/modules/trim_align.nf
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ process bwamem2 {
${GENOMEREF} \
${samplename}.R1.trimmed.fastq.gz ${samplename}.R2.trimmed.fastq.gz | \
samblaster -M | \
samtools sort -@$task.cpus -m 4G - -o ${samplename}.bam
samtools sort -@ $task.cpus -m 4G - -o ${samplename}.bam
samtools index -@ $task.cpus ${samplename}.bam ${samplename}.bai
Expand Down
86 changes: 58 additions & 28 deletions workflow/modules/variant_calling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,19 @@ process strelka_tn {
--runDir=wd \
--callRegions ${bed}.gz
./wd/runWorkflow.py -m local -j $task.cpus
mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
mv wd/results/variants/somatic.snvs.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi
mv wd/results/variants/somatic.indels.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi
mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz
mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz
printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" >sampname
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.snvs.vcf.gz \
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic_temp.indels.vcf.gz \
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
bcftools index -t ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
"""

stub:
Expand All @@ -329,7 +338,7 @@ process vardict_tn {

output:
tuple val(tumorname), val(normalname),
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf")
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz")
//bcbio notes on vardict filtering: called with “var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and
//filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))”
script:
Expand All @@ -351,12 +360,18 @@ process vardict_tn {
-S \
-f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
printf "${normal.Name}\t${normalname}\n${tumor.Name}\t${tumorname}\n" > sampname
bcftools reheader -s sampname ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf \
| bcftools view -Oz -o ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
"""

stub:

"""
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf.gz
"""

Expand All @@ -376,7 +391,7 @@ process varscan_tn {

output:
tuple val(tumorname), val(normalname),
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf")
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz")

shell:
'''
Expand All @@ -388,23 +403,25 @@ process varscan_tn {
eval "$varscan_cmd"
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
| sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp
| sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
| sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp
| sed '/^$/d' | bcftools view - -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz
java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \
-R !{GENOMEREF} \
--assumeIdenticalSamples \
--filteredrecordsmergetype KEEP_UNCONDITIONAL \
--variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \
--variant!{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \
-O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf
gatk SortVcf -I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.snp_temp.vcf.gz \
-I !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.indel_temp.vcf.gz \
-R !{GENOMEREF} -SD !{GENOMEDICT} \
-O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf
printf "NORMAL\t!{normalname}\nTUMOR\t!{tumorname}\n" > sampname
bcftools reheader -s sampname !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}_temp.varscan.vcf \
| bcftools view -Oz -o !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.gz
'''

stub:
"""
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf
touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf.gz
"""

}
Expand Down Expand Up @@ -476,8 +493,15 @@ process lofreq_tn {
${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \
${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz
$LOFREQ_CONVERT ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz ${tumorname} \
| bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
$LOFREQ_CONVERT -i ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz -g 1/0 \
-n ${tumorname} -o ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz
bcftools view -h ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz >temphead
sed 's/^##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">/##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref\\/fwd, ref\\/rev, var\\/fwd, var\\/rev">/' temphead > temphead1
bcftools reheader ${tumorname}_vs_${normalname}_${bed.simpleName}_temp1_lofreq.vcf.gz -h temphead1 |\
bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
"""
Expand Down Expand Up @@ -514,7 +538,13 @@ process muse_tn {
MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \
-O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G
bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}_temp.vcf.gz
printf "NORMAL\t${normalname}\nTUMOR\t${tumorname}\n" > sampname
bcftools reheader -s sampname ${tumorname}_vs_${normalname}_temp.vcf.gz \
| bcftools view -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
"""

stub:
Expand Down Expand Up @@ -596,8 +626,8 @@ process combineVariants_alternative {
"""
mkdir ${vc}
bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf.gz
bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf
bcftools sort ${sample}.${vc}.temp.vcf -Oz -o ${sample}.${vc}.marked.vcf.gz
bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
sed '/^\$/d' > ${sample}.${vc}.temp.vcf
Expand Down Expand Up @@ -715,12 +745,12 @@ process somaticcombine {
vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
vcfin2="-V:" + vcfin1.join(" -V:")

"""
java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
-R $GENOMEREF \
--genotypeMergeOption PRIORITIZE \
--priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
--filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
"""
java -jar \$GATK_JAR -T CombineVariants \
-nt $task.cpus \
--filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
--genotypemergeoption PRIORITIZE \
--rod_priority_list mutect2,strelka,muse,lofreq,vardict,varscan \
-O ${tumorsample}_vs_${normal}_combined.vcf.gz \
$vcfin2
"""
Expand Down
34 changes: 22 additions & 12 deletions workflow/modules/variant_calling_tonly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ process varscan_tonly {

output:
tuple val(tumorname),
path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf")
path("${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz")

shell:

Expand All @@ -251,13 +251,17 @@ process varscan_tonly {
varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
printf "TUMOR\t!{tumorname}\n" > sampname
bcftools reheader -s sampname !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf \
| bcftools view -Oz -o !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz
'''

stub:

"""
touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf
touch ${tumor.simpleName}_${bed.simpleName}.tonly.varscan.vcf.gz
"""

}
Expand All @@ -270,19 +274,20 @@ process vardict_tonly {

output:
tuple val(tumorname),
path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf")
path("${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz")

script:

"""
bedtools makewindows -b ${bed} -w 50150 -s 50000 > temp_${bed}
VarDict -G $GENOMEREF \
-f 0.05 \
-f 0.01 \
-x 500 \
--nosv \
-b ${tumor} --fisher \
-t -Q 20 -c 1 -S 2 -E 3 --th $task.cpus \
-R temp_${bed} | var2vcf_valid.pl \
temp_${bed} | var2vcf_valid.pl \
-N ${tumor} \
-Q 20 \
-d 10 \
Expand All @@ -291,12 +296,17 @@ process vardict_tonly {
-E \
-f 0.05 > ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
printf "${tumor.Name}\t${tumorname}\n" > sampname
bcftools reheader -s sampname ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf \
| bcftools view -Oz -o ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
"""

stub:

"""
touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf
touch ${tumor.simpleName}_${bed.simpleName}.tonly.vardict.vcf.gz
"""

Expand Down Expand Up @@ -354,12 +364,12 @@ process somaticcombine_tonly {
vcfin2="-V:" + vcfin1.join(" -V:")

"""
java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
-R $GENOMEREF \
java -jar \$GATK_JAR -T CombineVariants \
-nt $task.cpus \
--genotypeMergeOption PRIORITIZE \
--priority_list mutect2,octopus,vardict,varscan \
--priority_list mutect2_tonly,octopus_tonly,vardict_tonly,varscan_tonly \
--filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
-O ${tumorsample}_combined.vcf.gz \
-O ${tumorsample}_combined_tonly.vcf.gz \
$vcfin2
"""

Expand Down
Loading

0 comments on commit 2beaaa8

Please sign in to comment.