From a3957887d88c19030805b988668ff9204d0bdcb9 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 15 Nov 2023 17:56:57 -0500 Subject: [PATCH 1/4] fixes: additional fixes for indexes --- docker/logan_base/build.sh | 8 ++-- nextflow.config | 22 +++++------ workflow/modules/copynumber.nf | 3 +- workflow/modules/variant_calling.nf | 60 ----------------------------- workflow/modules/workflows.nf | 2 +- workflow/modules/workflows_tonly.nf | 5 ++- 6 files changed, 21 insertions(+), 79 deletions(-) diff --git a/docker/logan_base/build.sh b/docker/logan_base/build.sh index 5ed0769..1b10839 100644 --- a/docker/logan_base/build.sh +++ b/docker/logan_base/build.sh @@ -4,11 +4,11 @@ #docker buildx inspect upbeat_ganguly #docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push . -docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.0 -f Dockerfile . -docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base:v0.3.0 -docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base +docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.3 -f Dockerfile . 
+docker tag ccbr_logan_base:v0.3.3 dnousome/ccbr_logan_base:v0.3.3 +docker tag ccbr_logan_base:v0.3.3 dnousome/ccbr_logan_base -docker push dnousome/ccbr_logan_base:v0.3.0 +docker push dnousome/ccbr_logan_base:v0.3.3 docker push dnousome/ccbr_logan_base:latest diff --git a/nextflow.config b/nextflow.config index d868ce3..ee489ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,19 +69,19 @@ profiles { process { executor = 'local' withLabel: process_low { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } withLabel: process_mid { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } withLabel: process_highcpu { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } withLabel: process_highmem { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } withLabel: process_somaticcaller { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' } withName:fastq_screen { container = 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' @@ -191,7 +191,7 @@ profiles { } //Other Processes withName:bwamem2 { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' memory=150.GB time=48.h cpus=16 @@ -235,30 +235,30 @@ profiles { } //Global Processes withLabel: process_low { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' memory = 16.GB time = 12.h cpus = 2 } withLabel: process_mid { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' memory=24.GB time=24.h cpus=4 } withLabel: process_highcpu { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 
'docker://dnousome/ccbr_logan_base:v0.3.3' memory = 54.GB time = 72.h cpus = 16 } withLabel: process_highmem { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' memory = 48.GB time = 48.h } withLabel: process_somaticcaller { - container = 'docker://dnousome/ccbr_logan_base:v0.3.1' + container = 'docker://dnousome/ccbr_logan_base:v0.3.3' memory = 48.GB cpus = 4 time = 72.h diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf index a30fa36..5d6116a 100644 --- a/workflow/modules/copynumber.nf +++ b/workflow/modules/copynumber.nf @@ -370,7 +370,8 @@ process purple { tuple val(tumorname), path(cobaltin), path(amberin), - path(somaticvcf) + path(somaticvcf), + path(somaticvcfindex) output: tuple val(tumorname), path("${tumorname}") diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index 8bbbb1e..050c672 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -548,29 +548,6 @@ process combineVariants { -process bcftools_index_octopus { - label 'process_low' - - input: - tuple val(sample), - path(vcf) - - output: - tuple val(sample), - path(vcf), - path("${vcf}.tbi") - - script: - """ - bcftools index -t ${vcf} - """ - - stub: - """ - touch ${vcf} ${vcf}.tbi - """ - -} process combineVariants_octopus { label 'process_highmem' @@ -645,43 +622,6 @@ process bcftools_index_octopus { } -process combineVariants_octopus { - label 'process_highmem' - publishDir(path: "${outdir}/vcfs/", mode: 'copy') - - input: - tuple val(sample), path(vcfs), path(vcfsindex), val(vc) - - output: - tuple val(sample), - path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz") - - script: - vcfin = vcfs.join(" ") - - """ - mkdir ${vc} - bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz - bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz - bcftools norm 
${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ - awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ - sed '/^\$/d' > ${sample}.${vc}.temp.vcf - - bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz - - mv ${sample}.${vc}.marked.vcf.gz ${vc} - """ - - stub: - - """ - mkdir ${vc} - touch ${vc}/${sample}.${vc}.marked.vcf.gz - touch ${vc}/${sample}.${vc}.norm.vcf.gz - - """ - -} diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 59343cc..372c884 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -405,7 +405,7 @@ workflow CNVhuman { bamwithsample | cobalt_tn purplein=amber_tn.out.join(cobalt_tn.out) purplein.join(somaticcall_input)| - map{t1,amber,cobalt,n1,vc,vcf -> tuple(t1,amber,cobalt,vcf)} + map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)} | purple } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index f2b0165..38e58bd 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -238,7 +238,7 @@ workflow SV_TONLY { //Survivor gunzip(manta_out).concat(svaba_out).groupTuple() - | survivor_sv | annotsv_survivor_tonly.out.ifEmpty("Empty SV input--No SV annotated") + | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated") } @@ -266,8 +266,9 @@ workflow CNVhuman_tonly { bamwithsample | cobalt_tonly purplein=amber_tonly.out.join(cobalt_tonly.out) purplein.join(somaticcall_input)| - map{t1,amber,cobalt,vc,vcf -> tuple(t1,amber,cobalt,vcf)} + map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)} | purple + } From 1d27183a95f914d4dc44d2f38483927713fcfd48 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 15 Nov 2023 18:23:35 -0500 Subject: [PATCH 2/4] fix: lofreq error --- workflow/modules/variant_calling.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index 050c672..2e80ae7 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -421,7 +421,6 @@ process octopus_tn { process lofreq_tn { label 'process_somaticcaller' - module=["lofreq/2.1.5","bcftools/1.17"] input: tuple val(tumorname), path(tumor), path(tumorbai), @@ -439,7 +438,7 @@ process lofreq_tn { script: """ - lofreq -f $GENOMEREF -n ${normal} -t ${tumor} \ + lofreq somatic -f $GENOMEREF -n ${normal} -t ${tumor} \ -d $DBSNP \ --threads $task.cpus \ -l ${bed} \ From 6ed9b36bd3ef52e96e179a678f8beff839dc6029 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Thu, 16 Nov 2023 20:28:46 -0500 Subject: [PATCH 3/4] fix: shorten workflows --- workflow/modules/variant_calling.nf | 174 ++++++++++--------- workflow/modules/variant_calling_tonly.nf | 1 + workflow/modules/workflows.nf | 195 +++++++++++----------- workflow/modules/workflows_tonly.nf | 41 ++--- 4 files changed, 202 insertions(+), 209 deletions(-) diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index 2e80ae7..64b0dde 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -18,13 +18,15 @@ process mutect2 { label 'process_somaticcaller' input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), + path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz"), - path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"), - path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"), + 
path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats") script: @@ -38,16 +40,16 @@ process mutect2 { --tumor-sample ${tumor.simpleName} \ $GNOMADGERMLINE \ --panel-of-normals ${PON} \ - --output ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz \ - --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \ + --output ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz \ + --f1r2-tar-gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz \ --independent-mates """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz - touch ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz - touch ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats """ } @@ -56,7 +58,8 @@ process pileup_paired_t { label 'process_highmem' input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: tuple val(tumorname), @@ -84,7 +87,8 @@ process pileup_paired_n { label 'process_highmem' input: - tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: tuple val(tumorname), @@ -224,48 +228,50 @@ process mutect2filter { publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy') input: - tuple val(sample), path(mutvcfs), path(stats), path(obs), + tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination) + output: - 
tuple val(sample), - path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"), - path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"), - path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv") + tuple val("${tumor}_vs_${normal}"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), + path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv") script: mut2in = mutvcfs.join(" -I ") """ - gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz - gatk IndexFeatureFile -I ${sample}.concat.vcf.gz + gatk GatherVcfs -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz + gatk IndexFeatureFile -I ${tumor}_vs_${normal}.concat.vcf.gz gatk FilterMutectCalls \ -R $GENOMEREF \ - -V ${sample}.concat.vcf.gz \ + -V ${tumor}_vs_${normal}.concat.vcf.gz \ --ob-priors ${obs} \ --contamination-table ${tumorcontamination} \ --stats ${stats} \ - -O ${sample}.mut2.marked.vcf.gz + -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz gatk SelectVariants \ -R $GENOMEREF \ - --variant ${sample}.mut2.marked.vcf.gz \ + --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ --exclude-filtered \ - --output ${sample}.mut2.final.vcf.gz + --output ${tumor}_vs_${normal}.mut2.final.vcf.gz - bcftools sort ${sample}.mut2.final.vcf.gz |\ + bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ - sed '/^\$/d' > ${sample}.mut2.norm.vcf |\ - bcftools view - -Oz -o ${sample}.mut2.norm.vcf.gz - bcftools index -t ${sample}.mut2.norm.vcf.gz + sed '/^\$/d' > ${tumor}_vs_${normal}.mut2.norm.vcf |\ + bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz + bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz """ stub: """ - touch ${sample}.mut2.marked.vcf.gz 
${sample}.mut2.marked.vcf.gz.tbi - touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi - touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz ${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.norm.vcf.gz ${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi + touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv """ @@ -275,12 +281,15 @@ process mutect2filter { process strelka_tn { label 'process_highcpu' input: - tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) + tuple val(tumorname), path(tumor), path(tumorbai), + val(normalname), path(normal), path(normalbai), path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), - path("${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi") script: @@ -297,20 +306,20 @@ process strelka_tn { --runDir=wd \ --callRegions ${bed}.gz ./wd/runWorkflow.py -m local -j $task.cpus - mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz - + mv wd/results/variants/somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz + mv wd/results/variants/somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + mv wd/results/variants/somatic.snvs.vcf.gz.tbi 
${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi + mv wd/results/variants/somatic.indels.vcf.gz.tbi ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz - touch ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi """ - } @@ -321,8 +330,8 @@ process vardict_tn { tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.vardict.vcf") + tuple val(tumorname), val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf") //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” script: @@ -342,14 +351,14 @@ process vardict_tn { -d 10 \ -v 6 \ -S \ - -f 0.05 > ${tumor.simpleName}_${bed.simpleName}.vardict.vcf + -f 0.05 > ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.vardict.vcf + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf """ @@ -363,11 +372,13 @@ process varscan_tn { input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed), - path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table) + val(tumor1), + path(tumorpileup), path(normalpileup), + 
path(tumor_con_table), path(normal_con_table) output: - tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.varscan.vcf") + tuple val(tumorname),val(normalname), + path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf") shell: @@ -375,18 +386,19 @@ process varscan_tn { tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" - varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_{bed.simpleName}.vardict.vcf $varscan_opts --mpileup 1 + varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_vs_!{normal.simpleName}_{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1 ''' stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.varscan.vcf + touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf """ } + process octopus_tn { //label 'process_highcpu' Using separate docker for octopus @@ -396,7 +408,7 @@ process octopus_tn { output: - tuple val("${tumorname}_vs_${normalname}"), + tuple val("${tumorname}_vs_${normalname}"), path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz") script: @@ -419,6 +431,7 @@ process octopus_tn { } + process lofreq_tn { label 'process_somaticcaller' @@ -428,7 +441,8 @@ process lofreq_tn { output: - tuple val(tumorname), + + tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), 
path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), @@ -443,11 +457,11 @@ process lofreq_tn { --threads $task.cpus \ -l ${bed} \ --call-indels \ - -o ${tumorname}_vs_${normalname}_${bed.simpleName} + -o ${tumorname}_vs_${normalname}_${bed.simpleName}_ bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ - ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" --threads $task.cpus -Oz -o \ - ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" + ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ + ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz """ @@ -468,15 +482,13 @@ process lofreq_tn { process muse_tn { label 'process_somaticcaller' - module=["muse/2.0.1"] - input: tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai) output: - tuple val(tumorname), + tuple val(tumorname), val(normalname), path("${tumorname}_vs_${normalname}.vcf.gz") script: @@ -595,16 +607,15 @@ process combineVariants_octopus { } - process bcftools_index_octopus { label 'process_low' input: - tuple val(sample), + tuple val(tumor), path(vcf) output: - tuple val(sample), + tuple val(tumor), path(vcf), path("${vcf}.tbi") @@ -622,23 +633,20 @@ process bcftools_index_octopus { } - - - - - process combineVariants_strelka { //Concat all somatic snvs/indels across all files, strelka separates snv/indels label 'process_mid' publishDir(path: "${outdir}/vcfs/strelka", mode: 'copy') input: - tuple val(sample), path(strelkasnvs), path(strelkaindels) + tuple val(sample), + path(strelkasnvs), path(snvindex), + path(strelkaindels), path(indelindex) output: tuple val(sample), - path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"), - path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi") + 
path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"), + path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi") script: @@ -648,7 +656,7 @@ process combineVariants_strelka { """ - bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz + bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz @@ -671,6 +679,7 @@ process combineVariants_strelka { } + process somaticcombine { label 'process_mid' publishDir(path: "${outdir}/vcfs/combined", mode: 'copy') @@ -682,13 +691,12 @@ process somaticcombine { output: tuple val(tumorsample), val(normal), - path("${tumorsample}_combined.vcf.gz"), - path("${tumorsample}_combined.vcf.gz.tbi") + path("${tumorsample}_vs_${normal}_combined.vcf.gz"), + path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi") script: - vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } - vcfin2="-V:" + vcfin1.join(" -V:") - println vcfin2 + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") """ java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \ @@ -696,19 +704,22 @@ process somaticcombine { --genotypeMergeOption PRIORITIZE \ --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED - -O ${tumorsample}_combined.vcf.gz \ + -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ $vcfin2 """ stub: + vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } + vcfin2="-V:" + vcfin1.join(" -V:") """ - touch ${tumorsample}_combined.vcf.gz - touch ${tumorsample}_combined.vcf.gz.tbi + touch ${tumorsample}_vs_${normal}_combined.vcf.gz + touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi """ 
} + process annotvep_tn { publishDir(path: "${outdir}/mafs/", mode: 'copy') @@ -717,7 +728,7 @@ process annotvep_tn { val(vc), path(tumorvcf),path(vcfindex) output: - path("paired/${vc}/${tumorsample}.maf") + path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") shell: @@ -756,7 +767,7 @@ process annotvep_tn { vcf2maf.pl \ --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \ - --output-maf paired/!{vc}/!{tumorsample}.maf \ + --output-maf paired/!{vc}/!{tumorsample}_vs_!{normalsample}.maf \ --tumor-id !{tumorsample} \ --normal-id !{normalsample} \ --vep-path /opt/vep/src/ensembl-vep \ @@ -764,19 +775,16 @@ process annotvep_tn { --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \ --vep-overwrite - ''' stub: """ mkdir -p paired/${vc} - touch paired/${vc}/${tumorsample}.maf + touch paired/${vc}/${tumorsample}_vs_${normalsample}.maf """ } - - process combinemafs_tn { label 'process_low' publishDir(path: "${outdir}/mafs/paired", mode: 'copy') diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 993cb34..8233cb0 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -333,6 +333,7 @@ process octopus_tonly { } + process somaticcombine_tonly { label 'process_mid' publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy') diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 372c884..11c0bab 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -111,9 +111,7 @@ workflow ALIGN { tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0) applybqsr(tobqsr) - //samtoolsindex(applybqsr.out) - - //samtoolsindex.out.view() + //sample_sheet.view() bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)} @@ -158,6 +156,9 @@ workflow VC { sample_sheet main: + //Create Pairing for TN (in case of dups) + 
sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} + bambyinterval=bamwithsample.combine(splitout.flatten()) //Paired Mutect2 @@ -176,142 +177,133 @@ workflow VC { pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout) - contamination_paired(pileup_paired_all) - - mut2out_lor=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - - learnreadorientationmodel(mut2out_lor) - - mut2out_mstats=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - - mergemut2stats(mut2out_mstats) - - allmut2tn=mutect2.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } - )} + contamination_paired(pileup_paired_all) + + + mutect2.out.groupTuple(by:[0,1]) + | multiMap { tumor,normal,vcfs,f1r2,stats -> + mut2out_lor: tuple("${tumor}_vs_${normal}", + f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2out_mstats: tuple( "${tumor}_vs_${normal}", + stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tn: tuple( "${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2out} - mut2tn_filter=allmut2tn - .join(mergemut2stats.out) - .join(learnreadorientationmodel.out) - .join(contamination_paired.out) + learnreadorientationmodel(mut2out.mut2out_lor) + mergemut2stats(mut2out.mut2out_mstats) + + mutect2_in=mut2out.allmut2tn + | join(mergemut2stats.out) + | join(learnreadorientationmodel.out) + | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} + | join(contamination_paired.out) 
+ | mutect2filter + | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + + annotvep_tn_mut2(mutect2_in) //Tumor Only Calling bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} - mutect2_t_tonly(bambyinterval_t) - - //LOR - mut2tout_lor=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - learnreadorientationmodel_tonly(mut2tout_lor) - - //Stats - mut2tonly_mstats=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - mergemut2stats_tonly(mut2tonly_mstats) - - - //Contamination - contamination_tumoronly(pileup_paired_tout) - - //Final TUMOR ONLY FILTER - allmut2tonly=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } - )} - - mut2tonly_filter=allmut2tonly - .join(mergemut2stats_tonly.out) - .join(learnreadorientationmodel_tonly.out) - .join(contamination_tumoronly.out) + mutect2_t_tonly(bambyinterval_t) + + mutect2_t_tonly.out.groupTuple() + | multiMap { tumor,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumor, + f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumor, + stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumor, + vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2tonlyout} - //Annotation) - mutect2_in=mutect2filter(mut2tn_filter) - | join(sample_sheet) - | 
map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} - annotvep_tn_mut2(mutect2_in) - + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) + mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) + contamination_tumoronly(pileup_paired_tout) - mutect2_in_tonly=mutect2filter_tonly(mut2tonly_filter) + mutect2_in_tonly=mut2tonlyout.allmut2tonly + | join(mergemut2stats_tonly.out) + | join(learnreadorientationmodel_tonly.out) + | join(contamination_tumoronly.out) + | mutect2filter_tonly | join(sample_sheet) | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} annotvep_tonly_mut2(mutect2_in_tonly) //Strelka TN - strelka_tn(bambyinterval) - strelkaout=strelka_tn.out.groupTuple() - .map { samplename,vcfs,indels -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() }, - indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } - )} - strelka_in=combineVariants_strelka(strelkaout) | join(sample_sheet) - | map{tumor,markedvcf,markedindex,finalvcf,finalindex,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1]) + | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex, + indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} + | combineVariants_strelka | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} annotvep_tn_strelka(strelka_in) //Vardict - vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict - 
vardict_in=vardict_comb.join(sample_sheet) - .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} + | combineVariants_vardict | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} annotvep_tn_vardict(vardict_in) //VarDict_tonly - vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> - tuple(tumorname,tumorbam,tumorbai,bed)} - vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly - - vardict_in_tonly=combineVariants_vardict_tonly.out.join(sample_sheet) - .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + vardict_in_tonly=bambyinterval + | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> + tuple(tumorname,tumorbam,tumorbai,bed)} + | vardict_tonly | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} + | combineVariants_vardict_tonly | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} annotvep_tonly_vardict(vardict_in_tonly) //VarScan TN - varscan_in=bambyinterval.join(contamination_paired.out) - | varscan_tn | groupTuple() |map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan - | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + varscan_in=bambyinterval.combine(contamination_paired.out) + | varscan_tn | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> 
tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf/)[0][1].toInteger()},"varscan")} + | combineVariants_varscan | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) //VarScan_TOnly - varscan_in_tonly=bambyinterval.join(contamination_paired.out) - | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly - | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly + varscan_in_tonly=bambyinterval.combine(contamination_paired.out) + | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc -> + tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")} + | combineVariants_varscan_tonly | join(sample_sheet) | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} annotvep_tonly_varscan(varscan_in_tonly) - + //Lofreq TN - lofreq_in=lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} - | combineVariants_lofreq | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} + lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) + | map{tu,no,snv,dbsnv,indel,dbindel,vcf-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},"lofreq")} + | combineVariants_lofreq | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} annotvep_tn_lofreq(lofreq_in) //MuSE TN - muse_in=muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> 
tuple(tumor,vcf,"muse")} - | combineVariants_muse | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} + muse_in=muse_tn(bamwithsample) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} + | combineVariants_muse | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} annotvep_tn_muse(muse_in) //Octopus_TN octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus - | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} - | combineVariants_octopus | map{samplename,marked,markedindex,normvcf,normindex -> + | groupTuple() + | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")} + | combineVariants_octopus + | map{samplename,marked,markedindex,normvcf,normindex -> tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)} annotvep_tn_octopus(octopus_in) //Octopus_TOnly octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly + tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly | groupTuple() | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} | combineVariants_octopus_tonly @@ -321,7 +313,7 @@ workflow VC { //Combine All Variants Using VCF and Then Reannotate mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in) - | concat(vardict_in) |concat(varscan_in)|groupTuple() + | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1]) | somaticcombine | map{tumor,normal,vcf,index 
->tuple(tumor,normal,"combined",vcf,index)} | annotvep_tn_combined @@ -337,7 +329,8 @@ workflow VC { emit: somaticcall_input=octopus_in - + + } diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 38e58bd..1546128 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -139,31 +139,25 @@ workflow VC_TONLY { mutect2_t_tonly(bambyinterval) - - //LOR - mut2tout_lor=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } - )} - learnreadorientationmodel_tonly(mut2tout_lor) - - //Stats - mut2tonly_mstats=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } - )} - mergemut2stats_tonly(mut2tonly_mstats) - - //Contamination + mutect2_t_tonly.out.groupTuple() + | multiMap { tumor,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumor, + f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumor, + stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumor, + vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + } + | set{mut2tonlyout} + + + + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) + mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) contamination_tumoronly(pileup_paired_tout) - //Final TUMOR ONLY FILTER - allmut2tonly=mutect2_t_tonly.out.groupTuple() - .map { samplename,vcfs,f1r2,stats -> tuple( samplename, - vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } - )} - mut2tonly_filter=allmut2tonly + mut2tonly_filter=mut2tonlyout.allmut2tonly .join(mergemut2stats_tonly.out) .join(learnreadorientationmodel_tonly.out) 
.join(contamination_tumoronly.out) @@ -211,9 +205,6 @@ workflow VC_TONLY { somaticcall_input=combineVariants_octopus.out - emit: - somaticcall_input=combineVariants_octopus.out - } From ba00d4ea660105d25d873013e493a08ccddf5788 Mon Sep 17 00:00:00 2001 From: Darryl Nousome Date: Wed, 22 Nov 2023 11:12:53 -0500 Subject: [PATCH 4/4] feat: update docker images and callers --- conf/hg38.config | 5 +- conf/mm10.config | 1 + docker/logan_base/Dockerfile | 13 +---- nextflow.config | 1 + workflow/modules/copynumber.nf | 4 -- workflow/modules/variant_calling.nf | 71 ++++++++++++++--------- workflow/modules/variant_calling_tonly.nf | 14 ++--- workflow/modules/workflows.nf | 14 ++--- workflow/modules/workflows_tonly.nf | 22 +++---- workflow/scripts/lofreq_convert.sh | 32 ++++++++++ 10 files changed, 109 insertions(+), 68 deletions(-) create mode 100755 workflow/scripts/lofreq_convert.sh diff --git a/conf/hg38.config b/conf/hg38.config index 5ae4d11..90c1881 100644 --- a/conf/hg38.config +++ b/conf/hg38.config @@ -1,9 +1,10 @@ params { genomes { 'hg38' { - genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) + genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" + genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" - wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" // + wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed" //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" 
//Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // diff --git a/conf/mm10.config b/conf/mm10.config index 6dd27af..6841ee3 100644 --- a/conf/mm10.config +++ b/conf/mm10.config @@ -2,6 +2,7 @@ params { genomes { 'mm10' { genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) + genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // file(params.genome) bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa" genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict" //FIXwgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" // diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile index 55832b5..9b04b2c 100644 --- a/docker/logan_base/Dockerfile +++ b/docker/logan_base/Dockerfile @@ -17,10 +17,12 @@ WORKDIR /opt2 # This section installs system packages required for your project # If you need extra system packages add them here. 
# python/3.8.0 and python/2.7.16 (strelka and manta) +# JDK 17 for DISCVRSeq RUN apt-get update \ && apt-get -y upgrade \ && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - bc + bc \ + openjdk-17-jdk # Common bioinformatics tools # bwa/0.7.17-4 bowtie/1.2.3 bowtie2/2.3.5.1 @@ -158,15 +160,6 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3 && rm /opt2/VarDict-1.8.3.tar ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH" -# Install Octopus/v0.7.4 -#RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \ -# && tar -xvzf /opt2/v0.7.4.tar.gz \ -# && rm /opt2/v0.7.4.tar.gz \ -# && cd /opt2/octopus-0.7.4 \ -# && cmake . -#ENV PATH="/opt2/octopus-0.7.4/bin:$PATH" - - # Fastp From Opengene github RUN wget http://opengene.org/fastp/fastp.0.23.2 \ && mkdir fastp \ diff --git a/nextflow.config b/nextflow.config index ee489ec..b9b3b00 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,7 @@ params { script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl" freec_significance = "${projectDir}/workflow/scripts/assess_significance.R" freec_plot = "${projectDir}/workflow/scripts/makeGraph.R" + lofreq_convert = "${projectDir}/workflow/scripts/lofreq_convert.sh" vep_cache = "/fdb/VEP/102/cache" //Biowulf diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf index 5d6116a..b15a8c2 100644 --- a/workflow/modules/copynumber.nf +++ b/workflow/modules/copynumber.nf @@ -21,8 +21,6 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' - - //DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) //ascatR= @@ -379,7 +377,6 @@ process purple { script: """ - java -jar purple.jar \ -tumor ${tumorname} \ -amber ${amberin} \ @@ -392,7 +389,6 @@ process purple { -driver_gene_panel $DRIVERS \ -somatic_hotspots $HOTSPOTS \ -output_dir ${tumorname} - 
""" stub: diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf index 64b0dde..f0cae08 100644 --- a/workflow/modules/variant_calling.nf +++ b/workflow/modules/variant_calling.nf @@ -1,4 +1,5 @@ GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) KGPGERMLINE=params.genomes[params.genome].kgp DBSNP=file(params.genomes[params.genome].dbsnp) @@ -9,6 +10,7 @@ VEPSPECIES=params.genomes[params.genome].vepspecies VEPBUILD=params.genomes[params.genome].vepbuild SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest +LOFREQ_CONVERT=params.lofreq_convert //Output outdir=file(params.output) @@ -242,8 +244,7 @@ process mutect2filter { mut2in = mutvcfs.join(" -I ") """ - gatk GatherVcfs -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz - gatk IndexFeatureFile -I ${tumor}_vs_${normal}.concat.vcf.gz + gatk SortVcf -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz --CREATE_INDEX gatk FilterMutectCalls \ -R $GENOMEREF \ -V ${tumor}_vs_${normal}.concat.vcf.gz \ @@ -251,8 +252,6 @@ process mutect2filter { --contamination-table ${tumorcontamination} \ --stats ${stats} \ -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz - - gatk SelectVariants \ -R $GENOMEREF \ --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \ @@ -262,8 +261,7 @@ process mutect2filter { bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\ bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ - sed '/^\$/d' > ${tumor}_vs_${normal}.mut2.norm.vcf |\ - bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz + sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz """ @@ -377,23 +375,36 @@ process varscan_tn { path(tumor_con_table), 
path(normal_con_table) output: - tuple val(tumorname),val(normalname), + tuple val(tumorname), val(normalname), path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf") shell: - ''' tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l) normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l) + dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{normal} !{tumor}" varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity" - varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_vs_!{normal.simpleName}_{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1 + varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1" + eval "$varscan_cmd" + + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \ + | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp + awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \ + | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp + + java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \ + -R !{GENOMEREF} \ + --assumeIdenticalSamples \ + --filteredrecordsmergetype KEEP_UNCONDITIONAL \ + --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \ + --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \ + -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf + ''' stub: - """ touch 
${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf - """ } @@ -420,6 +431,7 @@ process octopus_tn { --threads $task.cpus \ $GERMLINE_FOREST \ $SOMATIC_FOREST \ + --target-working-memory 64Gb \ -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz """ @@ -447,7 +459,8 @@ process lofreq_tn { path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"), path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"), - path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz") + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"), + path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi") script: @@ -461,8 +474,11 @@ process lofreq_tn { bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \ ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \ - ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz + $LOFREQ_CONVERT ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz ${tumorname} \ + | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz + bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz """ @@ -473,7 +489,7 @@ process lofreq_tn { touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz" touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" - touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" + touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" 
"${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi" """ } @@ -496,8 +512,9 @@ process muse_tn { """ MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \ - -O ${tumorname}_vs_${normalname} -n $task.cpus -D $DBSNP -G - + -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G + + bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}.vcf.gz """ stub: @@ -528,11 +545,10 @@ process combineVariants { """ mkdir ${vc} - gatk --java-options "-Xmx48g" MergeVcfs \ - -O ${sample}.${vc}.temp.vcf.gz \ - -D $GENOMEDICT \ + gatk --java-options "-Xmx48g" SortVcf \ + -O ${sample}.${vc}.marked.vcf.gz \ + -SD $GENOMEDICT \ -I $vcfin - bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ sed '/^\$/d' > ${sample}.${vc}.temp.vcf @@ -540,8 +556,8 @@ process combineVariants { bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz mv ${sample}.${vc}.marked.vcf.gz ${vc} - - bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t + mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc} + bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t """ @@ -560,7 +576,7 @@ process combineVariants { -process combineVariants_octopus { +process combineVariants_alternative { label 'process_highmem' publishDir(path: "${outdir}/vcfs/", mode: 'copy') @@ -579,7 +595,8 @@ process combineVariants_octopus { """ mkdir ${vc} - bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz + bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz + bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf.gz bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz bcftools norm ${sample}.${vc}.marked.vcf.gz 
-m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\ awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\ @@ -699,11 +716,11 @@ process somaticcombine { vcfin2="-V:" + vcfin1.join(" -V:") """ - java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ -R $GENOMEREF \ --genotypeMergeOption PRIORITIZE \ --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \ - --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ -O ${tumorsample}_vs_${normal}_combined.vcf.gz \ $vcfin2 """ @@ -725,7 +742,7 @@ process annotvep_tn { input: tuple val(tumorsample), val(normalsample), - val(vc), path(tumorvcf),path(vcfindex) + val(vc), path(tumorvcf), path(vcfindex) output: path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf") diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf index 8233cb0..227dab4 100644 --- a/workflow/modules/variant_calling_tonly.nf +++ b/workflow/modules/variant_calling_tonly.nf @@ -1,4 +1,5 @@ GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEFAI=file(params.genomes[params.genome].genomefai) GENOMEDICT=file(params.genomes[params.genome].genomedict) KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz" DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz" @@ -249,9 +250,7 @@ process varscan_tonly { pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}" varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts" - - eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz" - eval "bcftools view -U !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" + eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf" ''' stub: @@ -318,7 +317,9 @@ process octopus_tonly { """ octopus -R 
$GENOMEREF -C cancer -I ${tumor} \ - --annotations AC AD DP -t ${bed} \ + --annotations AC AD DP \ + --target-working-memory 64Gb \ + -t ${bed} \ $SOMATIC_FOREST \ -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus @@ -351,14 +352,13 @@ process somaticcombine_tonly { script: vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b } vcfin2="-V:" + vcfin1.join(" -V:") - println vcfin2 """ - java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \ + java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \ -R $GENOMEREF \ --genotypeMergeOption PRIORITIZE \ --priority_list mutect2,octopus,vardict,varscan \ - --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED + --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \ -O ${tumorsample}_combined.vcf.gz \ $vcfin2 """ diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf index 11c0bab..79521c7 100644 --- a/workflow/modules/workflows.nf +++ b/workflow/modules/workflows.nf @@ -22,8 +22,8 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly; combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly; - combineVariants as combineVariants_lofreq; combineVariants as combineVariants_muse; - combineVariants_octopus; combineVariants_octopus as combineVariants_octopus_tonly; + combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse; + combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus; annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse; @@ -231,7 +231,7 @@ workflow VC { | 
join(contamination_tumoronly.out) | mutect2filter_tonly | join(sample_sheet) - | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} + | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} annotvep_tonly_mut2(mutect2_in_tonly) //Strelka TN @@ -280,7 +280,7 @@ workflow VC { //Lofreq TN lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) - | map{tu,no,snv,dbsnv,indel,dbindel,vcf-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},"lofreq")} + | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} | combineVariants_lofreq | join(sample_sheet_paired) | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} annotvep_tn_lofreq(lofreq_in) @@ -319,7 +319,7 @@ workflow VC { | annotvep_tn_combined mutect2_in_tonly|concat(octopus_in_tonly) - | concat(vardict_in_tonly)|concat(varscan_in_tonly) + | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() | somaticcombine_tonly | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} | annotvep_tonly_combined @@ -512,9 +512,9 @@ workflow QC_GL { samtools_flagstats_out=samtools_flagstats.out.collect() bcftools_stats_out= bcftools_stats.out.collect() gatk_varianteval_out= gatk_varianteval.out.collect() - snpeff_out=snpeff.out.collect()//map{vcf,csv,html->vcf,csv,html}.collect() + snpeff_out=snpeff.out.collect() vcftools_out=vcftools.out - collectvariantcallmetrics_out=collectvariantcallmetrics.out//.map{details,summary->details,summary} + collectvariantcallmetrics_out=collectvariantcallmetrics.out conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,bcftools_stats_out, 
gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList() diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf index 1546128..ebcaf72 100644 --- a/workflow/modules/workflows_tonly.nf +++ b/workflow/modules/workflows_tonly.nf @@ -20,7 +20,7 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; contamination_paired; learnreadorientationmodel; mergemut2stats; combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly; - combineVariants_octopus ; + combineVariants_alternative ; annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; combinemafs_tn} from './variant_calling.nf' @@ -163,16 +163,16 @@ workflow VC_TONLY { .join(contamination_tumoronly.out) mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) - | join(sample_sheet) - | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} + | join(sample_sheet) + | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} annotvep_tonly_mut2(mutect2_tonly_in) //VarDict vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} - | combineVariants_vardict_tonly - | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + | combineVariants_vardict_tonly + | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)} annotvep_tonly_vardict(vardict_in_tonly) //VarScan_tonly @@ -185,11 +185,11 @@ workflow VC_TONLY { //Octopus_tonly octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus - | groupTuple() - | 
map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} - ,vcfindex, "octopus_tonly")} - | combineVariants_octopus | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} + | groupTuple() + | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name} + ,vcfindex, "octopus_tonly")} + | combineVariants_alternative | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} annotvep_tonly_octopus(octopus_in_tonly) diff --git a/workflow/scripts/lofreq_convert.sh b/workflow/scripts/lofreq_convert.sh new file mode 100755 index 0000000..1d5edda --- /dev/null +++ b/workflow/scripts/lofreq_convert.sh @@ -0,0 +1,32 @@ +INPUT_FILE="$1" +TUMOR_NAME="$2" +export TUMOR_NAME + +zcat "${INPUT_FILE}" \ + | awk '($4=="A" || $4 == "C" || $4=="T" || $4=="G" || /^\#/)' \ + | perl -ne 'print if /^#|^(chr)*[\dX]+\s.+/' \ + | perl -ne 's/AF=/VAF=/g;s/ID=AF/ID=VAF/;print;' \ + | perl -ne ' + # Add 2 new rows to the description and 2 new columns in the header + if(/^#/){ + if(/##INFO=