From a3957887d88c19030805b988668ff9204d0bdcb9 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 15 Nov 2023 17:56:57 -0500
Subject: [PATCH 1/4] fixes: additional fixes for indexes

---
 docker/logan_base/build.sh          |  8 ++--
 nextflow.config                     | 22 +++++------
 workflow/modules/copynumber.nf      |  3 +-
 workflow/modules/variant_calling.nf | 60 -----------------------------
 workflow/modules/workflows.nf       |  2 +-
 workflow/modules/workflows_tonly.nf |  5 ++-
 6 files changed, 21 insertions(+), 79 deletions(-)

diff --git a/docker/logan_base/build.sh b/docker/logan_base/build.sh
index 5ed0769..1b10839 100644
--- a/docker/logan_base/build.sh
+++ b/docker/logan_base/build.sh
@@ -4,11 +4,11 @@
 #docker buildx inspect upbeat_ganguly
 #docker buildx build --platform linux/amd64 -f Dockerfile -t dnousome/ccbr_logan_base:v0.3.0 -t dnousome/ccbr_logan_base:latest --push .
 
-docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.0 -f Dockerfile . 
-docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base:v0.3.0
-docker tag ccbr_logan_base:v0.3.0 dnousome/ccbr_logan_base
+docker build --platform linux/amd64 --tag ccbr_logan_base:v0.3.3 -f Dockerfile . 
+docker tag ccbr_logan_base:v0.3.3 dnousome/ccbr_logan_base:v0.3.3
+docker tag ccbr_logan_base:v0.3.3 dnousome/ccbr_logan_base
 
-docker push dnousome/ccbr_logan_base:v0.3.0
+docker push dnousome/ccbr_logan_base:v0.3.3
 docker push dnousome/ccbr_logan_base:latest
 
 
diff --git a/nextflow.config b/nextflow.config
index d868ce3..ee489ec 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -69,19 +69,19 @@ profiles {
         process {
             executor = 'local'
             withLabel: process_low {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1' 
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3' 
             }
             withLabel: process_mid {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1' 
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3' 
             }
             withLabel: process_highcpu {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1' 
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3' 
             }
             withLabel: process_highmem {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1' 
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3' 
             }
             withLabel: process_somaticcaller {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1' 
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3' 
             }
             withName:fastq_screen {
                 container = 'docker://nciccbr/ccbr_fastq_screen_0.13.0:v2.0' 
@@ -191,7 +191,7 @@ profiles {
                 }
             //Other Processes
             withName:bwamem2 {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory=150.GB
                 time=48.h
                 cpus=16
@@ -235,30 +235,30 @@ profiles {
             }
             //Global Processes
             withLabel: process_low {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory = 16.GB
                 time = 12.h
                 cpus = 2
             }
             withLabel: process_mid {               
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory=24.GB
                 time=24.h
                 cpus=4
             }
             withLabel: process_highcpu {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory = 54.GB
                 time = 72.h
                 cpus = 16
             }
             withLabel: process_highmem {
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory = 48.GB
                 time = 48.h
             }
             withLabel: process_somaticcaller {   
-                container = 'docker://dnousome/ccbr_logan_base:v0.3.1'
+                container = 'docker://dnousome/ccbr_logan_base:v0.3.3'
                 memory = 48.GB
                 cpus = 4
                 time = 72.h
diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf
index a30fa36..5d6116a 100644
--- a/workflow/modules/copynumber.nf
+++ b/workflow/modules/copynumber.nf
@@ -370,7 +370,8 @@ process purple {
         tuple val(tumorname),
         path(cobaltin), 
         path(amberin),
-        path(somaticvcf)
+        path(somaticvcf),
+        path(somaticvcfindex)
 
     output:
         tuple val(tumorname), path("${tumorname}")
diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf
index 8bbbb1e..050c672 100644
--- a/workflow/modules/variant_calling.nf
+++ b/workflow/modules/variant_calling.nf
@@ -548,29 +548,6 @@ process combineVariants {
 
 
 
-process bcftools_index_octopus {
-    label 'process_low'
-
-    input:
-        tuple val(sample),
-        path(vcf)
-
-    output:
-        tuple val(sample), 
-        path(vcf), 
-        path("${vcf}.tbi")
-    
-    script:    
-    """
-    bcftools index -t ${vcf}
-    """
-
-    stub:
-    """
-    touch ${vcf} ${vcf}.tbi
-    """
-
-}
 
 process combineVariants_octopus {
     label 'process_highmem'
@@ -645,43 +622,6 @@ process bcftools_index_octopus {
 
 }
 
-process combineVariants_octopus {
-    label 'process_highmem'
-    publishDir(path: "${outdir}/vcfs/", mode: 'copy')
-
-    input:
-        tuple val(sample), path(vcfs), path(vcfsindex), val(vc)
-    
-    output:
-        tuple val(sample), 
-        path("${vc}/${sample}.${vc}.marked.vcf.gz"), path("${vc}/${sample}.${vc}.norm.vcf.gz")
-    
-    script:
-    vcfin = vcfs.join(" ")
-    
-    """
-    mkdir ${vc}
-    bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz
-    bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
-    bcftools norm ${sample}.${vc}.marked.vcf.gz --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
-        awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
-        sed '/^\$/d' > ${sample}.${vc}.temp.vcf
-
-    bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
-
-    mv ${sample}.${vc}.marked.vcf.gz ${vc}
-    """
-
-    stub:
-
-    """
-    mkdir ${vc}
-    touch ${vc}/${sample}.${vc}.marked.vcf.gz
-    touch ${vc}/${sample}.${vc}.norm.vcf.gz
-    
-    """
-
-}
 
 
 
diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf
index 59343cc..372c884 100644
--- a/workflow/modules/workflows.nf
+++ b/workflow/modules/workflows.nf
@@ -405,7 +405,7 @@ workflow CNVhuman {
         bamwithsample | cobalt_tn
         purplein=amber_tn.out.join(cobalt_tn.out)
         purplein.join(somaticcall_input)| 
-        map{t1,amber,cobalt,n1,vc,vcf -> tuple(t1,amber,cobalt,vcf)}  
+        map{t1,amber,cobalt,n1,vc,vcf,vcfindex -> tuple(t1,amber,cobalt,vcf,vcfindex)}  
             | purple
         
 }         
diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf
index f2b0165..38e58bd 100644
--- a/workflow/modules/workflows_tonly.nf
+++ b/workflow/modules/workflows_tonly.nf
@@ -238,7 +238,7 @@ workflow SV_TONLY {
 
         //Survivor
         gunzip(manta_out).concat(svaba_out).groupTuple()
-       | survivor_sv | annotsv_survivor_tonly.out.ifEmpty("Empty SV input--No SV annotated")
+       | survivor_sv | annotsv_survivor_tonly | ifEmpty("Empty SV input--No SV annotated")
 }
 
 
@@ -266,8 +266,9 @@ workflow CNVhuman_tonly {
         bamwithsample | cobalt_tonly
         purplein=amber_tonly.out.join(cobalt_tonly.out)
         purplein.join(somaticcall_input)| 
-        map{t1,amber,cobalt,vc,vcf -> tuple(t1,amber,cobalt,vcf)}  
+        map{t1,amber,cobalt,vc,vcf,index -> tuple(t1,amber,cobalt,vcf,index)}  
             | purple
+
         
 }
 

From 1d27183a95f914d4dc44d2f38483927713fcfd48 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 15 Nov 2023 18:23:35 -0500
Subject: [PATCH 2/4] fix: lofreq error

---
 workflow/modules/variant_calling.nf | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf
index 050c672..2e80ae7 100644
--- a/workflow/modules/variant_calling.nf
+++ b/workflow/modules/variant_calling.nf
@@ -421,7 +421,6 @@ process octopus_tn {
 
 process lofreq_tn {
     label 'process_somaticcaller' 
-    module=["lofreq/2.1.5","bcftools/1.17"]
 
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), 
@@ -439,7 +438,7 @@ process lofreq_tn {
     script:
 
     """
-    lofreq -f $GENOMEREF -n ${normal} -t ${tumor} \
+    lofreq somatic -f $GENOMEREF -n ${normal} -t ${tumor} \
         -d $DBSNP \
         --threads $task.cpus \
         -l ${bed} \

From 6ed9b36bd3ef52e96e179a678f8beff839dc6029 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Thu, 16 Nov 2023 20:28:46 -0500
Subject: [PATCH 3/4] fix: shorten workflows

---
 workflow/modules/variant_calling.nf       | 174 ++++++++++---------
 workflow/modules/variant_calling_tonly.nf |   1 +
 workflow/modules/workflows.nf             | 195 +++++++++++-----------
 workflow/modules/workflows_tonly.nf       |  41 ++---
 4 files changed, 202 insertions(+), 209 deletions(-)

diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf
index 2e80ae7..64b0dde 100644
--- a/workflow/modules/variant_calling.nf
+++ b/workflow/modules/variant_calling.nf
@@ -18,13 +18,15 @@ process mutect2 {
     label 'process_somaticcaller'
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed)
+        tuple val(tumorname), path(tumor), path(tumorbai),
+        val(normalname), path(normal), path(normalbai), 
+        path(bed)
     
     output:
-        tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz"),
-        path("${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz"),
-        path("${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats")
+        tuple val(tumorname), val(normalname),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz"),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz"),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats")
 
     
     script:
@@ -38,16 +40,16 @@ process mutect2 {
     --tumor-sample ${tumor.simpleName} \
     $GNOMADGERMLINE \
     --panel-of-normals ${PON} \
-    --output ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz \
-    --f1r2-tar-gz ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz \
+    --output ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz \
+    --f1r2-tar-gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz \
     --independent-mates
     """
 
     stub:
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz
-    touch ${tumor.simpleName}_${bed.simpleName}.f1r2.tar.gz
-    touch ${tumor.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.f1r2.tar.gz
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.mut2.vcf.gz.stats
     """
 }
 
@@ -56,7 +58,8 @@ process pileup_paired_t {
     label 'process_highmem'
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed)
+        tuple val(tumorname), path(tumor), path(tumorbai),
+        val(normalname), path(normal), path(normalbai), path(bed)
     
     output:
         tuple val(tumorname),
@@ -84,7 +87,8 @@ process pileup_paired_n {
     label 'process_highmem'
 
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai),val(normalname), path(normal), path(normalbai), path(bed)
+        tuple val(tumorname), path(tumor), path(tumorbai),
+        val(normalname), path(normal), path(normalbai), path(bed)
     
     output:
         tuple val(tumorname),
@@ -224,48 +228,50 @@ process mutect2filter {
     publishDir(path: "${outdir}/vcfs/mutect2", mode: 'copy')
 
     input:
-        tuple val(sample), path(mutvcfs), path(stats), path(obs), 
+        tuple val(tumor), val(normal),path(mutvcfs), path(stats), path(obs), 
         path(pileups), path(normal_pileups),path(tumorcontamination),path(normalcontamination)
+    
     output:
-        tuple val(sample), 
-        path("${sample}.mut2.marked.vcf.gz"), path("${sample}.mut2.marked.vcf.gz.tbi"),
-        path("${sample}.mut2.norm.vcf.gz"), path("${sample}.mut2.norm.vcf.gz.tbi"), 
-        path("${sample}.mut2.marked.vcf.gz.filteringStats.tsv")
+        tuple val("${tumor}_vs_${normal}"), 
+        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz"), 
+        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi"),
+        path("${tumor}_vs_${normal}.mut2.norm.vcf.gz"), path("${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi"), 
+        path("${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv")
 
     script:
     mut2in = mutvcfs.join(" -I ")
 
     """
-    gatk GatherVcfs -I ${mut2in} -O ${sample}.concat.vcf.gz 
-    gatk IndexFeatureFile -I ${sample}.concat.vcf.gz 
+    gatk GatherVcfs -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz 
+    gatk IndexFeatureFile -I ${tumor}_vs_${normal}.concat.vcf.gz 
     gatk FilterMutectCalls \
         -R $GENOMEREF \
-        -V ${sample}.concat.vcf.gz \
+        -V ${tumor}_vs_${normal}.concat.vcf.gz \
         --ob-priors ${obs} \
         --contamination-table ${tumorcontamination} \
         --stats ${stats} \
-        -O ${sample}.mut2.marked.vcf.gz
+        -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz
 
 
     gatk SelectVariants \
         -R $GENOMEREF \
-        --variant ${sample}.mut2.marked.vcf.gz \
+        --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \
         --exclude-filtered \
-        --output ${sample}.mut2.final.vcf.gz
+        --output ${tumor}_vs_${normal}.mut2.final.vcf.gz
     
-    bcftools sort ${sample}.mut2.final.vcf.gz |\
+    bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\
     bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
-        sed '/^\$/d' > ${sample}.mut2.norm.vcf |\
-    bcftools view - -Oz -o  ${sample}.mut2.norm.vcf.gz
-    bcftools index -t ${sample}.mut2.norm.vcf.gz
+        sed '/^\$/d' |\
+    bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz
+    bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz
     """
 
     stub:
     """
-    touch ${sample}.mut2.marked.vcf.gz ${sample}.mut2.marked.vcf.gz.tbi
-    touch ${sample}.mut2.norm.vcf.gz ${sample}.mut2.norm.vcf.gz.tbi
-    touch ${sample}.mut2.marked.vcf.gz.filteringStats.tsv
+    touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz ${tumor}_vs_${normal}.mut2.marked.vcf.gz.tbi
+    touch ${tumor}_vs_${normal}.mut2.norm.vcf.gz ${tumor}_vs_${normal}.mut2.norm.vcf.gz.tbi
+    touch ${tumor}_vs_${normal}.mut2.marked.vcf.gz.filteringStats.tsv
     """
 
 
@@ -275,12 +281,15 @@ process mutect2filter {
 process strelka_tn {
     label 'process_highcpu'
     input:
-        tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed)
+        tuple val(tumorname), path(tumor), path(tumorbai), 
+        val(normalname), path(normal), path(normalbai), path(bed)
     
     output:
-        tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"),
-        path("${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz")
+        tuple val(tumorname), val(normalname),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz"),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi"),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz"),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi")
     
     script:
 
@@ -297,20 +306,20 @@ process strelka_tn {
         --runDir=wd \
         --callRegions ${bed}.gz
     ./wd/runWorkflow.py -m local -j $task.cpus
-    mv wd/results/variants/somatic.snvs.vcf.gz  ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
-    mv wd/results/variants/somatic.indels.vcf.gz  ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
-
+    mv wd/results/variants/somatic.snvs.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
+    mv wd/results/variants/somatic.indels.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
+    mv wd/results/variants/somatic.snvs.vcf.gz.tbi  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi
+    mv wd/results/variants/somatic.indels.vcf.gz.tbi  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi
     """
 
     stub:
     
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz
-    touch ${tumor.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.snvs.vcf.gz.tbi
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.somatic.indels.vcf.gz.tbi
 
     """
 
-
 }
 
 
@@ -321,8 +330,8 @@ process vardict_tn {
         tuple val(tumorname), path(tumor), path(tumorbai), val(normalname), path(normal), path(normalbai), path(bed)
     
     output:
-        tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.vardict.vcf")
+        tuple val(tumorname), val(normalname),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf")
     //bcbio notes of vardict filtering var2vcf_paired.pl -P 0.9 -m 4.25 -f 0.01 -M” and 
     //filtered with “((AF*DP < 6) && ((MQ < 55.0 && NM > 1.0) || (MQ < 60.0 && NM > 2.0) || (DP < 10) || (QUAL < 45)))” 
     script:
@@ -342,14 +351,14 @@ process vardict_tn {
             -d 10 \
             -v 6 \
             -S \
-            -f 0.05 >  ${tumor.simpleName}_${bed.simpleName}.vardict.vcf
+            -f 0.05 >  ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
 
     """
 
     stub:
     
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.vardict.vcf
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.vardict.vcf
 
     """
 
@@ -363,11 +372,13 @@ process varscan_tn {
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), 
         val(normalname), path(normal), path(normalbai), path(bed),
-        path(tumorpileup), path(normalpileup), path(tumor_con_table), path(normal_con_table)
+        val(tumor1),
+        path(tumorpileup), path(normalpileup), 
+        path(tumor_con_table), path(normal_con_table)
     
     output:
-        tuple val(tumorname),
-        path("${tumor.simpleName}_${bed.simpleName}.varscan.vcf")
+        tuple val(tumorname),val(normalname),
+        path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf")
     
     shell:
 
@@ -375,18 +386,19 @@ process varscan_tn {
     tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l)
     normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l)
     varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity"
-    varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_{bed.simpleName}.vardict.vcf $varscan_opts --mpileup 1 
+    varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1 
     '''
 
     stub:
     
     """
-    touch ${tumor.simpleName}_${bed.simpleName}.varscan.vcf
+    touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf
     
     """
 
 }
 
+
 process octopus_tn {
     //label 'process_highcpu' Using separate docker for octopus
 
@@ -396,7 +408,7 @@ process octopus_tn {
     
 
     output:
-        tuple val("${tumorname}_vs_${normalname}"), 
+        tuple val("${tumorname}_vs_${normalname}"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz")
     
     script:
@@ -419,6 +431,7 @@ process octopus_tn {
 
 } 
 
+
 process lofreq_tn {
     label 'process_somaticcaller' 
 
@@ -428,7 +441,8 @@ process lofreq_tn {
     
 
     output:
-        tuple val(tumorname),
+    
+        tuple val(tumorname), val(normalname), 
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.snvs.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"),
@@ -443,11 +457,11 @@ process lofreq_tn {
         --threads $task.cpus \
         -l ${bed} \
         --call-indels \
-        -o ${tumorname}_vs_${normalname}_${bed.simpleName}
+        -o ${tumorname}_vs_${normalname}_${bed.simpleName}_
     
     bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \
-        ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz" --threads $task.cpus -Oz -o \
-        ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"
+        ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \
+        ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
 
 
     """
@@ -468,15 +482,13 @@ process lofreq_tn {
 
 process muse_tn {
     label 'process_somaticcaller' 
-    module=["muse/2.0.1"]
-
     input:
         tuple val(tumorname), path(tumor), path(tumorbai), 
         val(normalname), path(normal), path(normalbai)
     
 
     output:
-        tuple val(tumorname),
+        tuple val(tumorname), val(normalname),
         path("${tumorname}_vs_${normalname}.vcf.gz")
     
     script:
@@ -595,16 +607,15 @@ process combineVariants_octopus {
 }
 
 
-
 process bcftools_index_octopus {
     label 'process_low'
 
     input:
-        tuple val(sample),
+        tuple val(tumor),
         path(vcf)
 
     output:
-        tuple val(sample), 
+        tuple val(tumor),
         path(vcf), 
         path("${vcf}.tbi")
     
@@ -622,23 +633,20 @@ process bcftools_index_octopus {
 }
 
 
-
-
-
-
-
 process combineVariants_strelka {
     //Concat all somatic snvs/indels across all files, strelka separates snv/indels
     label 'process_mid'
     publishDir(path: "${outdir}/vcfs/strelka", mode: 'copy')
 
     input:
-        tuple val(sample), path(strelkasnvs), path(strelkaindels)
+        tuple val(sample), 
+        path(strelkasnvs), path(snvindex),
+        path(strelkaindels), path(indelindex)
     
     output:
         tuple val(sample), 
-        path("${sample}.strelka.vcf.gz"),path("${sample}.strelka.vcf.gz.tbi"),
-        path("${sample}.filtered.strelka.vcf.gz"),path("${sample}.filtered.strelka.vcf.gz.tbi")
+        path("${sample}.strelka.vcf.gz"), path("${sample}.strelka.vcf.gz.tbi"),
+        path("${sample}.filtered.strelka.vcf.gz"), path("${sample}.filtered.strelka.vcf.gz.tbi")
     
     
     script:
@@ -648,7 +656,7 @@ process combineVariants_strelka {
 
 
     """
-    bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz
+    bcftools concat $vcfin $indelsin --threads $task.cpus -Oz -o ${sample}.temp.strelka.vcf.gz -a 
     bcftools norm ${sample}.temp.strelka.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
         sed '/^\$/d' > ${sample}.temp1.strelka.vcf.gz
@@ -671,6 +679,7 @@ process combineVariants_strelka {
 
 }
 
+
 process somaticcombine {
     label 'process_mid'
     publishDir(path: "${outdir}/vcfs/combined", mode: 'copy')
@@ -682,13 +691,12 @@ process somaticcombine {
 
     output:
         tuple val(tumorsample), val(normal),
-        path("${tumorsample}_combined.vcf.gz"),
-        path("${tumorsample}_combined.vcf.gz.tbi")
+        path("${tumorsample}_vs_${normal}_combined.vcf.gz"),
+        path("${tumorsample}_vs_${normal}_combined.vcf.gz.tbi")
 
     script:
-    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
-    vcfin2="-V:" + vcfin1.join(" -V:")
-    println vcfin2
+        vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+        vcfin2="-V:" + vcfin1.join(" -V:")
 
     """
     java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \
@@ -696,19 +704,22 @@ process somaticcombine {
         --genotypeMergeOption PRIORITIZE \
         --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
         --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED
-        -O ${tumorsample}_combined.vcf.gz \
+        -O ${tumorsample}_vs_${normal}_combined.vcf.gz \
         $vcfin2
     """
 
     stub:
+    vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
+    vcfin2="-V:" + vcfin1.join(" -V:")
 
     """
-    touch ${tumorsample}_combined.vcf.gz
-    touch ${tumorsample}_combined.vcf.gz.tbi
+    touch ${tumorsample}_vs_${normal}_combined.vcf.gz
+    touch ${tumorsample}_vs_${normal}_combined.vcf.gz.tbi
     """
 
 }
 
+
 process annotvep_tn {    
     publishDir(path: "${outdir}/mafs/", mode: 'copy')
 
@@ -717,7 +728,7 @@ process annotvep_tn {
         val(vc), path(tumorvcf),path(vcfindex) 
 
     output:
-        path("paired/${vc}/${tumorsample}.maf")
+        path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf")
 
     shell:
 
@@ -756,7 +767,7 @@ process annotvep_tn {
 
     vcf2maf.pl \
     --vep-forks !{task.cpus} --input-vcf !{tumorvcf.baseName} \
-    --output-maf paired/!{vc}/!{tumorsample}.maf \
+    --output-maf paired/!{vc}/!{tumorsample}_vs_!{normalsample}.maf \
     --tumor-id !{tumorsample} \
     --normal-id !{normalsample} \
     --vep-path /opt/vep/src/ensembl-vep \
@@ -764,19 +775,16 @@ process annotvep_tn {
     --ncbi-build !{VEPBUILD} --species !{VEPSPECIES} --ref-fasta !{GENOMEREF} \
     --vep-overwrite
 
-
     '''
 
     stub:
     """
     mkdir -p paired/${vc}
-    touch paired/${vc}/${tumorsample}.maf
+    touch paired/${vc}/${tumorsample}_vs_${normalsample}.maf
     """
 }
 
 
-
-
 process combinemafs_tn {
     label 'process_low'
     publishDir(path: "${outdir}/mafs/paired", mode: 'copy')
diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf
index 993cb34..8233cb0 100644
--- a/workflow/modules/variant_calling_tonly.nf
+++ b/workflow/modules/variant_calling_tonly.nf
@@ -333,6 +333,7 @@ process octopus_tonly {
 }
 
 
+
 process somaticcombine_tonly {
     label 'process_mid'
     publishDir(path: "${outdir}/vcfs/combined_tonly", mode: 'copy')
diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf
index 372c884..11c0bab 100644
--- a/workflow/modules/workflows.nf
+++ b/workflow/modules/workflows.nf
@@ -111,9 +111,7 @@ workflow ALIGN {
 
     tobqsr=bwamem2.out.combine(gatherbqsr.out,by:0)
     applybqsr(tobqsr) 
-    //samtoolsindex(applybqsr.out)
-    
-    //samtoolsindex.out.view()
+
     //sample_sheet.view()
     bamwithsample=applybqsr.out.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(applybqsr.out,by:0).map{it.swap(3,0)}
 
@@ -158,6 +156,9 @@ workflow VC {
         sample_sheet
         
     main: 
+    //Create Pairing for TN (in case of dups)
+    sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)}
+
     bambyinterval=bamwithsample.combine(splitout.flatten())
 
     //Paired Mutect2    
@@ -176,142 +177,133 @@ workflow VC {
 
 
     pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout)
-    contamination_paired(pileup_paired_all)
-
-    mut2out_lor=mutect2.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } 
-    )}
-
-    learnreadorientationmodel(mut2out_lor)
-
-    mut2out_mstats=mutect2.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() } 
-    )}
-
-    mergemut2stats(mut2out_mstats)
-
-    allmut2tn=mutect2.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } 
-    )}
+    contamination_paired(pileup_paired_all) 
+
+
+    mutect2.out.groupTuple(by:[0,1])
+        | multiMap { tumor,normal,vcfs,f1r2,stats -> 
+        mut2out_lor: tuple("${tumor}_vs_${normal}",
+                f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } )
+        mut2out_mstats:  tuple( "${tumor}_vs_${normal}",
+                stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() })
+        allmut2tn: tuple( "${tumor}_vs_${normal}",
+                vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } )
+        } 
+    | set{mut2out}
     
-    mut2tn_filter=allmut2tn
-    .join(mergemut2stats.out)
-    .join(learnreadorientationmodel.out)
-    .join(contamination_paired.out)
+    learnreadorientationmodel(mut2out.mut2out_lor)
+    mergemut2stats(mut2out.mut2out_mstats)
+   
+    mutect2_in=mut2out.allmut2tn
+        | join(mergemut2stats.out)
+        | join(learnreadorientationmodel.out) 
+        | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} 
+        | join(contamination_paired.out) 
+        | mutect2filter
+        | join(sample_sheet_paired) 
+        | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)}  
+
+    annotvep_tn_mut2(mutect2_in)
 
     //Tumor Only Calling
     bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)}
-    mutect2_t_tonly(bambyinterval_t)    
-    
-    //LOR     
-    mut2tout_lor=mutect2_t_tonly.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } 
-    )}
-    learnreadorientationmodel_tonly(mut2tout_lor)
-
-    //Stats
-    mut2tonly_mstats=mutect2_t_tonly.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } 
-    )}
-    mergemut2stats_tonly(mut2tonly_mstats)
-
-
-    //Contamination
-    contamination_tumoronly(pileup_paired_tout)
-    
-    //Final TUMOR ONLY FILTER
-    allmut2tonly=mutect2_t_tonly.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } 
-    )}
-    
-    mut2tonly_filter=allmut2tonly
-    .join(mergemut2stats_tonly.out)
-    .join(learnreadorientationmodel_tonly.out)
-    .join(contamination_tumoronly.out)
+    mutect2_t_tonly(bambyinterval_t)  
+        
+    mutect2_t_tonly.out.groupTuple()
+        | multiMap { tumor,vcfs,f1r2,stats -> 
+        mut2tout_lor: tuple(tumor,
+                f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } )
+        mut2tonly_mstats:  tuple( tumor,
+                stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() })
+        allmut2tonly: tuple(tumor,
+                vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } )
+        } 
+    | set{mut2tonlyout}
 
     
-    //Annotation)
-    mutect2_in=mutect2filter(mut2tn_filter)
-    | join(sample_sheet)
-    | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)}  
-    annotvep_tn_mut2(mutect2_in)
-
+    learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor)
+    mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats)
+    contamination_tumoronly(pileup_paired_tout)
     
-    mutect2_in_tonly=mutect2filter_tonly(mut2tonly_filter)
+    mutect2_in_tonly=mut2tonlyout.allmut2tonly
+        | join(mergemut2stats_tonly.out)
+        | join(learnreadorientationmodel_tonly.out)
+        | join(contamination_tumoronly.out) 
+    | mutect2filter_tonly
     | join(sample_sheet)
     | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} 
     annotvep_tonly_mut2(mutect2_in_tonly)
     
     //Strelka TN 
-    strelka_tn(bambyinterval)
-    strelkaout=strelka_tn.out.groupTuple()
-    .map { samplename,vcfs,indels -> tuple( samplename,
-    vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },
-    indels.toSorted{ it -> (it.name =~ /${samplename}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() }  
-    )}
-    strelka_in=combineVariants_strelka(strelkaout) | join(sample_sheet)
-    | map{tumor,markedvcf,markedindex,finalvcf,finalindex,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} 
+    strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1])
+        | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}",
+            vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex,
+            indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} 
+        | combineVariants_strelka |  join(sample_sheet_paired)
+        | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} 
     annotvep_tn_strelka(strelka_in)
 
     //Vardict
-    vardict_comb=vardict_tn(bambyinterval).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict")} | combineVariants_vardict
-    vardict_in=vardict_comb.join(sample_sheet)
-     .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)}  
+    vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1])
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} 
+        | combineVariants_vardict | join(sample_sheet_paired)
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)}  
     annotvep_tn_vardict(vardict_in)
 
     //VarDict_tonly
-    vardict_tonly_comb=bambyinterval.map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
-        tuple(tumorname,tumorbam,tumorbai,bed)} 
-    vardict_tonly(vardict_tonly_comb).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"vardict_tonly")} | combineVariants_vardict_tonly
-    
-    vardict_in_tonly=combineVariants_vardict_tonly.out.join(sample_sheet)
-    .map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} 
+    vardict_in_tonly=bambyinterval 
+        | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed ->
+            tuple(tumorname,tumorbam,tumorbai,bed)} 
+        | vardict_tonly | groupTuple()
+        | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")}
+        | combineVariants_vardict_tonly | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} 
     annotvep_tonly_vardict(vardict_in_tonly)
     
     //VarScan TN
-    varscan_in=bambyinterval.join(contamination_paired.out) 
-    | varscan_tn | groupTuple() |map{tumor,vcf-> tuple(tumor,vcf,"varscan")} | combineVariants_varscan
-    | join(sample_sheet)
-    | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} 
+    varscan_in=bambyinterval.combine(contamination_paired.out) 
+        | varscan_tn | groupTuple(by:[0,1]) 
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf/)[0][1].toInteger()},"varscan")} 
+        | combineVariants_varscan | join(sample_sheet_paired)
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} 
     annotvep_tn_varscan(varscan_in)
     
     //VarScan_TOnly
-    varscan_in_tonly=bambyinterval.join(contamination_paired.out)
-    | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc ->
-            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  
-    | groupTuple() | map{tumor,vcf-> tuple(tumor,vcf,"varscan_tonly")} | combineVariants_varscan_tonly
+    varscan_in_tonly=bambyinterval.combine(contamination_paired.out) 
+    | map{tumor,bam,bai,normal,nbam,nbai,bed,tumorname2,tpile,npile,tumorc,normalc ->
+            tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly  | groupTuple() 
+    | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf/)[0][1].toInteger()},"varscan_tonly")}
+    | combineVariants_varscan_tonly
     | join(sample_sheet)
     | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"varscan_tonly",normvcf,normindex)} 
     annotvep_tonly_varscan(varscan_in_tonly)
-        
+    
     //Lofreq TN
-    lofreq_in=lofreq_tn(bambyinterval).groupTuple().map{tumor,snv,dbsnv,indel,dbindel,vcf-> tuple(tumor,vcf,"lofreq")} 
-        | combineVariants_lofreq | join(sample_sheet)
-        | map{tumor,marked,markedindex,normvcf,normindex,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} 
+    lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) 
+        | map{tu,no,snv,dbsnv,indel,dbindel,vcf-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},"lofreq")} 
+        | combineVariants_lofreq | join(sample_sheet_paired)
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} 
     annotvep_tn_lofreq(lofreq_in)
 
     //MuSE TN
-    muse_in=muse_tn(bamwithsample).groupTuple().map{tumor,vcf-> tuple(tumor,vcf,"muse")} 
-        | combineVariants_muse | join(sample_sheet)
-        | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} 
+    muse_in=muse_tn(bamwithsample) 
+        | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf,"muse")} 
+        | combineVariants_muse | join(sample_sheet_paired)
+        | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"muse",normvcf,normindex)} 
     annotvep_tn_muse(muse_in)
 
     //Octopus_TN
     octopus_in=octopus_tn(bambyinterval) | bcftools_index_octopus 
-        | groupTuple() | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")}
-        | combineVariants_octopus | map{samplename,marked,markedindex,normvcf,normindex -> 
+        | groupTuple() 
+        | map{samplename,vcf,vcfindex-> tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus")}
+        | combineVariants_octopus 
+        | map{samplename,marked,markedindex,normvcf,normindex -> 
             tuple(samplename.split('_vs_')[0],samplename.split('_vs_')[1],"octopus",normvcf,normindex)}
     annotvep_tn_octopus(octopus_in) 
 
     //Octopus_TOnly
     octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed->
-    tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly
+    tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly 
     | groupTuple() 
         | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")}
         | combineVariants_octopus_tonly 
@@ -321,7 +313,7 @@ workflow VC {
 
     //Combine All Variants Using VCF and Then Reannotate
     mutect2_in|concat(strelka_in)|concat(octopus_in)|concat(muse_in)|concat(lofreq_in)
-        | concat(vardict_in) |concat(varscan_in)|groupTuple()
+        | concat(vardict_in) |concat(varscan_in) | groupTuple(by:[0,1])
         | somaticcombine 
         | map{tumor,normal,vcf,index ->tuple(tumor,normal,"combined",vcf,index)} 
         | annotvep_tn_combined
@@ -337,7 +329,8 @@ workflow VC {
 
     emit:
         somaticcall_input=octopus_in
-    
+
+
 }
 
 
diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf
index 38e58bd..1546128 100644
--- a/workflow/modules/workflows_tonly.nf
+++ b/workflow/modules/workflows_tonly.nf
@@ -139,31 +139,25 @@ workflow VC_TONLY {
 
     mutect2_t_tonly(bambyinterval)    
     
-    
-    //LOR     
-    mut2tout_lor=mutect2_t_tonly.out.groupTuple()
-        .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-        f1r2.toSorted{ it -> (it.name =~ /${samplename}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } 
-        )}
-        learnreadorientationmodel_tonly(mut2tout_lor)
-
-    //Stats
-    mut2tonly_mstats=mutect2_t_tonly.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    stats.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() } 
-    )}
-    mergemut2stats_tonly(mut2tonly_mstats)
-
-    //Contamination
+    mutect2_t_tonly.out.groupTuple()
+        | multiMap { tumor,vcfs,f1r2,stats -> 
+        mut2tout_lor: tuple(tumor,
+                f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } )
+        mut2tonly_mstats:  tuple( tumor,
+                stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() })
+        allmut2tonly: tuple(tumor,
+                vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } )
+        } 
+    | set{mut2tonlyout}
+
+  
+
+    learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor)
+    mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats)
     contamination_tumoronly(pileup_paired_tout)
 
-    //Final TUMOR ONLY FILTER
-    allmut2tonly=mutect2_t_tonly.out.groupTuple()
-    .map { samplename,vcfs,f1r2,stats -> tuple( samplename,
-    vcfs.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } 
-    )}
     
-    mut2tonly_filter=allmut2tonly
+    mut2tonly_filter=mut2tonlyout.allmut2tonly
     .join(mergemut2stats_tonly.out)
     .join(learnreadorientationmodel_tonly.out)
     .join(contamination_tumoronly.out)
@@ -211,9 +205,6 @@ workflow VC_TONLY {
         somaticcall_input=combineVariants_octopus.out
 
 
-    emit:
-        somaticcall_input=combineVariants_octopus.out
-
 }
 
 

From ba00d4ea660105d25d873013e493a08ccddf5788 Mon Sep 17 00:00:00 2001
From: Darryl Nousome <dnousome@gmail.com>
Date: Wed, 22 Nov 2023 11:12:53 -0500
Subject: [PATCH 4/4] feat: update docker images and callers

---
 conf/hg38.config                          |  5 +-
 conf/mm10.config                          |  1 +
 docker/logan_base/Dockerfile              | 13 +----
 nextflow.config                           |  1 +
 workflow/modules/copynumber.nf            |  4 --
 workflow/modules/variant_calling.nf       | 71 ++++++++++++++---------
 workflow/modules/variant_calling_tonly.nf | 14 ++---
 workflow/modules/workflows.nf             | 14 ++---
 workflow/modules/workflows_tonly.nf       | 22 +++----
 workflow/scripts/lofreq_convert.sh        | 32 ++++++++++
 10 files changed, 109 insertions(+), 68 deletions(-)
 create mode 100755 workflow/scripts/lofreq_convert.sh

diff --git a/conf/hg38.config b/conf/hg38.config
index 5ae4d11..90c1881 100644
--- a/conf/hg38.config
+++ b/conf/hg38.config
@@ -1,9 +1,10 @@
 params {
     genomes {
         'hg38' {
-            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" // file(params.genome) 
+            genome = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta" 
+            genomefai = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/bwamem2/Homo_sapiens_assembly38.fasta.fai" 
             genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
-            wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" //
+            wgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" 
             intervals="${projectDir}/workflow/resources/hg38_v0_wgs_calling_regions.hg38.bed"
             //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
             //shapeitindel =  "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
diff --git a/conf/mm10.config b/conf/mm10.config
index 6dd27af..6841ee3 100644
--- a/conf/mm10.config
+++ b/conf/mm10.config
@@ -2,6 +2,7 @@ params {
     genomes {
         'mm10' {
             genome = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa" // file(params.genome) 
+            genomefai = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.fa.fai" // FASTA index (.fai) of genome
             bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
             genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
             //FIXwgsregion = "/data/nousomedr/annotation/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" //
diff --git a/docker/logan_base/Dockerfile b/docker/logan_base/Dockerfile
index 55832b5..9b04b2c 100644
--- a/docker/logan_base/Dockerfile
+++ b/docker/logan_base/Dockerfile
@@ -17,10 +17,12 @@ WORKDIR /opt2
 # This section installs system packages required for your project
 # If you need extra system packages add them here.
 # python/3.8.0 and python/2.7.16 (strelka and manta)
+# JDK 17 for DISCVRSeq
 RUN apt-get update \
  && apt-get -y upgrade \
  && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-      bc    
+      bc \
+      openjdk-17-jdk   
 
 # Common bioinformatics tools 
 # bwa/0.7.17-4  bowtie/1.2.3  bowtie2/2.3.5.1 
@@ -158,15 +160,6 @@ RUN wget https://github.com/AstraZeneca-NGS/VarDictJava/releases/download/v1.8.3
     && rm /opt2/VarDict-1.8.3.tar
 ENV PATH="/opt2/VarDict-1.8.3/bin:$PATH"
 
-# Install Octopus/v0.7.4
-#RUN wget https://github.com/luntergroup/octopus/archive/refs/tags/v0.7.4.tar.gz \ 
-#    && tar -xvzf /opt2/v0.7.4.tar.gz \
-#    && rm /opt2/v0.7.4.tar.gz \
-#    && cd /opt2/octopus-0.7.4 \
-#    && cmake .
-#ENV PATH="/opt2/octopus-0.7.4/bin:$PATH"
-
-
 # Fastp From Opengene github
 RUN wget http://opengene.org/fastp/fastp.0.23.2 \
     && mkdir fastp \
diff --git a/nextflow.config b/nextflow.config
index ee489ec..b9b3b00 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -23,6 +23,7 @@ params {
     script_freecpaired = "${projectDir}/workflow/scripts/freec_paired.pl"
     freec_significance = "${projectDir}/workflow/scripts/assess_significance.R"
     freec_plot = "${projectDir}/workflow/scripts/makeGraph.R"
+    lofreq_convert = "${projectDir}/workflow/scripts/lofreq_convert.sh"
     vep_cache = "/fdb/VEP/102/cache"
 
     //Biowulf
diff --git a/workflow/modules/copynumber.nf b/workflow/modules/copynumber.nf
index 5d6116a..b15a8c2 100644
--- a/workflow/modules/copynumber.nf
+++ b/workflow/modules/copynumber.nf
@@ -21,8 +21,6 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
 DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
 HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'
 
-
-
 //DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) 
 //ascatR=
 
@@ -379,7 +377,6 @@ process purple {
     script:
 
     """
-
     java -jar purple.jar \
     -tumor ${tumorname} \
     -amber ${amberin} \
@@ -392,7 +389,6 @@ process purple {
     -driver_gene_panel $DRIVERS \
     -somatic_hotspots $HOTSPOTS \
     -output_dir ${tumorname}
-
     """
 
     stub:
diff --git a/workflow/modules/variant_calling.nf b/workflow/modules/variant_calling.nf
index 64b0dde..f0cae08 100644
--- a/workflow/modules/variant_calling.nf
+++ b/workflow/modules/variant_calling.nf
@@ -1,4 +1,5 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
+GENOMEFAI=file(params.genomes[params.genome].genomefai)
 GENOMEDICT=file(params.genomes[params.genome].genomedict)
 KGPGERMLINE=params.genomes[params.genome].kgp 
 DBSNP=file(params.genomes[params.genome].dbsnp) 
@@ -9,6 +10,7 @@ VEPSPECIES=params.genomes[params.genome].vepspecies
 VEPBUILD=params.genomes[params.genome].vepbuild
 SOMATIC_FOREST=params.genomes[params.genome].octopus_sforest
 GERMLINE_FOREST=params.genomes[params.genome].octopus_gforest
+LOFREQ_CONVERT=params.lofreq_convert
 
 //Output
 outdir=file(params.output)
@@ -242,8 +244,7 @@ process mutect2filter {
     mut2in = mutvcfs.join(" -I ")
 
     """
-    gatk GatherVcfs -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz 
-    gatk IndexFeatureFile -I ${tumor}_vs_${normal}.concat.vcf.gz 
+    gatk SortVcf -I ${mut2in} -O ${tumor}_vs_${normal}.concat.vcf.gz --CREATE_INDEX
     gatk FilterMutectCalls \
         -R $GENOMEREF \
         -V ${tumor}_vs_${normal}.concat.vcf.gz \
@@ -251,8 +252,6 @@ process mutect2filter {
         --contamination-table ${tumorcontamination} \
         --stats ${stats} \
         -O ${tumor}_vs_${normal}.mut2.marked.vcf.gz
-
-
     gatk SelectVariants \
         -R $GENOMEREF \
         --variant ${tumor}_vs_${normal}.mut2.marked.vcf.gz \
@@ -262,8 +261,7 @@ process mutect2filter {
     bcftools sort ${tumor}_vs_${normal}.mut2.final.vcf.gz |\
     bcftools norm --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
-        sed '/^\$/d' > ${tumor}_vs_${normal}.mut2.norm.vcf |\
-    bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz
+        sed '/^\$/d' | bcftools view - -Oz -o ${tumor}_vs_${normal}.mut2.norm.vcf.gz
     bcftools index -t ${tumor}_vs_${normal}.mut2.norm.vcf.gz
     """
 
@@ -377,23 +375,36 @@ process varscan_tn {
         path(tumor_con_table), path(normal_con_table)
     
     output:
-        tuple val(tumorname),val(normalname),
+        tuple val(tumorname), val(normalname),
         path("${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf")
     
     shell:
-
     '''
     tumor_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{tumor_con_table} | cut -f2 ))" | bc -l)
     normal_purity=$( echo "1-$(printf '%.6f' $(tail -n -1 !{normal_con_table} | cut -f2 ))" | bc -l)
+    dual_pileup="samtools mpileup -d 10000 -q 15 -Q 15 -f !{GENOMEREF} -l !{bed} !{normal} !{tumor}"
     varscan_opts="--strand-filter 1 --min-var-freq 0.01 --min-avg-qual 30 --somatic-p-value 0.05 --output-vcf 1 --normal-purity $normal_purity --tumor-purity $tumor_purity"
-    varscan somatic < samtools mpileup -d 10000 -q 15 -Q 15 -f !GENOME -l !{bed.simpleName} !{normal} !{tumor} !{tumor.simpleName}_vs_!{normal.simpleName}_{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1 
+    varscan_cmd="varscan somatic <($dual_pileup) !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf $varscan_opts --mpileup 1"
+    eval "$varscan_cmd"
+
+    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel \
+        | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp
+    awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",$4); OFS = "\\t"; print}}' !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp \
+        | sed '/^$/d' > !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp
+
+    java -jar $DISCVRSeq_JAR MergeVcfsAndGenotypes \
+        -R !{GENOMEREF} \
+        --assumeIdenticalSamples \
+        --filteredrecordsmergetype KEEP_UNCONDITIONAL \
+        --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.snp_temp \
+        --variant !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf.indel_temp \
+        -O !{tumor.simpleName}_vs_!{normal.simpleName}_!{bed.simpleName}.varscan.vcf
+
     '''
 
     stub:
-    
     """
     touch ${tumor.simpleName}_vs_${normal.simpleName}_${bed.simpleName}.varscan.vcf
-    
     """
 
 }
@@ -420,6 +431,7 @@ process octopus_tn {
     --threads $task.cpus \
     $GERMLINE_FOREST \
     $SOMATIC_FOREST \
+    --target-working-memory 64Gb \
     -o ${tumorname}_vs_${normalname}_${bed.simpleName}.octopus.vcf.gz
     """
 
@@ -447,7 +459,8 @@ process lofreq_tn {
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"),
         path("${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"),
-        path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz")
+        path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"),
+        path("${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi")
     
     script:
 
@@ -461,8 +474,11 @@ process lofreq_tn {
     
     bcftools concat ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz \
         ${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz --threads $task.cpus -Oz -o \
-        ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
+        ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz
 
+    $LOFREQ_CONVERT ${tumorname}_vs_${normalname}_${bed.simpleName}_temp_lofreq.vcf.gz ${tumorname} \
+        | bcftools view -Oz -o ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
+    bcftools index -t ${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz
 
     """
 
@@ -473,7 +489,7 @@ process lofreq_tn {
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.snvs.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final.indels.vcf.gz"
     touch "${tumorname}_vs_${normalname}_${bed.simpleName}_somatic_final_minus-dbsnp.indels.vcf.gz"
-    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz"
+    touch "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz" "${tumorname}_vs_${normalname}_${bed.simpleName}_lofreq.vcf.gz.tbi"
     
     """
 } 
@@ -496,8 +512,9 @@ process muse_tn {
     """
     MuSE call -f $GENOMEREF -O ${tumorname}_vs_${normalname} -n $task.cpus $tumor $normal
     MuSE sump -I ${tumorname}_vs_${normalname}.MuSE.txt \
-        -O ${tumorname}_vs_${normalname} -n $task.cpus -D $DBSNP -G
-        
+        -O ${tumorname}_vs_${normalname}.vcf -n $task.cpus -D $DBSNP -G
+    
+    bcftools view ${tumorname}_vs_${normalname}.vcf -Oz -o ${tumorname}_vs_${normalname}.vcf.gz
     """
 
     stub:
@@ -528,11 +545,10 @@ process combineVariants {
     
     """
     mkdir ${vc}
-    gatk --java-options "-Xmx48g" MergeVcfs \
-        -O ${sample}.${vc}.temp.vcf.gz \
-        -D $GENOMEDICT \
+    gatk --java-options "-Xmx48g" SortVcf \
+        -O ${sample}.${vc}.marked.vcf.gz \
+        -SD $GENOMEDICT \
         -I $vcfin
-    bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
     bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
         sed '/^\$/d' > ${sample}.${vc}.temp.vcf
@@ -540,8 +556,8 @@ process combineVariants {
     bcftools view ${sample}.${vc}.temp.vcf -f PASS -Oz -o ${vc}/${sample}.${vc}.norm.vcf.gz
 
     mv ${sample}.${vc}.marked.vcf.gz ${vc}
-    
-    bcftools index ${vc}/${sample}.${vc}.marked.vcf.gz -t
+    mv ${sample}.${vc}.marked.vcf.gz.tbi ${vc}
+
     bcftools index ${vc}/${sample}.${vc}.norm.vcf.gz -t
     """
 
@@ -560,7 +576,7 @@ process combineVariants {
 
 
 
-process combineVariants_octopus {
+process combineVariants_alternative {
     label 'process_highmem'
     publishDir(path: "${outdir}/vcfs/", mode: 'copy')
 
@@ -579,7 +595,8 @@ process combineVariants_octopus {
     
     """
     mkdir ${vc}
-    bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp.vcf.gz
+    bcftools concat $vcfin -a -Oz -o ${sample}.${vc}.temp1.vcf.gz
+    bcftools reheader -f $GENOMEFAI ${sample}.${vc}.temp1.vcf.gz -o ${sample}.${vc}.temp.vcf.gz
     bcftools sort ${sample}.${vc}.temp.vcf.gz -Oz -o ${sample}.${vc}.marked.vcf.gz
     bcftools norm ${sample}.${vc}.marked.vcf.gz -m- --threads $task.cpus --check-ref s -f $GENOMEREF -O v |\
         awk '{{gsub(/\\y[W|K|Y|R|S|M]\\y/,"N",\$4); OFS = "\\t"; print}}' |\
@@ -699,11 +716,11 @@ process somaticcombine {
         vcfin2="-V:" + vcfin1.join(" -V:")
 
     """
-    java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \
+    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
         -R $GENOMEREF \
         --genotypeMergeOption PRIORITIZE \
         --priority_list mutect2,strelka,octopus,muse,lofreq,vardict,varscan \
-        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED
+        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
         -O ${tumorsample}_vs_${normal}_combined.vcf.gz \
         $vcfin2
     """
@@ -725,7 +742,7 @@ process annotvep_tn {
 
     input:
         tuple val(tumorsample), val(normalsample), 
-        val(vc), path(tumorvcf),path(vcfindex) 
+        val(vc), path(tumorvcf), path(vcfindex) 
 
     output:
         path("paired/${vc}/${tumorsample}_vs_${normalsample}.maf")
diff --git a/workflow/modules/variant_calling_tonly.nf b/workflow/modules/variant_calling_tonly.nf
index 8233cb0..227dab4 100644
--- a/workflow/modules/variant_calling_tonly.nf
+++ b/workflow/modules/variant_calling_tonly.nf
@@ -1,4 +1,5 @@
 GENOMEREF=file(params.genomes[params.genome].genome)
+GENOMEFAI=file(params.genomes[params.genome].genomefai)
 GENOMEDICT=file(params.genomes[params.genome].genomedict)
 KGPGERMLINE=params.genomes[params.genome].kgp //1000G_phase1.snps.high_confidence.hg38.vcf.gz"
 DBSNP=file(params.genomes[params.genome].dbsnp) //dbsnp_138.hg38.vcf.gz"
@@ -249,9 +250,7 @@ process varscan_tonly {
     pileup_cmd="samtools mpileup -d 100000 -q 15 -Q 15 -f !{GENOMEREF} !{tumor}"
     varscan_cmd="varscan mpileup2cns <($pileup_cmd) $varscan_opts"
 
-
-    eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz"
-    eval "bcftools view -U !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf.gz > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
+    eval "$varscan_cmd > !{tumor.simpleName}_!{bed.simpleName}.tonly.varscan.vcf"
     '''
 
     stub:
@@ -318,7 +317,9 @@ process octopus_tonly {
 
     """
     octopus -R $GENOMEREF -C cancer -I ${tumor} \
-    --annotations AC AD DP -t ${bed} \
+    --annotations AC AD DP \
+    --target-working-memory 64Gb \
+    -t ${bed} \
     $SOMATIC_FOREST \
     -o ${tumorname}_${bed.simpleName}.tonly.octopus.vcf.gz --threads $task.cpus
 
@@ -351,14 +352,13 @@ process somaticcombine_tonly {
     script:
         vcfin1=[callers, vcfs].transpose().collect { a, b -> a + " " + b }
         vcfin2="-V:" + vcfin1.join(" -V:")
-        println vcfin2
 
     """
-    java -jar DISCVRSeq-1.3.61.jar MergeVcfsAndGenotypes \
+    java -jar \$DISCVRSeq_JAR MergeVcfsAndGenotypes \
         -R $GENOMEREF \
         --genotypeMergeOption PRIORITIZE \
         --priority_list mutect2,octopus,vardict,varscan \
-        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED
+        --filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED \
         -O ${tumorsample}_combined.vcf.gz \
         $vcfin2
     """
diff --git a/workflow/modules/workflows.nf b/workflow/modules/workflows.nf
index 11c0bab..79521c7 100644
--- a/workflow/modules/workflows.nf
+++ b/workflow/modules/workflows.nf
@@ -22,8 +22,8 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     octopus_tn; bcftools_index_octopus; bcftools_index_octopus as bcftools_index_octopus_tonly;
     combineVariants as combineVariants_vardict; combineVariants as combineVariants_vardict_tonly; 
     combineVariants as combineVariants_varscan; combineVariants as combineVariants_varscan_tonly;
-    combineVariants as combineVariants_lofreq; combineVariants as combineVariants_muse;
-    combineVariants_octopus; combineVariants_octopus as combineVariants_octopus_tonly;
+    combineVariants_alternative as combineVariants_lofreq; combineVariants as combineVariants_muse;
+    combineVariants_alternative as combineVariants_octopus; combineVariants_alternative as combineVariants_octopus_tonly;
     annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; 
     annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict; annotvep_tn as annotvep_tn_octopus;
     annotvep_tn as annotvep_tn_lofreq; annotvep_tn as annotvep_tn_muse;
@@ -231,7 +231,7 @@ workflow VC {
         | join(contamination_tumoronly.out) 
     | mutect2filter_tonly
     | join(sample_sheet)
-    | map{tumor,markedvcf,markedindex,normvcf,normindex, stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} 
+    | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2",normvcf,normindex)} 
     annotvep_tonly_mut2(mutect2_in_tonly)
     
     //Strelka TN 
@@ -280,7 +280,7 @@ workflow VC {
     
     //Lofreq TN
     lofreq_in=lofreq_tn(bambyinterval) | groupTuple(by:[0,1]) 
-        | map{tu,no,snv,dbsnv,indel,dbindel,vcf-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},"lofreq")} 
+        | map{tu,no,snv,dbsnv,indel,dbindel,vcf,vcfindex-> tuple("${tu}_vs_${no}",vcf.toSorted{it -> (it.name =~ /${tu}_vs_${no}_(.*?)_lofreq.vcf.gz/)[0][1].toInteger()},vcfindex,"lofreq")} 
         | combineVariants_lofreq | join(sample_sheet_paired)
         | map{sample,marked,markedindex,normvcf,normindex,tumor,normal->tuple(tumor,normal,"lofreq",normvcf,normindex)} 
     annotvep_tn_lofreq(lofreq_in)
@@ -319,7 +319,7 @@ workflow VC {
         | annotvep_tn_combined
 
     mutect2_in_tonly|concat(octopus_in_tonly)
-        | concat(vardict_in_tonly)|concat(varscan_in_tonly)
+        | concat(vardict_in_tonly)|concat(varscan_in_tonly) | groupTuple() 
         | somaticcombine_tonly 
         | map{tumor,vcf,index ->tuple(tumor,"combined_tonly",vcf,index)} 
         | annotvep_tonly_combined
@@ -512,9 +512,9 @@ workflow QC_GL {
     samtools_flagstats_out=samtools_flagstats.out.collect()
     bcftools_stats_out= bcftools_stats.out.collect()
     gatk_varianteval_out= gatk_varianteval.out.collect()
-    snpeff_out=snpeff.out.collect()//map{vcf,csv,html->vcf,csv,html}.collect()
+    snpeff_out=snpeff.out.collect()
     vcftools_out=vcftools.out
-    collectvariantcallmetrics_out=collectvariantcallmetrics.out//.map{details,summary->details,summary}
+    collectvariantcallmetrics_out=collectvariantcallmetrics.out
 
     conall=fclane_out.concat(fqs_out,kraken_out,qualimap_out,samtools_flagstats_out,bcftools_stats_out,
     gatk_varianteval_out,snpeff_out,vcftools_out,collectvariantcallmetrics_out,somalier_analysis_out).flatten().toList()
diff --git a/workflow/modules/workflows_tonly.nf b/workflow/modules/workflows_tonly.nf
index 1546128..ebcaf72 100644
--- a/workflow/modules/workflows_tonly.nf
+++ b/workflow/modules/workflows_tonly.nf
@@ -20,7 +20,7 @@ include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n;
     contamination_paired; learnreadorientationmodel; mergemut2stats;
     combineVariants as combineVariants_vardict; combineVariants as combineVariants_varscan; 
     combineVariants as combineVariants_vardict_tonly; combineVariants as combineVariants_varscan_tonly;
-    combineVariants_octopus ; 
+    combineVariants_alternative ; 
     annotvep_tn as annotvep_tn_mut2; annotvep_tn as annotvep_tn_strelka; annotvep_tn as annotvep_tn_varscan; annotvep_tn as annotvep_tn_vardict;
     combinemafs_tn} from './variant_calling.nf'
 
@@ -163,16 +163,16 @@ workflow VC_TONLY {
     .join(contamination_tumoronly.out)
 
     mutect2_tonly_in=mutect2filter_tonly(mut2tonly_filter) 
-    | join(sample_sheet)
-    | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} 
+        | join(sample_sheet)
+        | map{tumor,markedvcf,markedindex,finalvcf,finalindex,stats -> tuple(tumor,"mutect2",finalvcf,finalindex)} 
     annotvep_tonly_mut2(mutect2_tonly_in)
 
 
     //VarDict
     vardict_in_tonly=vardict_tonly(bambyinterval) | groupTuple()| map{tumor,vcf -> tuple(tumor,vcf,"vardict_tonly")} 
-    | combineVariants_vardict_tonly
-    | join(sample_sheet)
-    | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)}
+        | combineVariants_vardict_tonly
+        | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"vardict_tonly",normvcf,normindex)}
     annotvep_tonly_vardict(vardict_in_tonly)
 
     //VarScan_tonly
@@ -185,11 +185,11 @@ workflow VC_TONLY {
 
     //Octopus_tonly
     octopus_in_tonly=bambyinterval | octopus_tonly | bcftools_index_octopus
-    | groupTuple()
-    | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name}
-            ,vcfindex, "octopus_tonly")} 
-    | combineVariants_octopus | join(sample_sheet)
-    | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
+        | groupTuple()
+        | map{tumor,vcf,vcfindex -> tuple(tumor,vcf.toSorted{it -> it.name}
+                ,vcfindex, "octopus_tonly")} 
+        | combineVariants_alternative | join(sample_sheet)
+        | map{tumor,marked,markedindex,normvcf,normindex ->tuple(tumor,"octopus_tonly",normvcf,normindex)} 
     annotvep_tonly_octopus(octopus_in_tonly)
 
 
diff --git a/workflow/scripts/lofreq_convert.sh b/workflow/scripts/lofreq_convert.sh
new file mode 100755
index 0000000..1d5edda
--- /dev/null
+++ b/workflow/scripts/lofreq_convert.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+# Add a FORMAT column and a tumor sample column (DP:DP4) to a gzipped VCF whose
+# INFO field carries DP and DP4 (LoFreq output, judging by the script name).
+#
+# Usage: lofreq_convert.sh <input.vcf.gz> <tumor_name>
+#   <input.vcf.gz>  gzip-compressed VCF, read with zcat
+#   <tumor_name>    name used for the new sample column in the #CHROM line
+#
+# The converted, uncompressed VCF is written to stdout.
+
+set -eu
+# Fail when any pipeline stage fails (e.g. zcat on an unreadable input) where the
+# shell supports it; guarded so a POSIX sh without pipefail still runs the script.
+if (set -o pipefail) 2>/dev/null; then set -o pipefail; fi
+
+if [ "$#" -lt 2 ]; then
+  echo "Usage: $0 <input.vcf.gz> <tumor_name>" >&2
+  exit 1
+fi
+
+INPUT_FILE="$1"
+TUMOR_NAME="$2"
+export TUMOR_NAME  # read by the last perl stage through %ENV
+
+# Pipeline stages:
+#   1. awk  - keep header lines and records whose 4th column (REF) is a single A/C/G/T.
+#   2. perl - keep header lines and records on contigs named [chr]<digits or X>.
+#             NOTE(review): this drops chrY, chrM and alt contigs - confirm intended.
+#   3. perl - rewrite every "AF=" as "VAF=" and the "ID=AF" definition as "ID=VAF".
+#   4. perl - declare DP/DP4 as FORMAT keys, extend the #CHROM line, and append
+#             DP:DP4 values taken from INFO to every record.
+zcat "${INPUT_FILE}" \
+  | awk '($4=="A" || $4 == "C" || $4=="T" || $4=="G" || /^#/)' \
+  | perl -ne 'print if /^#|^(chr)*[\dX]+\s.+/' \
+  | perl -ne 's/AF=/VAF=/g;s/ID=AF/ID=VAF/;print;' \
+  | perl -ne '
+              # Globals (no "my") on purpose: under -n they must survive across lines.
+              if (/^#/) {
+                # Keep the DP and DP4 INFO definitions, rewritten as FORMAT definitions.
+                if (/^##INFO=<ID=DP,/)  { ($FMT_DP  = $_) =~ s/INFO/FORMAT/; }
+                if (/^##INFO=<ID=DP4,/) { ($FMT_DP4 = $_) =~ s/INFO/FORMAT/; }
+                # Emit each definition once: ahead of a min_dp header line, or failing
+                # that ahead of #CHROM, so the FORMAT keys are always declared.
+                if (/min_dp/ || /^#CHROM/) {
+                  print grep { defined $_ } $FMT_DP, $FMT_DP4;
+                  undef $FMT_DP;
+                  undef $FMT_DP4;
+                }
+                # Append the FORMAT column and the tumor sample column to the column header.
+                s/$/\tFORMAT\t$ENV{TUMOR_NAME}/ if /^#CHROM/;
+                print;
+              }
+              # For each record, add a FORMAT column and fill the tumor column with depth and read counts.
+              else {
+                chomp;
+                # Look DP and DP4 up by key, not by position, so extra or reordered
+                # INFO entries (or a ";" in the ID column) cannot shift the values.
+                my ($dp)  = /[\t;]DP=([^;\t]+)/;
+                my ($dp4) = /[\t;]DP4=([^;\t]+)/;
+                $dp  = "." unless defined $dp;
+                $dp4 = "." unless defined $dp4;
+                print "$_\tDP:DP4\t${dp}:${dp4}\n";
+              }'