diff --git a/conf/base.config b/conf/base.config index 0b5fd0c..0a09e0b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -46,7 +46,9 @@ process { time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } diff --git a/conf/genomes.config b/conf/genomes.config index fde6bcc..74d653b 100644 --- a/conf/genomes.config +++ b/conf/genomes.config @@ -7,12 +7,12 @@ params { genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict" wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list" intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed" - //millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" - //shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // - KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' + millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" + shapeitindel = 
"/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) // + INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz' dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz" - dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon} kgp = 
"/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz" @@ -25,6 +25,7 @@ params { vepcache = "/fdb/VEP/102/cache" vepspecies = "homo_sapiens" vepbuild = "GRCh38" + annotsvgenome = "GRCh38" octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest" octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest" SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz" @@ -37,7 +38,8 @@ params { bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa" genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict" intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed" - KNOWNINDELS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz" + KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz" + INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz" KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz" dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz" pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz" @@ -53,6 +55,7 @@ params { vepcache = "/fdb/VEP/102/cache" vepspecies = "mus_musculus" vepbuild= "GRCm38" + annotsvgenome = "mm10" octopus_sforest = "" octopus_gforest = "" SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz' diff --git a/main.nf b/main.nf index 0a715e1..4622d07 100644 --- a/main.nf +++ b/main.nf 
@@ -25,8 +25,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM; VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf" - -//SUB WORKFLOWS to SPLIT workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { def message = Utils.spooker(workflow) diff --git a/modules/local/copynumber.nf b/modules/local/copynumber.nf index ef47cc7..9cd1e27 100644 --- a/modules/local/copynumber.nf +++ b/modules/local/copynumber.nf @@ -21,7 +21,6 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data' DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv' HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz' -//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) //ascatR= @@ -273,7 +272,7 @@ process amber_tonly { """ - java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \ + java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \ -tumor ${tumorname} -tumor_bam ${tumor} \ -output_dir ${tumorname}_amber \ -threads $task.cpus \ @@ -310,7 +309,7 @@ process amber_tn { """ - java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \ + java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \ -tumor ${tumorname} -tumor_bam ${tumor} \ -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_amber \ @@ -346,7 +345,7 @@ process cobalt_tonly { """ - java -jar -Xmx8G cobalt.jar \ + java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ -tumor ${tumorname} -tumor_bam ${tumor} \ -output_dir ${tumorname}_cobalt \ -threads $task.cpus \ @@ -382,7 +381,7 @@ process cobalt_tn { """ - java -jar -Xmx8G cobalt.jar \ + java -jar -Xmx8G /opt2/hmftools/cobalt.jar \ -tumor ${tumorname} -tumor_bam ${tumorname} \ -reference ${normalname} -reference_bam ${normal} \ -output_dir ${tumorname}_vs_${normalname}_cobalt \ @@ -418,7 +417,7 @@ process purple { script: """ - java 
-jar purple.jar \ + java -jar /opt2/hmftools/purple.jar \ -tumor ${tumorname} \ -amber ${amberin} \ -cobalt ${cobaltin} \ diff --git a/modules/local/structural_variant.nf b/modules/local/structural_variant.nf index dda67c7..34b0f5a 100644 --- a/modules/local/structural_variant.nf +++ b/modules/local/structural_variant.nf @@ -1,11 +1,12 @@ GENOMEREF=file(params.genomes[params.genome].genome) -GENOME=params.genome +ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome //AnnotSV build label (plain string, not a path) BWAGENOME=file(params.genomes[params.genome].bwagenome) -DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS) +INDELREF=file(params.genomes[params.genome].INDELREF) process svaba_somatic { + container = "${params.containers.logan}" label 'process_highcpu' input: @@ -30,7 +31,7 @@ process svaba_somatic { script: """ - svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME + svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME """ stub: @@ -56,7 +57,7 @@ process svaba_somatic { process manta_somatic { - + container = "${params.containers.logan}" label 'process_highcpu' input: @@ -102,7 +103,6 @@ process manta_somatic { process annotsv_tn { //AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files //Requires bedtools,bcftools - module = ['annotsv/3.3.1'] input: @@ -119,7 +119,7 @@ process annotsv_tn { mkdir ${sv} AnnotSV -SVinputFile ${somaticvcf} \ - -genomeBuild $GENOME \ + -genomeBuild $ANNOTSVGENOME \ -SVinputInfo 1 -outputFile ${tumorname} \ -outputDir ${sv} @@ -136,6 +136,7 @@ process annotsv_tn { process manta_tonly { + container = "${params.containers.logan}" label 'process_highcpu' input: @@ -178,6 +179,7 @@ process manta_tonly { process svaba_tonly { + container = "${params.containers.logan}" label 'process_highcpu' input: @@ -198,7 +200,7 @@ process svaba_tonly { script: """ - svaba run -t ${tumor} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME + svaba run -t 
${tumor} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME """ stub: @@ -230,7 +232,7 @@ process gunzip { script: """ - gunzip ${vcf} > ${tumorname}.tumorSV.vcf + gunzip -f -c ${vcf} > ${tumorname}.tumorSV.vcf """ stub: @@ -291,7 +293,7 @@ process annotsv_tonly { mkdir ${sv} AnnotSV -SVinputFile ${somaticvcf} \ - -genomeBuild $GENOME \ + -genomeBuild $ANNOTSVGENOME \ -SVinputInfo 1 -outputFile ${tumorname} \ -outputDir ${sv} diff --git a/modules/local/trim_align.nf b/modules/local/trim_align.nf index bcab724..715ff64 100644 --- a/modules/local/trim_align.nf +++ b/modules/local/trim_align.nf @@ -1,6 +1,6 @@ -GENOMEREF=file(params.genomes[params.genome].genome) +GENOMEREF = file(params.genomes[params.genome].genome) KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL - +KNOWNINDELS = params.genomes[params.genome].KNOWNINDELS process fastp { container = "${params.containers.logan}" @@ -77,6 +77,70 @@ process bwamem2 { +process indelrealign { + container "${params.containers.logan}" + label 'process_long' + + input: + tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") + + output: + tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai") + + script: + + """ + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -Xmx16g -jar \$GATK_JAR -T RealignerTargetCreator \ + -I ${samplename}.bam \ + -R ${GENOMEREF} \ + -o ${samplename}.intervals \ + -nt $task.cpus \ + ${KNOWNINDELS} + + /usr/lib/jvm/java-8-openjdk-amd64/bin/java -Xmx16g -jar \$GATK_JAR -T IndelRealigner \ + -R ${GENOMEREF} \ + -I ${samplename}.bam \ + ${KNOWNINDELS} \ + -targetIntervals ${samplename}.intervals \ + -o ${samplename}.ir.bam + """ + + stub: + """ + touch ${samplename}.ir.bam ${samplename}.ir.bai + """ + +} + + +process bqsr_ir { + /* + Base quality recalibration for all samples + */ + container = "${params.containers.logan}" + label 'process_low' + input: + tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai"), path(bed) 
+ + output: + tuple val(samplename), path("${samplename}_${bed.simpleName}.recal_data.grp") + + script: + """ + gatk --java-options '-Xmx16g' BaseRecalibrator \ + --input ${samplename}.ir.bam \ + --reference ${GENOMEREF} \ + ${KNOWNRECAL} \ + --output ${samplename}_${bed.simpleName}.recal_data.grp \ + --intervals ${bed} + """ + + stub: + """ + touch ${samplename}_${bed.simpleName}.recal_data.grp + """ +} + process bqsr { /* Base quality recalibration for all samples @@ -103,7 +167,6 @@ process bqsr { """ touch ${samplename}_${bed.simpleName}.recal_data.grp """ - } process gatherbqsr { @@ -131,16 +194,15 @@ process gatherbqsr { """ } - process applybqsr { /* Base quality recalibration for all samples to */ container = "${params.containers.logan}" - label 'process_low' + label 'process_long' input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp") + tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp") output: tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai") @@ -150,7 +212,7 @@ process applybqsr { """ gatk --java-options '-Xmx32g' ApplyBQSR \ --reference ${GENOMEREF} \ - --input ${samplename}.bam \ + --input ${bam} \ --bqsr-recal-file ${samplename}.recal_data.grp \ --output ${samplename}.bqsr.bam \ --use-jdk-inflater \ @@ -166,7 +228,6 @@ process applybqsr { } - process samtoolsindex { container = "${params.containers.logan}" label 'process_medium' @@ -198,48 +259,21 @@ process bamtocram_tonly { tuple val(tumorname), path(tumor), path(tumorbai) output: - path("${sample}.cram") - - script: - """ - samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram {$tumor}.bam - """ -} - + tuple path("${tumorname}.cram"), path("${tumorname}.cram.crai") //CRAM plus its index, emitted together as one tuple -/* -process indelrealign { - input: - tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai") - - output: - tuple val(samplename), path("${samplename}.ir.bam") script: - """ - 
/usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \ - -I ${samplename}.bam \ - -R ${GENOMEREF} \ - -o ${samplename}.intervals \ - -nt 16 \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} - - /usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \ - -R ${GENOMEREF} \ - -I ${samplename}.bam \ - -known ${MILLSINDEL} -known ${SHAPEITINDEL} \ - --use_jdk_inflater \ - --use_jdk_deflater \ - -targetIntervals ${samplename}.intervals \ - -o ${samplename}.ir.bam + samtools view -@ $task.cpus -C -T $GENOMEREF -o ${tumorname}.cram $tumor + samtools index ${tumorname}.cram -@ $task.cpus """ - - + stub: """ - touch ${samplename}.ir.bam + touch ${tumorname}.cram ${tumorname}.cram.crai """ - } -*/ + + + + diff --git a/modules/local/variant_calling.nf b/modules/local/variant_calling.nf index 3b1386f..f16c1be 100644 --- a/modules/local/variant_calling.nf +++ b/modules/local/variant_calling.nf @@ -54,7 +54,6 @@ process mutect2 { """ } - process pileup_paired_t { container "${params.containers.logan}" label 'process_highmem' @@ -64,7 +63,7 @@ process pileup_paired_t { val(normalname), path(normal), path(normalbai), path(bed) output: - tuple val(tumorname), + tuple val(tumorname), val(normalname), path("${tumor.simpleName}_${bed.simpleName}.tumor.pileup.table") script: @@ -84,7 +83,6 @@ process pileup_paired_t { } - process pileup_paired_n { container "${params.containers.logan}" label 'process_highmem' @@ -95,7 +93,8 @@ process pileup_paired_n { output: tuple val(tumorname), - path("${tumor.simpleName}_${bed.simpleName}.normal.pileup.table") + val(normalname), + path("${normalname}_${bed.simpleName}.normal.pileup.table") script: """ @@ -103,15 +102,14 @@ process pileup_paired_n { -I ${normal} \ -V $KGPGERMLINE \ -L ${bed} \ - -O ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table + -O ${normalname}_${bed.simpleName}.normal.pileup.table """ stub: """ - touch ${tumor.simpleName}_${bed.simpleName}.normal.pileup.table + touch 
${normalname}_${bed.simpleName}.normal.pileup.table """ - } @@ -120,7 +118,7 @@ process contamination_paired { label 'process_highmem' input: - tuple val(tumorname), + tuple val(tumorname), val(normalname), path(tumor_pileups), path(normal_pileups) diff --git a/nextflow.config b/nextflow.config index 6f66adb..b9afbd6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,6 +39,7 @@ params { cnv=null qc=null bam=null + indelrealign=null //Set all Inputs to null sample_sheet=null diff --git a/subworkflows/local/workflows.nf b/subworkflows/local/workflows.nf index 6c4f201..5747067 100644 --- a/subworkflows/local/workflows.nf +++ b/subworkflows/local/workflows.nf @@ -9,13 +9,14 @@ include {fc_lane; fastq_screen;kraken;qualimap_bamqc;fastqc; somalier_extract;somalier_analysis_human;somalier_analysis_mouse; multiqc} from '../../modules/local/qc.nf' -include {fastp; bwamem2; //indelrealign; +include {fastp; bwamem2; indelrealign; bqsr_ir; bqsr; gatherbqsr; applybqsr; samtoolsindex} from '../../modules/local/trim_align.nf' include {deepvariant_step1; deepvariant_step2; deepvariant_step3; deepvariant_combined;glnexus} from '../../modules/local/germline.nf' -include {mutect2; mutect2filter; pileup_paired_t; pileup_paired_n; +include {pileup_paired_t; pileup_paired_n; + mutect2; mutect2filter; contamination_paired; learnreadorientationmodel;mergemut2stats; strelka_tn; combineVariants_strelka; varscan_tn; vardict_tn; lofreq_tn; muse_tn; @@ -57,12 +58,7 @@ workflow DETERMINEBAM { params.BAMINPUT=true }else if(params.file_input){ file(params.file_input).text - //.splitCsv(header: false, sep: "\t", strip:true) - // .map{ sample,bam,bai -> - //if (bam[0] =~ /.bam/){ - // params.BAMINPUT= - //} - //} + } } @@ -108,10 +104,6 @@ workflow ALIGN { splitinterval(intervalbedin) bwamem2(fastp.out) - - //indelrealign(bwamem2.out) - //indelbambyinterval=indelrealign.out.combine(splitinterval.out.flatten()) - bqsrbambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) 
bambyinterval=bwamem2.out.combine(splitinterval.out.flatten()) @@ -135,7 +127,6 @@ workflow ALIGN { fastpout=fastp.out fastqin=fastqinput splitout=splitinterval.out - //indelbambyinterval bqsrbambyinterval sample_sheet bqsrout=applybqsr.out @@ -172,77 +163,95 @@ workflow VC { main: //Create Pairing for TN (in case of dups) sample_sheet_paired=sample_sheet|map{tu,no -> tuple ("${tu}_vs_${no}",tu, no)} - bambyinterval=bamwithsample.combine(splitout.flatten()) - //Paired Mutect2 - mutect2(bambyinterval) - pileup_paired_t(bambyinterval) - pileup_paired_n(bambyinterval) + //Prep Pileupss + pileup_paired_t(bambyinterval) + pileup_paired_n(bambyinterval) + + pileup_paired_t.out.groupTuple(by:[0,1]) + | multiMap { samplename, normalname, pileups -> + tout: tuple( samplename, normalname, + pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ) + tonly: tuple( samplename, + pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } ) + } + | set{pileup_paired_tout} + + + pileup_paired_n.out.groupTuple(by:[0,1]) + | multiMap { samplename, normalname, pileups-> + nout: tuple (samplename,normalname, + pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).normal.pileup.table/)[0][1].toInteger() } ) + nonly: tuple (normalname, + pileups.toSorted{ it -> (it.name =~ /${normalname}_(.*?).normal.pileup.table/)[0][1].toInteger() } ) + } + | set{pileup_paired_nout} + - pileup_paired_tout=pileup_paired_t.out.groupTuple() - .map{samplename,pileups-> tuple( samplename, - pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).tumor.pileup.table/)[0][1].toInteger() } , - )} - pileup_paired_nout=pileup_paired_n.out.groupTuple() - .map{samplename,pileups-> tuple( samplename, - pileups.toSorted{ it -> (it.name =~ /${samplename}_(.*?).normal.pileup.table/)[0][1].toInteger() } , - )} + pileup_paired_match=pileup_paired_tout.tout.join(pileup_paired_nout.nout,by:[0,1]) + 
contamination_paired(pileup_paired_match) + pileup_all=pileup_paired_tout.tonly.concat(pileup_paired_nout.nonly) + contamination_tumoronly(pileup_all) - pileup_paired_all=pileup_paired_tout.join(pileup_paired_nout) - contamination_paired(pileup_paired_all) - //Mutect2 TN + //Paired Mutect2 + mutect2(bambyinterval) mutect2.out.groupTuple(by:[0,1]) - | multiMap { tumor,normal,vcfs,f1r2,stats -> - mut2out_lor: tuple("${tumor}_vs_${normal}", - f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) - mut2out_mstats: tuple( "${tumor}_vs_${normal}", - stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() }) - allmut2tn: tuple( "${tumor}_vs_${normal}", - vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } ) - } + | multiMap { tumor,normal,vcfs,f1r2,stats -> + mut2out_lor: tuple("${tumor}_vs_${normal}", + f1r2.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2out_mstats: tuple( "${tumor}_vs_${normal}", + stats.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tn: tuple( "${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).mut2.vcf.gz/)[0][1].toInteger() } ) + } | set{mut2out} learnreadorientationmodel(mut2out.mut2out_lor) mergemut2stats(mut2out.mut2out_mstats) mutect2_in=mut2out.allmut2tn - | join(mergemut2stats.out) - | join(learnreadorientationmodel.out) - | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} - | join(contamination_paired.out) - | mutect2filter - | join(sample_sheet_paired) - | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} + | join(mergemut2stats.out) + | join(learnreadorientationmodel.out) + | map{t,vcf,stats,ro -> tuple(t.split('_vs_')[0],t.split('_vs_')[1],vcf,stats,ro)} + | 
join(contamination_paired.out) + | mutect2filter + | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,normvcf,normindex,stats,tumor,normal -> tuple(tumor,normal,"mutect2",normvcf,normindex)} annotvep_tn_mut2(mutect2_in) //Mutect2 Tumor Only - bambyinterval_t=bambyinterval.map{tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed ->tuple(tumorname,tumor,tumorbai,bed)} - mutect2_t_tonly(bambyinterval_t) + bambyinterval + | multiMap {tumorname,tumor,tumorbai,normalname,normalbam,normalbai,bed -> + t1: tuple(tumorname,tumor,tumorbai,bed) + n1: tuple(normalname,normalbam,normalbai,bed) + } + | set{bambyinterval_tonly} + bambyinterval_t=bambyinterval_tonly.t1.concat(bambyinterval_tonly.n1) + mutect2_t_tonly(bambyinterval_t) mutect2_t_tonly.out.groupTuple() - | multiMap { tumor,vcfs,f1r2,stats -> - mut2tout_lor: tuple(tumor, - f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) - mut2tonly_mstats: tuple( tumor, - stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) - allmut2tonly: tuple(tumor, - vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) - } + | multiMap { tumor,vcfs,f1r2,stats -> + mut2tout_lor: tuple(tumor, + f1r2.toSorted{ it -> (it.name =~ /${tumor}_(.*?).f1r2.tar.gz/)[0][1].toInteger() } ) + mut2tonly_mstats: tuple( tumor, + stats.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz.stats/)[0][1].toInteger() }) + allmut2tonly: tuple(tumor, + vcfs.toSorted{ it -> (it.name =~ /${tumor}_(.*?).tonly.mut2.vcf.gz/)[0][1].toInteger() } ) + } | set{mut2tonlyout} - learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) - mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) - contamination_tumoronly(pileup_paired_tout) + learnreadorientationmodel_tonly(mut2tonlyout.mut2tout_lor) + mergemut2stats_tonly(mut2tonlyout.mut2tonly_mstats) mutect2_in_tonly=mut2tonlyout.allmut2tonly - | join(mergemut2stats_tonly.out) - | 
join(learnreadorientationmodel_tonly.out) - | join(contamination_tumoronly.out) + | join(mergemut2stats_tonly.out) + | join(learnreadorientationmodel_tonly.out) + | join(contamination_tumoronly.out) | mutect2filter_tonly | join(sample_sheet) | map{tumor,markedvcf,markedindex,normvcf,normindex,stats,normal -> tuple(tumor,"mutect2_tonly",normvcf,normindex)} @@ -250,43 +259,41 @@ workflow VC { //Strelka TN strelka_in=strelka_tn(bambyinterval) | groupTuple(by:[0,1]) - | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", - vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex, - indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} - | combineVariants_strelka | join(sample_sheet_paired) - | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} + | map { tumor,normal,vcfs,vcfindex,indels,indelindex -> tuple("${tumor}_vs_${normal}", + vcfs.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.snvs.vcf.gz/)[0][1].toInteger() },vcfindex, + indels.toSorted{ it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).somatic.indels.vcf.gz/)[0][1].toInteger() } ,indelindex)} + | combineVariants_strelka | join(sample_sheet_paired) + | map{sample,markedvcf,markedindex,finalvcf,finalindex,tumor,normal -> tuple(tumor,normal,"strelka",finalvcf,finalindex)} annotvep_tn_strelka(strelka_in) + //Vardict TN vardict_in=vardict_tn(bambyinterval) | groupTuple(by:[0,1]) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} - | combineVariants_vardict | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it 
-> (it.name =~ /${tumor}_vs_${normal}_(.*?).vardict.vcf/)[0][1].toInteger()},"vardict")} + | combineVariants_vardict | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"vardict",normvcf,normindex)} annotvep_tn_vardict(vardict_in) //VarDict TOnly - vardict_in_tonly=bambyinterval - | map{tumorname,tumorbam,tumorbai,normname,normbam,normbai,bed -> - tuple(tumorname,tumorbam,tumorbai,bed)} - | vardict_tonly | groupTuple() - | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} - | combineVariants_vardict_tonly | join(sample_sheet) - | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} + vardict_in_tonly=vardict_tonly(bambyinterval_t) + | groupTuple() + | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.vardict.vcf/)[0][1].toInteger()},"vardict_tonly")} + | combineVariants_vardict_tonly | join(sample_sheet) + | map{tumor,marked,markedindex,normvcf,normindex,normal ->tuple(tumor,"vardict_tonly",normvcf,normindex)} annotvep_tonly_vardict(vardict_in_tonly) //VarScan TN varscan_in=bambyinterval.combine(contamination_paired.out,by:0) - | varscan_tn | groupTuple(by:[0,1]) - | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} - | combineVariants_varscan | join(sample_sheet_paired) - | map{sample,marked,markedindex,normvcf,normindex,tumor,normal ->tuple(tumor,normal,"varscan",normvcf,normindex)} + | varscan_tn | groupTuple(by:[0,1]) + | map{tumor,normal,vcf-> tuple("${tumor}_vs_${normal}",vcf.toSorted{it -> (it.name =~ /${tumor}_vs_${normal}_(.*?).varscan.vcf.gz/)[0][1].toInteger()},"varscan")} + | combineVariants_varscan | join(sample_sheet_paired) + | map{sample,marked,markedindex,normvcf,normindex,tumor,normal 
->tuple(tumor,normal,"varscan",normvcf,normindex)} annotvep_tn_varscan(varscan_in) - + //VarScan TOnly - varscan_in_tonly=bambyinterval.combine(contamination_paired.out,by:0) - | map{tumor,bam,bai,normal,nbam,nbai,bed,tpile,npile,tumorc,normalc -> - tuple(tumor,bam,bai,bed,tpile,tumorc)} | varscan_tonly | groupTuple + varscan_in_tonly=bambyinterval_t.combine(contamination_tumoronly.out,by:0) + | varscan_tonly | groupTuple | map{tumor,vcf-> tuple(tumor,vcf.toSorted{it -> (it.name =~ /${tumor}_(.*?).tonly.varscan.vcf.gz/)[0][1].toInteger()},"varscan_tonly")} | combineVariants_varscan_tonly | join(sample_sheet) @@ -319,9 +326,9 @@ workflow VC { | map{tumor,normal,vcf,vcfindex ->tuple(tumor,normal,"octopus",vcf,vcfindex)} //Octopus TOnly - octopus_in_tonly=bambyinterval.map{tumor,bam,bai,normal,nbam,nbai,bed-> - tuple(tumor,bam,bai,bed)} | octopus_tonly | bcftools_index_octopus_tonly - | groupTuple() + octopus_in_tonly=octopus_tonly(bambyinterval_t) + | bcftools_index_octopus_tonly + | groupTuple() | map{samplename,vcf,vcfindex->tuple(samplename,vcf.toSorted{it->(it.name =~ /${samplename}_(.*).tonly.octopus.vcf.gz/)[0][1].toInteger()},vcfindex,"octopus_tonly")} | combineVariants_octopus_tonly | join(sample_sheet) | @@ -348,6 +355,7 @@ workflow VC { emit: somaticcall_input=octopus_in + } @@ -372,7 +380,7 @@ workflow SV { //Survivor gunzip(manta_out).concat(svaba_out).groupTuple() - | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") + | survivor_sv | annotsv_survivor_tn | ifEmpty("Empty SV input--No SV annotated") } @@ -425,18 +433,6 @@ workflow CNVhuman { } - /* - //baminput=sample_sheet - // .map{samplename,bam,vcf-> tuple(samplename,file(bam),file("${bam}.bai"))} - - //somaticinput=sample_sheet - // .map{samplename,bam,vcf-> tuple(samplename,file(vcf))} - - - - */ - - workflow QC_NOGL { @@ -586,10 +582,24 @@ workflow INPUT_BAM { } } - splitinterval(intervalbedin) - 
bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + if (params.indelrealign){ + bqsrs= baminputonly | indelrealign | combine(splitinterval.out.flatten()) + | bqsr_ir + | groupTuple + | map { samplename,beds -> + tuple( samplename, beds.toSorted{ it -> (it.name =~ /${samplename}_(.*?).recal_data.grp/)[0][1].toInteger() } )} + | gatherbqsr + //Recalibrate the realigned BAMs: the tables above were computed from them, not from the raw input + baminput2=indelrealign.out.combine(bqsrs,by:0) + |applybqsr + + bamwithsample=baminput2.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminput2,by:0).map{it.swap(3,0)} + + } else { + bamwithsample=baminputonly.combine(sample_sheet,by:0).map{it.swap(3,0)}.combine(baminputonly,by:0).map{it.swap(3,0)} + } emit: bamwithsample