Skip to content

Commit

Permalink
Merge pull request #39 from CCBR/feat-indelrealign
Browse files Browse the repository at this point in the history
Feat indelrealign
  • Loading branch information
samarth8392 authored Apr 17, 2024
2 parents fce79b1 + 241fe3b commit f7a2108
Show file tree
Hide file tree
Showing 9 changed files with 223 additions and 176 deletions.
4 changes: 3 additions & 1 deletion conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ process {
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { check_max( 72.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }
Expand Down
13 changes: 8 additions & 5 deletions conf/genomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ params {
genomedict= "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/genome/Homo_sapiens_assembly38.dict"
wgsregion = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/resources_broad_hg38_v0_wgs_calling_regions.hg38.interval_list"
intervals= "${projectDir}/assets/hg38_v0_wgs_calling_regions.hg38.bed"
//millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
//shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
KNOWNINDELS= '/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz'
millsindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz" //Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
shapeitindel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //file(params.gold_indels2) //
INDELREF = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz" //ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNINDELS = "-known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz -known /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz"
KNOWNRECAL = '--known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GATK_resource_bundle/ALL.wgs.1000G_phase3.GRCh38.ncbi_remapper.20150424.shapeit2_indels.vcf.gz'
dbsnp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/dbsnp_138.hg38.vcf.gz"
dbsnp_indel = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
gnomad = '--germline-resource /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz' // /data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz
pon = "/data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/PON/updatedpon.vcf.gz" //pon="/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz" //file{params.pon}
kgp = "/data/CCBR_Pipeliner/CCBR_Pipeliner_Legacy/Exome-seek/hg38/GATK_resource_bundle/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
Expand All @@ -25,6 +25,7 @@ params {
vepcache = "/fdb/VEP/102/cache"
vepspecies = "homo_sapiens"
vepbuild = "GRCh38"
annotsvgenome = "GRCh38"
octopus_sforest= "--somatic-forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/somatic.v0.7.4.forest"
octopus_gforest= "--forest /data/CCBR_Pipeliner/Pipelines/LOGAN/resources/hg38/octopus/germline.v0.7.4.forest"
SEQUENZAGC = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/SEQUENZA/hg38_gc50Base.txt.gz"
Expand All @@ -37,7 +38,8 @@ params {
bwagenome= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwaindex/genome.fa"
genomedict= "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/genome.dict"
intervals="/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/genome/bwamem2index/mm10_wgsregions.bed"
KNOWNINDELS = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
KNOWNINDELS = "-known /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
INDELREF = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz"
KNOWNRECAL = "-known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_indels.vcf.gz -known-sites /data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_known_snps.vcf.gz"
dbsnp = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_allstrains_dbSNP142.vcf.gz"
pon = "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/dbsnp/mm10_dbSNP_allStrains_compSet_noIND.vcf.gz"
Expand All @@ -53,6 +55,7 @@ params {
vepcache = "/fdb/VEP/102/cache"
vepspecies = "mus_musculus"
vepbuild= "GRCm38"
annotsvgenome = "mm10"
octopus_sforest = ""
octopus_gforest = ""
SEQUENZAGC = '/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/mm10/SEQUENZA/mm10.gc50Base.wig.gz'
Expand Down
2 changes: 0 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ include {INPUT_TONLY; INPUT_TONLY_BAM;
VC_TONLY; SV_TONLY; CNVhuman_tonly; CNVmouse_tonly; QC_TONLY } from "./subworkflows/local/workflows_tonly.nf"



//SUB WORKFLOWS to SPLIT
workflow.onComplete {
if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) {
def message = Utils.spooker(workflow)
Expand Down
11 changes: 5 additions & 6 deletions modules/local/copynumber.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ ENSEMBLCACHE='/data/SCLC-BRAINMETS/cn/common/ensembl_data'
DRIVERS='/data/SCLC-BRAINMETS/cn/common/DriverGenePanel.38.tsv'
HOTSPOTS='/data/SCLC-BRAINMETS/cn/variants/KnownHotspots.somatic.38.vcf.gz'

//DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
//ascatR=


Expand Down Expand Up @@ -273,7 +272,7 @@ process amber_tonly {

"""
java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \
java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \
-tumor ${tumorname} -tumor_bam ${tumor} \
-output_dir ${tumorname}_amber \
-threads $task.cpus \
Expand Down Expand Up @@ -310,7 +309,7 @@ process amber_tn {

"""
java -Xmx32G -cp amber.jar com.hartwig.hmftools.amber.AmberApplication \
java -Xmx32G -cp /opt2/hmftools/amber.jar com.hartwig.hmftools.amber.AmberApplication \
-tumor ${tumorname} -tumor_bam ${tumor} \
-reference ${normalname} -reference_bam ${normal} \
-output_dir ${tumorname}_vs_${normalname}_amber \
Expand Down Expand Up @@ -346,7 +345,7 @@ process cobalt_tonly {

"""
java -jar -Xmx8G cobalt.jar \
java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
-tumor ${tumorname} -tumor_bam ${tumor} \
-output_dir ${tumorname}_cobalt \
-threads $task.cpus \
Expand Down Expand Up @@ -382,7 +381,7 @@ process cobalt_tn {

"""
java -jar -Xmx8G cobalt.jar \
java -jar -Xmx8G /opt2/hmftools/cobalt.jar \
-tumor ${tumorname} -tumor_bam ${tumorname} \
-reference ${normalname} -reference_bam ${normal} \
-output_dir ${tumorname}_vs_${normalname}_cobalt \
Expand Down Expand Up @@ -418,7 +417,7 @@ process purple {
script:

"""
java -jar purple.jar \
java -jar /opt2/hmftools/purple.jar \
-tumor ${tumorname} \
-amber ${amberin} \
-cobalt ${cobaltin} \
Expand Down
20 changes: 11 additions & 9 deletions modules/local/structural_variant.nf
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
GENOMEREF=file(params.genomes[params.genome].genome)
GENOME=params.genome
// AnnotSV genome build label (e.g. "GRCh38" or "mm10"), passed to `AnnotSV -genomeBuild`.
// This is a plain string, not a path: wrapping it in file() would turn it into an
// absolute path under the launch directory, which is not a valid build name.
ANNOTSVGENOME=params.genomes[params.genome].annotsvgenome
BWAGENOME=file(params.genomes[params.genome].bwagenome)
DBSNP_INDEL=file(params.genomes[params.genome].KNOWNINDELS)
INDELREF=file(params.genomes[params.genome].INDELREF)



process svaba_somatic {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand All @@ -30,7 +31,7 @@ process svaba_somatic {

script:
"""
svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME
svaba run -t ${tumor} -n ${normal} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME
"""

stub:
Expand All @@ -56,7 +57,7 @@ process svaba_somatic {


process manta_somatic {

container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand Down Expand Up @@ -102,7 +103,6 @@ process manta_somatic {
process annotsv_tn {
//AnnotSV for Manta/Svaba works with either vcf.gz or .vcf files
//Requires bedtools,bcftools

module = ['annotsv/3.3.1']

input:
Expand All @@ -119,7 +119,7 @@ process annotsv_tn {
mkdir ${sv}
AnnotSV -SVinputFile ${somaticvcf} \
-genomeBuild $GENOME \
-genomeBuild $ANNOTSVGENOME \
-SVinputInfo 1 -outputFile ${tumorname} \
-outputDir ${sv}
Expand All @@ -136,6 +136,7 @@ process annotsv_tn {


process manta_tonly {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand Down Expand Up @@ -178,6 +179,7 @@ process manta_tonly {


process svaba_tonly {
container = "${params.containers.logan}"
label 'process_highcpu'

input:
Expand All @@ -198,7 +200,7 @@ process svaba_tonly {

script:
"""
svaba run -t ${tumor} -p $task.cpus -D $DBSNP_INDEL -a ${tumor.simpleName} -G $BWAGENOME
svaba run -t ${tumor} -p $task.cpus -D $INDELREF -a ${tumor.simpleName} -G $BWAGENOME
"""

stub:
Expand Down Expand Up @@ -230,7 +232,7 @@ process gunzip {

script:
"""
gunzip ${vcf} > ${tumorname}.tumorSV.vcf
gunzip -f ${vcf} > ${tumorname}.tumorSV.vcf
"""

stub:
Expand Down Expand Up @@ -291,7 +293,7 @@ process annotsv_tonly {
mkdir ${sv}
AnnotSV -SVinputFile ${somaticvcf} \
-genomeBuild $GENOME \
-genomeBuild $ANNOTSVGENOME \
-SVinputInfo 1 -outputFile ${tumorname} \
-outputDir ${sv}
Expand Down
122 changes: 78 additions & 44 deletions modules/local/trim_align.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GENOMEREF=file(params.genomes[params.genome].genome)
GENOMEREF = file(params.genomes[params.genome].genome)
KNOWNRECAL = params.genomes[params.genome].KNOWNRECAL

KNOWNINDELS = params.genomes[params.genome].KNOWNINDELS

process fastp {
container = "${params.containers.logan}"
Expand Down Expand Up @@ -77,6 +77,70 @@ process bwamem2 {



/*
Local realignment around indels for one sample.
Runs two GATK walkers back to back, both launched through the Java 8 binary and
the jar pointed to by the GATK_JAR environment variable (escaped as \$GATK_JAR so
it is resolved by the task shell, not by Nextflow):
  1. RealignerTargetCreator -> ${samplename}.intervals
  2. IndelRealigner         -> ${samplename}.ir.bam
*/
process indelrealign {
container "${params.containers.logan}"
label 'process_long'

// The BAM and its index are staged under fixed names derived from the sample name.
input:
tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")

// NOTE(review): no command below creates the .ir.bai explicitly; presumably
// IndelRealigner writes it next to the output BAM - confirm.
output:
tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai")

// KNOWNINDELS is interpolated verbatim into both commands; it is assumed to
// already carry its own `-known <vcf>` flags - verify against the genome config.
// Both JVMs are started with a fixed 16g heap (-Xmx16g).
script:

"""
/usr/lib/jvm/java-8-openjdk-amd64/bin/java -Xmx16g -jar \$GATK_JAR -T RealignerTargetCreator \
-I ${samplename}.bam \
-R ${GENOMEREF} \
-o ${samplename}.intervals \
-nt $task.cpus \
${KNOWNINDELS}
/usr/lib/jvm/java-8-openjdk-amd64/bin/java -Xmx16g -jar \$GATK_JAR -T IndelRealigner \
-R ${GENOMEREF} \
-I ${samplename}.bam \
${KNOWNINDELS} \
-targetIntervals ${samplename}.intervals \
-o ${samplename}.ir.bam
"""

// Stub run: create empty placeholders for both declared outputs.
stub:
"""
touch ${samplename}.ir.bam ${samplename}.ir.bai
"""

}


process bqsr_ir {
/*
Base quality score recalibration table for an indel-realigned BAM.
Runs `gatk BaseRecalibrator` on ${samplename}.ir.bam restricted to one interval
file (${bed}), and writes one recalibration table per (sample, bed) pair named
${samplename}_${bed.simpleName}.recal_data.grp.
*/
container = "${params.containers.logan}"
label 'process_low'
// The realigned BAM and index are staged under fixed `.ir.bam` / `.ir.bai` names;
// `bed` is the interval file passed to --intervals.
input:
tuple val(samplename), path("${samplename}.ir.bam"), path("${samplename}.ir.bai"), path(bed)

output:
tuple val(samplename), path("${samplename}_${bed.simpleName}.recal_data.grp")

// KNOWNRECAL is interpolated verbatim; it is assumed to already contain the
// `--known-sites <vcf>` flags - verify against the genome config.
script:
"""
gatk --java-options '-Xmx16g' BaseRecalibrator \
--input ${samplename}.ir.bam \
--reference ${GENOMEREF} \
${KNOWNRECAL} \
--output ${samplename}_${bed.simpleName}.recal_data.grp \
--intervals ${bed}
"""

// Stub run: create an empty placeholder for the recalibration table.
stub:
"""
touch ${samplename}_${bed.simpleName}.recal_data.grp
"""
}

process bqsr {
/*
Base quality recalibration for all samples
Expand All @@ -103,7 +167,6 @@ process bqsr {
"""
touch ${samplename}_${bed.simpleName}.recal_data.grp
"""

}

process gatherbqsr {
Expand Down Expand Up @@ -131,16 +194,15 @@ process gatherbqsr {
"""
}


process applybqsr {
/*
Base quality recalibration for all samples to
*/
container = "${params.containers.logan}"
label 'process_low'
label 'process_long'

input:
tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai"), path("${samplename}.recal_data.grp")
tuple val(samplename), path(bam), path(bai), path("${samplename}.recal_data.grp")

output:
tuple val(samplename), path("${samplename}.bqsr.bam"), path("${samplename}.bqsr.bai")
Expand All @@ -150,7 +212,7 @@ process applybqsr {
"""
gatk --java-options '-Xmx32g' ApplyBQSR \
--reference ${GENOMEREF} \
--input ${samplename}.bam \
--input ${bam} \
--bqsr-recal-file ${samplename}.recal_data.grp \
--output ${samplename}.bqsr.bam \
--use-jdk-inflater \
Expand All @@ -166,7 +228,6 @@ process applybqsr {
}



process samtoolsindex {
container = "${params.containers.logan}"
label 'process_medium'
Expand Down Expand Up @@ -198,48 +259,21 @@ process bamtocram_tonly {
tuple val(tumorname), path(tumor), path(tumorbai)

output:
path("${sample}.cram")

script:
"""
samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram {$tumor}.bam
"""
}

path("${tumorname}.cram"), path("${tumorname}.cram.crai")

/*
process indelrealign {
input:
tuple val(samplename), path("${samplename}.bam"), path("${samplename}.bai")
output:
tuple val(samplename), path("${samplename}.ir.bam")

script:
"""
/usr/bin/java -Xmx32g -jar \${GATK_JAR} -T RealignerTargetCreator \
-I ${samplename}.bam \
-R ${GENOMEREF} \
-o ${samplename}.intervals \
-nt 16 \
-known ${MILLSINDEL} -known ${SHAPEITINDEL}
/usr/bin/java -Xmx32g -jar \${GATK_JAR} -T IndelRealigner \
-R ${GENOMEREF} \
-I ${samplename}.bam \
-known ${MILLSINDEL} -known ${SHAPEITINDEL} \
--use_jdk_inflater \
--use_jdk_deflater \
-targetIntervals ${samplename}.intervals \
-o ${samplename}.ir.bam
samtools view -@ $task.cpus -C -T $GENOMEREF -o ${sample}.cram $tumor
samtools index ${tumorname}.cram -@ $task.cpus
"""

stub:
"""
touch ${samplename}.ir.bam
touch ${tumorname}.cram ${tumorname}.cram.crai
"""
}
*/




Loading

0 comments on commit f7a2108

Please sign in to comment.