Skip to content

Commit

Permalink
Merge pull request #521 from genomic-medicine-sweden/vcf2cytosurerename
Browse files Browse the repository at this point in the history
Add option to annotate vcf2cytosure file with different(customer) ids
  • Loading branch information
ramprasadn authored Mar 1, 2024
2 parents 653623c + 4f088f3 commit 4f02eb0
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 15 deletions.
24 changes: 24 additions & 0 deletions assets/sample_id_map.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/sample_id_map.json",
"title": "Schema for sample_id_map",
"description": "Schema for the file provided with params.sample_id_map",
"type": "array",
"items": {
"type": "object",
"properties": {
"customer_id": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Customer id cannot contain spaces"
},
"internal_id": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Internal id cannot contain spaces",
"meta": ["id"]
}
},
"required": ["customer_id", "internal_id"]
}
}
11 changes: 9 additions & 2 deletions conf/modules/generate_cytosure_files.config
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,14 @@ process {
'--output-type z',
"--exclude 'gnomad_svAF > 0.05'"
].join(' ') }
ext.prefix = { ${meta.id} }
ext.prefix = { "${meta.id}" }
}

// Settings for the BCFTOOLS_REHEADER_SV_VCF step of GENERATE_CYTOSURE_FILES:
// the sample name in the VCF header is replaced with the id held in meta.custid.
withName: '.*GENERATE_CYTOSURE_FILES:BCFTOOLS_REHEADER_SV_VCF' {
// Runs before the task script: writes meta.custid as the only line of <custid>.txt.
beforeScript = { "echo ${meta.custid} > ${meta.custid}.txt" }
// Hands the file written above to the tool via --samples.
// NOTE(review): bcftools reheader --samples reads new sample names one per line, in header order —
// this presumably relies on the input VCF containing exactly one sample; confirm against the caller.
ext.args = { "--samples ${meta.custid}.txt" }
// Second argument string; presumably selects uncompressed VCF output — how args2 is consumed
// is defined by the module, which is not visible here (TODO confirm).
ext.args2 = "--output-type v"
// Output prefix is the customer id.
ext.prefix = { "${meta.custid}" }
}

withName: '.*GENERATE_CYTOSURE_FILES:VCF2CYTOSURE' {
Expand All @@ -39,7 +46,7 @@ process {
'--size 5000',
'--maxbnd 5000'
].join(' ') }
ext.prefix = { "${meta.id}" }
// Prefer the customer id when one is set, otherwise fall back to the internal sample id.
// The check is on meta.custid itself: an interpolated "${meta.custid}" is never an empty
// string (a missing value renders as "null"), so testing the string would always be truthy
// and the meta.id fallback could never be selected.
ext.prefix = { meta.custid ? "${meta.custid}" : "${meta.id}" }
publishDir = [
path: { "${params.outdir}/vcf2cytosure" },
mode: params.publish_dir_mode,
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ params {
reduced_penetrance = null
readcount_intervals = null
rtg_truthvcfs = null
sample_id_map = null
sequence_dictionary = null
score_config_mt = null
score_config_snv = null
Expand Down
11 changes: 11 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,17 @@
"help_text": "Should be Stranger's extended JSON as described at https://github.com/Clinical-Genomics/stranger/blob/master/stranger/resources/variant_catalog_grch37.json. This file is used by both ExpansionHunter and Stranger",
"fa_icon": "fas fa-file"
},
"sample_id_map": {
"type": "string",
"exists": true,
"format": "file-path",
"description": "Path to a file containing internal ids and customer ids in csv format.",
"fa_icon": "fas fa-file-csv",
"mimetype": "text/csv",
"schema": "assets/sample_id_map.json",
"help_text": "Optional file to rename sample ids in the vcf2cytosure vcf",
"pattern": "^\\S+\\.csv$"
},
"vcf2cytosure_blacklist": {
"type": "string",
"help_text": "Optional file to blacklist regions for VCF2cytosure",
Expand Down
69 changes: 56 additions & 13 deletions subworkflows/local/generate_cytosure_files.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,79 @@
// Convert VCF with structural variations to the “.CGH” format used by the CytoSure Interpret Software
//

include { BCFTOOLS_VIEW as SPLIT_AND_FILTER_SV_VCF } from '../../modules/nf-core/bcftools/view/main'
include { TIDDIT_COV as TIDDIT_COV_VCF2CYTOSURE } from '../../modules/nf-core/tiddit/cov/main'
include { VCF2CYTOSURE } from '../../modules/nf-core/vcf2cytosure/main'
include { BCFTOOLS_VIEW as SPLIT_AND_FILTER_SV_VCF } from '../../modules/nf-core/bcftools/view/main'
include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_SV_VCF } from '../../modules/nf-core/bcftools/reheader/main'
include { TIDDIT_COV as TIDDIT_COV_VCF2CYTOSURE } from '../../modules/nf-core/tiddit/cov/main'
include { VCF2CYTOSURE } from '../../modules/nf-core/vcf2cytosure/main'

workflow GENERATE_CYTOSURE_FILES {
take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_tbi // channel: [mandatory] [ val(meta), path(vcf_index) ]
ch_bam // channel: [mandatory] [ val(meta), path(bam) ]
ch_blacklist // channel: [optional] [path(blacklist)]
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_tbi // channel: [mandatory] [ val(meta), path(vcf_index) ]
ch_bam // channel: [mandatory] [ val(meta), path(bam) ]
ch_sample_id_map // channel: [optional] [val(id), val(id)]
ch_blacklist // channel: [optional] [path(blacklist)]

main:
ch_versions = Channel.empty()
ch_versions = Channel.empty()
ch_reheader_out = Channel.empty()

TIDDIT_COV_VCF2CYTOSURE (ch_bam, [[],[]])

// Build channel: [val(sample_meta), path(vcf), path(vcf_index)]
ch_vcf.join( ch_tbi, failOnMismatch: true )
.set { ch_vcf_tbi }

ch_bam.combine(ch_vcf_tbi).map {
meta_sample, bam, meta_case, vcf, tbi ->
return [ meta_sample, vcf, tbi ]
}.set { ch_sample_vcf }
// Pair every sample BAM with the case-level VCF/index and build a reduced
// per-sample meta map (id taken from meta_sample.sample, plus sex).
ch_bam.combine(ch_vcf_tbi)
.map {
meta_sample, bam, meta_case, vcf, tbi ->
// `def` keeps new_meta local to this closure invocation; without it the
// variable is script-global and concurrent invocations can overwrite each
// other's value.
// NOTE(review): this map is the key of the join that follows; a match
// presumably requires ch_sample_id_map to emit an identical map (same
// keys, including sex) as its first element — confirm.
def new_meta = ['id':meta_sample.sample, 'sex':meta_sample.sex]
return [ new_meta, vcf, tbi ]
}
.join(ch_sample_id_map, remainder: true)
.branch { it ->
id: it[3].equals(null)
return [it[0] + [custid:it[0].id], it[1], it[2]]
custid: !(it[3].equals(null))
return [it[0] + [custid:it[3]], it[1], it[2]]
}
.set { ch_for_mix }

Channel.empty()
.mix(ch_for_mix.id, ch_for_mix.custid)
.set { ch_sample_vcf }

// Split the VCF into per-sample VCFs and apply the frequency filter
SPLIT_AND_FILTER_SV_VCF ( ch_sample_vcf, [], [], [] )

if (params.sample_id_map != null) {

SPLIT_AND_FILTER_SV_VCF.out.vcf
.map { meta, vcf -> return [meta, vcf, [], []]}
.set { ch_reheader_in }

BCFTOOLS_REHEADER_SV_VCF ( ch_reheader_in, [[:],[]] ).vcf
.set {ch_reheader_out}

ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_SV_VCF.out.versions.first())
}

SPLIT_AND_FILTER_SV_VCF.out.vcf
.join(ch_reheader_out, remainder: true)
.branch { it ->
split: it[2].equals(null)
return [it[0], it[1]]
reheader: !(it[2].equals(null))
return [it[0], it[2]]
}
.set { ch_for_mix }

Channel.empty()
.mix(ch_for_mix.split, ch_for_mix.reheader)
.set { ch_vcf2cytosure_in }

VCF2CYTOSURE (
SPLIT_AND_FILTER_SV_VCF.out.vcf,
ch_vcf2cytosure_in,
TIDDIT_COV_VCF2CYTOSURE.out.cov,
[[:], []], [[:], []],
ch_blacklist
Expand Down
3 changes: 3 additions & 0 deletions workflows/raredisease.nf
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ workflow RAREDISEASE {
: Channel.value([])
ch_rtg_truthvcfs = params.rtg_truthvcfs ? Channel.fromPath(params.rtg_truthvcfs).collect()
: Channel.value([])
ch_sample_id_map = params.sample_id_map ? Channel.fromSamplesheet("sample_id_map")
: Channel.empty()
ch_score_config_mt = params.score_config_mt ? Channel.fromPath(params.score_config_mt).collect()
: Channel.value([])
ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
Expand Down Expand Up @@ -622,6 +624,7 @@ workflow RAREDISEASE {
ch_sv_annotate.vcf_ann,
ch_sv_annotate.tbi,
ch_mapped.genome_marked_bam,
ch_sample_id_map,
ch_vcf2cytosure_blacklist
)
ch_versions = ch_versions.mix(GENERATE_CYTOSURE_FILES.out.versions)
Expand Down

0 comments on commit 4f02eb0

Please sign in to comment.