Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle CTX_PP/QQ and CTX_PQ/QP CPX_TYPE values in SVConcordance #8885

Merged
merged 5 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package org.broadinstitute.hellbender.tools.spark.sv.utils;

import com.google.common.collect.HashBiMap;
import htsjdk.variant.variantcontext.Allele;

import java.util.Map;

import static java.util.Map.entry;

public final class GATKSVVCFConstants {

// todo: add these and the other standard SV info fields from the VCF spec to htsjdk VCFStandardHeaderLines
Expand Down Expand Up @@ -76,6 +81,7 @@ public final class GATKSVVCFConstants {
public static final String CPX_INTERVALS = "CPX_INTERVALS";
public static final String CPX_TYPE = "CPX_TYPE";

// keep in sync with map below
public enum ComplexVariantSubtype {
delINV,
INVdel,
Expand All @@ -95,6 +101,26 @@ public enum ComplexVariantSubtype {
CTX_INV
}

// keep in sync with enum above
public static final HashBiMap<String, ComplexVariantSubtype> ComplexVariantSubtypeMap = HashBiMap.create(Map.ofEntries(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public static final HashBiMap<String, ComplexVariantSubtype> ComplexVariantSubtypeMap = HashBiMap.create(Map.ofEntries(
public static final HashBiMap<String, ComplexVariantSubtype> COMPLEX_VARIANT_SUBTYPE_MAP = HashBiMap.create(Map.ofEntries(

entry("delINV", ComplexVariantSubtype.delINV),
entry("INVdel", ComplexVariantSubtype.INVdel),
entry("dupINV", ComplexVariantSubtype.dupINV),
entry("INVdup", ComplexVariantSubtype.INVdup),
entry("delINVdel", ComplexVariantSubtype.delINVdel),
entry("dupINVdup", ComplexVariantSubtype.dupINVdup),
entry("delINVdup", ComplexVariantSubtype.delINVdup),
entry("dupINVdel", ComplexVariantSubtype.dupINVdel),
entry("piDUP_FR", ComplexVariantSubtype.piDUP_FR),
entry("piDUP_RF", ComplexVariantSubtype.piDUP_RF),
entry("dDUP", ComplexVariantSubtype.dDUP),
entry("dDUP_iDEL", ComplexVariantSubtype.dDUP_iDEL),
entry("INS_iDEL", ComplexVariantSubtype.INS_iDEL),
entry("CTX_PP/QQ", ComplexVariantSubtype.CTX_PP_QQ),
entry("CTX_PQ/QP", ComplexVariantSubtype.CTX_PQ_QP),
entry("CTX_INV", ComplexVariantSubtype.CTX_INV)
));

// not defined in output vcf header but used in internal id that is currently output in the ID column
public static final String INTERVAL_VARIANT_ID_FIELD_SEPARATOR = "_";
public static final String DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING = "DEL-DUPLICATION-TANDEM-CONTRACTION";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,4 @@ public Double getLog10PError() {
return log10PError;
}

/**
 * Returns the stored {@code cpxSubtype} value.
 *
 * @return the current {@code cpxSubtype} value, returned as-is
 */
public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() {
    return this.cpxSubtype;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record)
builder.attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, chr2);
}
if (cpxType != null) {
builder.attribute(GATKSVVCFConstants.CPX_TYPE, record.getComplexSubtype().toString());
builder.attribute(GATKSVVCFConstants.CPX_TYPE, getComplexSubtypeString(cpxType));
}

builder.attribute(GATKSVVCFConstants.SVLEN, record.getLength());
Expand Down Expand Up @@ -426,15 +426,19 @@ public static List<String> getAlgorithms(final VariantContext variant) {

public static GATKSVVCFConstants.ComplexVariantSubtype getComplexSubtype(final VariantContext variant) {
Utils.nonNull(variant);
final String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
if (subtypeString == null) {
return null;
}
if (!VALID_CPX_SUBTYPES.contains(subtypeString)) {
if (!GATKSVVCFConstants.ComplexVariantSubtypeMap.containsKey(subtypeString)) {
throw new IllegalArgumentException("Invalid CPX subtype: " + subtypeString + ", valid values are: " +
String.join(", ", VALID_CPX_SUBTYPES));
}
return GATKSVVCFConstants.ComplexVariantSubtype.valueOf(subtypeString);
return GATKSVVCFConstants.ComplexVariantSubtypeMap.get(subtypeString);
}

/**
 * Converts a complex-variant subtype constant to its string form by a reverse lookup in
 * {@code GATKSVVCFConstants.ComplexVariantSubtypeMap}.
 *
 * @param subtype subtype constant to look up
 * @return the string key mapped to {@code subtype}, or {@code null} when the map has no entry for it
 */
public static String getComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype) {
    final String subtypeString = GATKSVVCFConstants.ComplexVariantSubtypeMap.inverse().get(subtype);
    return subtypeString;
}

private static String getStrands(final VariantContext variant, final GATKSVVCFConstants.StructuralVariantAnnotationType type) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils;
import org.broadinstitute.hellbender.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SVIntervalTree;
import org.broadinstitute.hellbender.utils.SimpleInterval;
Expand Down Expand Up @@ -862,12 +863,7 @@ protected static boolean includesDispersedDuplication(final GATKSVVCFConstants.C
protected Map<String, Object> annotateStructuralVariant(final VariantContext variant) {
final Map<String, Set<String>> variantConsequenceDict = new HashMap<>();
final GATKSVVCFConstants.StructuralVariantAnnotationType overallSVType = getSVType(variant);
final String complexTypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
GATKSVVCFConstants.ComplexVariantSubtype complexType = null;
if (complexTypeString != null) {
// replace / in CTX_PP/QQ and CTX_PQ/QP with _ to match ComplexVariantSubtype constants which cannot contain slashes
complexType = GATKSVVCFConstants.ComplexVariantSubtype.valueOf(complexTypeString.replace("/", "_"));
}
final GATKSVVCFConstants.ComplexVariantSubtype complexType = SVCallRecordUtils.getComplexSubtype(variant);
final boolean includesDispersedDuplication = includesDispersedDuplication(complexType, COMPLEX_SUBTYPES_WITH_DISPERSED_DUP);
final List<SVSegment> svSegmentsForGeneOverlaps = getSVSegments(variant, overallSVType, maxBreakendLen, complexType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ protected SVCallRecord minimizeTruthFootprint(final SVCallRecord item) {
final List<Genotype> genotypes = item.getGenotypes().stream().map(SVConcordance::stripTruthGenotype).collect(Collectors.toList());
return new SVCallRecord(item.getId(), item.getContigA(), item.getPositionA(),
item.getStrandA(), item.getContigB(), item.getPositionB(), item.getStrandB(), item.getType(),
item.getCpxSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getComplexSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -583,4 +583,72 @@ public void testCreate(final VariantContext variant, final SVCallRecord expected
final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true);
SVTestUtils.assertEqualsExceptExcludedAttributes(resultKeepAttr, expected, Collections.emptyList());
}

/**
 * Supplies {variant, expected subtype} rows for {@code testGetComplexSubtype}: a CPX record whose
 * CPX_TYPE string equals an enum constant name, a CPX record whose CPX_TYPE string contains a
 * slash, and a DEL record with no CPX_TYPE attribute (expected result {@code null}).
 */
@DataProvider(name = "testGetComplexSubtypeData")
public Object[][] testGetComplexSubtypeData() {
    // CPX_TYPE string identical to the enum constant name
    final VariantContext dupInvDupVariant = new VariantContextBuilder()
            .source("source")
            .id("id")
            .chr("chr1")
            .start(2000)
            .stop(3000)
            .alleles(Arrays.asList(Allele.REF_N, Allele.create("<CPX>", false)))
            .attributes(Map.of(
                    GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
                    GATKSVVCFConstants.CPX_TYPE, "dupINVdup"
            ))
            .make();

    // CPX_TYPE string containing a slash, which differs from the enum constant name
    final VariantContext ctxVariant = new VariantContextBuilder()
            .source("source")
            .id("id")
            .chr("chr1")
            .start(2000)
            .stop(3000)
            .alleles(Arrays.asList(Allele.REF_N, Allele.create("<CPX>", false)))
            .attributes(Map.of(
                    GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
                    GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
            ))
            .make();

    // no CPX_TYPE attribute at all
    final VariantContext delVariant = new VariantContextBuilder()
            .source("source")
            .id("id")
            .chr("chr1")
            .start(2000)
            .stop(3000)
            .alleles(Arrays.asList(Allele.REF_N, Allele.create("<DEL>", false)))
            .attributes(Map.of(
                    GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL
            ))
            .make();

    return new Object[][]{
            {dupInvDupVariant, GATKSVVCFConstants.ComplexVariantSubtype.dupINVdup},
            {ctxVariant, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ},
            {delVariant, null}
    };
}

/**
 * Asserts that {@code SVCallRecordUtils.getComplexSubtype} returns the expected subtype
 * (possibly {@code null}) for each variant supplied by the data provider.
 */
@Test(dataProvider = "testGetComplexSubtypeData")
public void testGetComplexSubtype(final VariantContext variant, final GATKSVVCFConstants.ComplexVariantSubtype expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtype(variant), expected);
}

/**
 * Supplies {subtype, expected string} rows for {@code testGetComplexSubtypeString}: two subtypes
 * whose string form contains a slash and one whose string form equals the constant name.
 */
@DataProvider(name = "testGetComplexSubtypeStringData")
public Object[][] testGetComplexSubtypeStringData() {
    final Object[] pqQpRow = {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PQ_QP, "CTX_PQ/QP"};
    final Object[] ppQqRow = {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, "CTX_PP/QQ"};
    final Object[] insIdelRow = {GATKSVVCFConstants.ComplexVariantSubtype.INS_iDEL, "INS_iDEL"};
    return new Object[][]{pqQpRow, ppQqRow, insIdelRow};
}

/**
 * Asserts that {@code SVCallRecordUtils.getComplexSubtypeString} returns the expected string
 * for each subtype supplied by the data provider.
 */
@Test(dataProvider = "testGetComplexSubtypeStringData")
public void testGetComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype, final String expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtypeString(subtype), expected);
}
}
Loading