Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle CTX_PP/QQ and CTX_PQ/QP CPX_TYPE values in SVConcordance #8885

Merged
merged 5 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package org.broadinstitute.hellbender.tools.spark.sv.utils;

import com.google.common.collect.HashBiMap;
import htsjdk.variant.variantcontext.Allele;

import java.util.Map;

import static java.util.Map.entry;

public final class GATKSVVCFConstants {

// todo: add these and the other standard SV info fields from the VCF spec to htsjdk VCFStandardHeaderLines
Expand Down Expand Up @@ -76,6 +81,7 @@ public final class GATKSVVCFConstants {
public static final String CPX_INTERVALS = "CPX_INTERVALS";
public static final String CPX_TYPE = "CPX_TYPE";

// keep in sync with map below
public enum ComplexVariantSubtype {
delINV,
INVdel,
Expand All @@ -95,6 +101,26 @@ public enum ComplexVariantSubtype {
CTX_INV
}

// keep in sync with enum above
/**
 * Bidirectional lookup between CPX_TYPE string values and {@link ComplexVariantSubtype} constants.
 *
 * Use {@code get(string)} to go from a string to its enum constant and {@code inverse().get(subtype)}
 * to go from an enum constant back to its string. For most entries the string is identical to the
 * enum constant name; the two reciprocal-translocation entries are the exception (see below).
 *
 * NOTE(review): {@link HashBiMap} is mutable and this field is public; callers are presumably expected
 * to treat it as read-only -- confirm, or consider an immutable BiMap.
 */
public static final HashBiMap<String, ComplexVariantSubtype> COMPLEX_VARIANT_SUBTYPE_MAP = HashBiMap.create(Map.ofEntries(
entry("delINV", ComplexVariantSubtype.delINV),
entry("INVdel", ComplexVariantSubtype.INVdel),
entry("dupINV", ComplexVariantSubtype.dupINV),
entry("INVdup", ComplexVariantSubtype.INVdup),
entry("delINVdel", ComplexVariantSubtype.delINVdel),
entry("dupINVdup", ComplexVariantSubtype.dupINVdup),
entry("delINVdup", ComplexVariantSubtype.delINVdup),
entry("dupINVdel", ComplexVariantSubtype.dupINVdel),
entry("piDUP_FR", ComplexVariantSubtype.piDUP_FR),
entry("piDUP_RF", ComplexVariantSubtype.piDUP_RF),
entry("dDUP", ComplexVariantSubtype.dDUP),
entry("dDUP_iDEL", ComplexVariantSubtype.dDUP_iDEL),
entry("INS_iDEL", ComplexVariantSubtype.INS_iDEL),
// The next two keys contain '/', which is not legal in a Java identifier, so the string form
// differs from the enum constant name (which uses '_' instead).
entry("CTX_PP/QQ", ComplexVariantSubtype.CTX_PP_QQ),
entry("CTX_PQ/QP", ComplexVariantSubtype.CTX_PQ_QP),
entry("CTX_INV", ComplexVariantSubtype.CTX_INV)
));

// not defined in output vcf header but used in internal id that is currently output in the ID column
public static final String INTERVAL_VARIANT_ID_FIELD_SEPARATOR = "_";
public static final String DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING = "DEL-DUPLICATION-TANDEM-CONTRACTION";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,4 @@ public Double getLog10PError() {
return log10PError;
}

/**
 * Accessor for this record's {@code cpxSubtype} field.
 *
 * @return the value of {@code cpxSubtype}, unmodified
 *         (NOTE(review): presumably null when the record has no complex subtype -- confirm)
 */
public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() {
return cpxSubtype;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record)
builder.attribute(GATKSVVCFConstants.CONTIG2_ATTRIBUTE, chr2);
}
if (cpxType != null) {
builder.attribute(GATKSVVCFConstants.CPX_TYPE, record.getComplexSubtype().toString());
builder.attribute(GATKSVVCFConstants.CPX_TYPE, getComplexSubtypeString(cpxType));
}

builder.attribute(GATKSVVCFConstants.SVLEN, record.getLength());
Expand Down Expand Up @@ -329,8 +329,7 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari
final int positionA = variant.getStart();

final GATKSVVCFConstants.StructuralVariantAnnotationType type = inferStructuralVariantType(variant);
final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype =
type == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX ? getComplexSubtype(variant) : null;
final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype = getComplexSubtype(variant);
final List<String> algorithms = getAlgorithms(variant);

final String strands;
Expand Down Expand Up @@ -426,15 +425,19 @@ public static List<String> getAlgorithms(final VariantContext variant) {

public static GATKSVVCFConstants.ComplexVariantSubtype getComplexSubtype(final VariantContext variant) {
Utils.nonNull(variant);
final String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
String subtypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
if (subtypeString == null) {
return null;
}
if (!VALID_CPX_SUBTYPES.contains(subtypeString)) {
if (!GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.containsKey(subtypeString)) {
throw new IllegalArgumentException("Invalid CPX subtype: " + subtypeString + ", valid values are: " +
String.join(", ", VALID_CPX_SUBTYPES));
}
return GATKSVVCFConstants.ComplexVariantSubtype.valueOf(subtypeString);
return GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP.get(subtypeString);
}

/**
 * Looks up the string form of a complex variant subtype by querying the inverse view of
 * {@link GATKSVVCFConstants#COMPLEX_VARIANT_SUBTYPE_MAP}.
 *
 * @param subtype subtype to convert
 * @return the string mapped to {@code subtype}, or {@code null} if the map has no entry for it
 */
public static String getComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype) {
    return GATKSVVCFConstants.COMPLEX_VARIANT_SUBTYPE_MAP
            .inverse()
            .get(subtype);
}

private static String getStrands(final VariantContext variant, final GATKSVVCFConstants.StructuralVariantAnnotationType type) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils;
import org.broadinstitute.hellbender.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SVIntervalTree;
import org.broadinstitute.hellbender.utils.SimpleInterval;
Expand Down Expand Up @@ -862,12 +863,7 @@ protected static boolean includesDispersedDuplication(final GATKSVVCFConstants.C
protected Map<String, Object> annotateStructuralVariant(final VariantContext variant) {
final Map<String, Set<String>> variantConsequenceDict = new HashMap<>();
final GATKSVVCFConstants.StructuralVariantAnnotationType overallSVType = getSVType(variant);
final String complexTypeString = variant.getAttributeAsString(GATKSVVCFConstants.CPX_TYPE, null);
GATKSVVCFConstants.ComplexVariantSubtype complexType = null;
if (complexTypeString != null) {
// replace / in CTX_PP/QQ and CTX_PQ/QP with _ to match ComplexVariantSubtype constants which cannot contain slashes
complexType = GATKSVVCFConstants.ComplexVariantSubtype.valueOf(complexTypeString.replace("/", "_"));
}
final GATKSVVCFConstants.ComplexVariantSubtype complexType = SVCallRecordUtils.getComplexSubtype(variant);
final boolean includesDispersedDuplication = includesDispersedDuplication(complexType, COMPLEX_SUBTYPES_WITH_DISPERSED_DUP);
final List<SVSegment> svSegmentsForGeneOverlaps = getSVSegments(variant, overallSVType, maxBreakendLen, complexType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ protected SVCallRecord minimizeTruthFootprint(final SVCallRecord item) {
final List<Genotype> genotypes = item.getGenotypes().stream().map(SVConcordance::stripTruthGenotype).collect(Collectors.toList());
return new SVCallRecord(item.getId(), item.getContigA(), item.getPositionA(),
item.getStrandA(), item.getContigB(), item.getPositionB(), item.getStrandB(), item.getType(),
item.getCpxSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getComplexSubtype(), item.getLength(), item.getAlgorithms(), item.getAlleles(), genotypes,
item.getAttributes(), item.getFilters(), item.getLog10PError(), dictionary);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class GATKSVVariantContextUtils {

public static final Allele BND_ALLELE = Allele.create("<BND>");
public static final Allele CPX_ALLELE = Allele.create("<CPX>");
public static final Allele CTX_ALLELE = Allele.create("<CTX>");

/**
* Build the list of called alleles based on reference and called copy numbers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@ public class SVCallRecordUtilsUnitTest {
private static final List<Allele> ALLELES_INS = Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS);
private static final List<Allele> ALLELES_BND = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE);
private static final List<Allele> ALLELES_CPX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE);
private static final List<Allele> ALLELES_CTX = Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE);

private static final Map<String, Object> TEST_ATTRIBUTES = Collections.singletonMap("TEST_KEY", "TEST_VAL");
private static final Map<String, Object> TEST_ATTRIBUTES_CPX = Lists.newArrayList(
new AbstractMap.SimpleImmutableEntry<String, Object>("TEST_KEY", "TEST_VAL"),
new AbstractMap.SimpleImmutableEntry<String, Object>(GATKSVVCFConstants.CPX_TYPE, GATKSVVCFConstants.ComplexVariantSubtype.dDUP.toString())
).stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
private static final Map<String, Object> TEST_ATTRIBUTES_CTX = Map.of(
"TEST_KEY", "TEST_VAL",
GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
);

private static final Genotype GENOTYPE_DEL_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL))
Expand All @@ -54,6 +59,8 @@ public class SVCallRecordUtilsUnitTest {
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.BND_ALLELE)).make();
private static final Genotype GENOTYPE_CPX_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CPX_ALLELE)).make();
private static final Genotype GENOTYPE_CTX_1 = new GenotypeBuilder("sample1")
.alleles(Lists.newArrayList(Allele.REF_N, GATKSVVariantContextUtils.CTX_ALLELE)).make();

private static final Comparator<SVCallRecord> RECORD_COMPARATOR = SVCallRecordUtils.getCallComparator(SVTestUtils.hg38Dict);

Expand Down Expand Up @@ -570,6 +577,15 @@ public Object[][] testCreateData() {
SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1),
TEST_ATTRIBUTES, Collections.emptySet(), null)
},
{
SVTestUtils.newVariantContext("var11", "chr1", 1000, 1000,
ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1), -1, null,
GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, SVTestUtils.PESR_ONLY_ALGORITHM_LIST,
"chrX", 2000, TEST_ATTRIBUTES_CTX, null),
new SVCallRecord("var11", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, null,
SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CTX, Collections.singletonList(GENOTYPE_CTX_1),
TEST_ATTRIBUTES, Collections.emptySet(), null)
},
};
}

Expand All @@ -583,4 +599,72 @@ public void testCreate(final VariantContext variant, final SVCallRecord expected
final SVCallRecord resultKeepAttr = SVCallRecordUtils.create(variant, true);
SVTestUtils.assertEqualsExceptExcludedAttributes(resultKeepAttr, expected, Collections.emptyList());
}

/**
 * Supplies (variant, expected subtype) pairs for {@code testGetComplexSubtype}: a CPX variant whose
 * CPX_TYPE string equals its enum name, a CPX variant whose CPX_TYPE string contains a slash, and a
 * DEL variant that carries no CPX_TYPE attribute at all (expected subtype {@code null}).
 */
@DataProvider(name = "testGetComplexSubtypeData")
public Object[][] testGetComplexSubtypeData() {
    final VariantContext cpxDupInvDup = makeComplexSubtypeTestVariant("<CPX>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
            GATKSVVCFConstants.CPX_TYPE, "dupINVdup"
    ));
    final VariantContext cpxWithSlashSubtype = makeComplexSubtypeTestVariant("<CPX>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX,
            GATKSVVCFConstants.CPX_TYPE, "CTX_PP/QQ"
    ));
    final VariantContext delWithoutSubtype = makeComplexSubtypeTestVariant("<DEL>", Map.of(
            GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL
    ));

    return new Object[][]{
            {cpxDupInvDup, GATKSVVCFConstants.ComplexVariantSubtype.dupINVdup},
            {cpxWithSlashSubtype, GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ},
            {delWithoutSubtype, null}
    };
}

// Builds the fixed chr1:2000-3000 test variant shared by all rows; only the ALT allele and attributes vary.
private static VariantContext makeComplexSubtypeTestVariant(final String symbolicAllele,
                                                            final Map<String, Object> attributes) {
    return new VariantContextBuilder()
            .source("source")
            .id("id")
            .chr("chr1")
            .start(2000)
            .stop(3000)
            .alleles(Arrays.asList(Allele.REF_N, Allele.create(symbolicAllele, false)))
            .attributes(attributes)
            .make();
}

/**
 * Checks that {@code SVCallRecordUtils.getComplexSubtype} yields the expected subtype for each
 * variant supplied by the data provider.
 */
@Test(dataProvider= "testGetComplexSubtypeData")
public void testGetComplexSubtype(final VariantContext variant, final GATKSVVCFConstants.ComplexVariantSubtype expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtype(variant), expected);
}

/**
 * Supplies (subtype, expected string) pairs for {@code testGetComplexSubtypeString}, covering both
 * slash-containing strings and one whose string equals the enum constant name.
 */
@DataProvider(name = "testGetComplexSubtypeStringData")
public Object[][] testGetComplexSubtypeStringData() {
    final Object[][] subtypeAndExpectedString = {
            {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PQ_QP, "CTX_PQ/QP"},
            {GATKSVVCFConstants.ComplexVariantSubtype.CTX_PP_QQ, "CTX_PP/QQ"},
            {GATKSVVCFConstants.ComplexVariantSubtype.INS_iDEL, "INS_iDEL"}
    };
    return subtypeAndExpectedString;
}

/**
 * Checks that {@code SVCallRecordUtils.getComplexSubtypeString} yields the expected string for each
 * subtype supplied by the data provider.
 */
@Test(dataProvider= "testGetComplexSubtypeStringData")
public void testGetComplexSubtypeString(final GATKSVVCFConstants.ComplexVariantSubtype subtype, final String expected) {
    Assert.assertEquals(SVCallRecordUtils.getComplexSubtypeString(subtype), expected);
}
}
Loading