Skip to content

Commit

Permalink
(SV) bug-fixing commit: (broadinstitute#4677)
Browse files Browse the repository at this point in the history
* better exception message and fix bug in existing cpx variant test data

(SV) new cpx variant scenario commit:
  * optionally add reference segments when the would be segment is 2-bp long (before all such segments are skipped)
  • Loading branch information
SHuang-Broad authored and cwhelan committed May 25, 2018
1 parent 75fee75 commit 9b84f3f
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.broadinstitute.hellbender.tools.spark.sv.discovery.AnnotatedVariantProducer;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import scala.Tuple2;

Expand Down Expand Up @@ -136,8 +137,9 @@ private void checkBoundedBySharedSingleBase(final CpxVariantInducingAssemblyCont
checkBoundedBySharedSingleBase(cpxVariantInducingAssemblyContig);
}

final Set<SimpleInterval> twoBaseBoundaries = cpxVariantInducingAssemblyContig.getTwoBaseBoundaries();
affectedRefRegion = getAffectedReferenceRegion(segmentingLocations);
referenceSegments = extractRefSegments(basicInfo, segmentingLocations);
referenceSegments = extractRefSegments(basicInfo, segmentingLocations, twoBaseBoundaries);
eventDescriptions = extractAltArrangements(basicInfo, contigAlignments, jumps, referenceSegments);

altSeq = extractAltHaplotypeSeq(cpxVariantInducingAssemblyContig.getPreprocessedTig(), referenceSegments, basicInfo);
Expand All @@ -152,7 +154,8 @@ static SimpleInterval getAffectedReferenceRegion(final List<SimpleInterval> even

@VisibleForTesting
static List<SimpleInterval> extractRefSegments(final CpxVariantInducingAssemblyContig.BasicInfo basicInfo,
final List<SimpleInterval> segmentingLocations) {
final List<SimpleInterval> segmentingLocations,
final Set<SimpleInterval> twoBaseBoundaries) {

if (segmentingLocations.size() == 1) { // for case described in {@link checkBoundedBySharedSingleBase}
return segmentingLocations;
Expand All @@ -168,8 +171,14 @@ static List<SimpleInterval> extractRefSegments(final CpxVariantInducingAssemblyC
// there shouldn't be a segment constructed if two segmenting locations are adjacent to each other on the reference
// this could happen when (in the simplest case), two alignments are separated by a mapped insertion (hence 3 total alignments),
// and the two alignments' ref span are connected
// more generally: only segment when the two segmenting locations are boundaries of alignments that overlap each other (ref. span)
if (rightBoundary.getStart() - leftBoundary.getEnd() > 1) {
segments.add(new SimpleInterval(eventPrimaryChromosome, leftBoundary.getStart(), rightBoundary.getStart()));
segments.add(new SimpleInterval(eventPrimaryChromosome, leftBoundary.getEnd(), rightBoundary.getStart()));
} else if (rightBoundary.getStart() - leftBoundary.getEnd() == 1) {
final SimpleInterval twoBaseSegment = new SimpleInterval(eventPrimaryChromosome, leftBoundary.getEnd(), rightBoundary.getStart());
if ( twoBaseBoundaries.contains(twoBaseSegment) ) {
segments.add(twoBaseSegment);
}
}
leftBoundary = rightBoundary;
}
Expand Down Expand Up @@ -335,7 +344,7 @@ static byte[] extractAltHaplotypeSeq(final AssemblyContigWithFineTunedAlignments
: (head.referenceSpan.getStart() - firstSegment.getEnd() == 1);
if ( ! firstSegmentNeighborsHeadAlignment )
throw new CpxVariantInterpreter.UnhandledCaseSeen("1st segment is not overlapping with head alignment but it is not immediately before/after the head alignment either\n"
+ tigWithInsMappings.toString());
+ tigWithInsMappings.toString() + "\nSegments:\t" + segments.toString());
start = head.endInAssembledContig;
} else {
final SimpleInterval intersect = firstSegment.intersect(head.referenceSpan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import scala.Tuple2;

import javax.annotation.Nonnull;
import java.util.*;
Expand Down Expand Up @@ -70,7 +71,7 @@ final class CpxVariantInducingAssemblyContig {
private final AssemblyContigWithFineTunedAlignments contigWithFineTunedAlignments;
private final BasicInfo basicInfo;
private final List<Jump> jumps;
private final List<SimpleInterval> eventPrimaryChromosomeSegmentingLocations;
private final List<SimpleInterval> eventPrimaryChromosomeSegmentingLocations; // a list of 1bp "point" locations that will be used to segment the affected reference region

@VisibleForTesting
CpxVariantInducingAssemblyContig(final AssemblyContigWithFineTunedAlignments contigWithFineTunedAlignments,
Expand Down Expand Up @@ -178,6 +179,31 @@ List<Jump> getJumps() {
}
List<SimpleInterval> getEventPrimaryChromosomeSegmentingLocations() { return eventPrimaryChromosomeSegmentingLocations;}

/**
* @return two-base boundaries of contig alignments that are in the valid region, i.e. [alpha, omega]
*/
@VisibleForTesting
Set<SimpleInterval> getTwoBaseBoundaries() {
// only look at alignments that are not disjoint from [alpha, omega]
final SimpleInterval validRegion = new SimpleInterval(basicInfo.eventPrimaryChromosome, basicInfo.alpha.getStart(), basicInfo.omega.getEnd());

final String chr = basicInfo.eventPrimaryChromosome;

final Set<SimpleInterval> result = new HashSet<>(2 * contigWithFineTunedAlignments.getAlignments().size());
for (final AlignmentInterval aln : contigWithFineTunedAlignments.getAlignments()) {
final SimpleInterval referenceSpan = aln.referenceSpan;
if (!validRegion.contains(referenceSpan))
continue;

final SimpleInterval left = new SimpleInterval(chr, referenceSpan.getStart(), referenceSpan.getStart()+1);
final SimpleInterval right = new SimpleInterval(chr, referenceSpan.getEnd()-1, referenceSpan.getEnd());
result.add(left);
result.add(right);
}

return result;
}

@Override
public String toString() {
final StringBuilder sb = new StringBuilder("CpxVariantInducingAssemblyContig{");
Expand Down Expand Up @@ -429,7 +455,7 @@ static final class Jump {
default: throw new NoSuchElementException("seeing a strand switch that doesn't make sense");
}

gapSize = Math.max(0, two.startInAssembledContig - one.endInAssembledContig - 2); // -2 because we want bases in-between
gapSize = Math.max(0, two.startInAssembledContig - one.endInAssembledContig - 1); // -1 because we want bases in-between
}

boolean isGapped() {
Expand Down
Loading

0 comments on commit 9b84f3f

Please sign in to comment.