From e78fa7ce4195a8056a620d9e0202fc44103b607b Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 13 Jan 2018 22:30:17 -0500 Subject: [PATCH] Replaced IntervalsSkipList with OverlapDetector --- .../hellbender/engine/ContextShard.java | 23 +-- .../spark/AddContextDataToReadSpark.java | 21 +- .../AddContextDataToReadSparkOptimized.java | 12 +- .../spark/BroadcastJoinReadsWithVariants.java | 8 +- .../engine/spark/KnownSitesCache.java | 12 +- .../spark/datasources/ReadsSparkSource.java | 2 +- .../utils/collections/IntervalsSkipList.java | 54 ----- .../IntervalsSkipListOneContig.java | 172 ---------------- .../IntervalsSkipListOneContigUnitTest.java | 190 ------------------ .../IntervalsSkipListUnitTest.java | 50 ----- 10 files changed, 34 insertions(+), 510 deletions(-) delete mode 100644 src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipList.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContig.java delete mode 100644 src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContigUnitTest.java delete mode 100644 src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListUnitTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/ContextShard.java b/src/main/java/org/broadinstitute/hellbender/engine/ContextShard.java index 7abea0eef07..25973c59200 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/ContextShard.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/ContextShard.java @@ -1,13 +1,16 @@ package org.broadinstitute.hellbender.engine; +import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.OverlapDetector; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.collections.IntervalsSkipListOneContig; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVariant; import java.io.Serializable; import java.util.ArrayList; +import java.util.Comparator; import java.util.List; +import java.util.stream.Collectors; /** * Immutable storage class. @@ -21,7 +24,7 @@ public class ContextShard implements Serializable { // the interval covered by this shard public final SimpleInterval interval; // variants that overlap with the shard. - public final IntervalsSkipListOneContig variants; + public final OverlapDetector variants; // reads that start in the shard public final List reads; // variants and reference for the particular read at the same index as this element. @@ -38,7 +41,7 @@ public ContextShard(SimpleInterval interval) { * Careful: this ctor takes ownership of the passed reads and ReadContextData array. * Do not modify them after this call (ideally don't even keep a reference to them). */ - private ContextShard(SimpleInterval interval, IntervalsSkipListOneContig variants, final List reads, final List readContext) { + private ContextShard(SimpleInterval interval, OverlapDetector variants, final List reads, final List readContext) { this.interval = interval; this.variants = variants; this.reads = reads; @@ -50,12 +53,8 @@ private ContextShard(SimpleInterval interval, IntervalsSkipListOneContig newVariants; - if (null==variants) { - newVariants = null; - } else { - newVariants = new IntervalsSkipListOneContig<>( variants.getOverlapping(newInterval) ); - } + final OverlapDetector newVariants = variants == null ? null : + OverlapDetector.create( new ArrayList<>(variants.getOverlaps(newInterval)) ); return new ContextShard(newInterval, newVariants, reads, readContext); } @@ -64,7 +63,7 @@ public ContextShard split(SimpleInterval newInterval) { * Note that readContext is unchanged (including the variants it may refer to). */ public ContextShard withVariants(List newVariants) { - return new ContextShard(this.interval, new IntervalsSkipListOneContig<>(newVariants), reads, readContext); + return new ContextShard(this.interval, OverlapDetector.create(newVariants), reads, readContext); } /** @@ -86,8 +85,8 @@ public ContextShard withReadContext(List newReadContext) { /** * Returns the variants that overlap the query interval, in start-position order. */ - public List variantsOverlapping(SimpleInterval interval) { - return variants.getOverlapping(interval); + public List variantsOverlapping(Locatable interval) { + return variants.getOverlaps(interval).stream().sorted(Comparator.comparingInt(GATKVariant::getStart)).collect(Collectors.toList()); } } diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSpark.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSpark.java index ccfc714aaa0..be6ac1840b6 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSpark.java @@ -3,6 +3,7 @@ import com.google.common.base.Function; import com.google.common.collect.Iterators; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.OverlapDetector; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -16,16 +17,13 @@ import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.collections.IntervalsSkipList; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.reference.ReferenceBases; import org.broadinstitute.hellbender.utils.variant.GATKVariant; import scala.Tuple2; import javax.annotation.Nullable; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; +import java.util.*; import java.util.stream.Collectors; /** @@ -106,7 +104,7 @@ private static JavaPairRDD addUsingOverlapsPartitioni .collect(Collectors.toList()); final Broadcast bReferenceSource = ctx.broadcast(referenceSource); - final Broadcast> variantsBroadcast = variantsPaths == null ? ctx.broadcast(new IntervalsSkipList<>(variants.collect())) : null; + final Broadcast> variantsBroadcast = variantsPaths == null ? ctx.broadcast(OverlapDetector.create(variants.collect())) : null; int maxLocatableSize = Math.min(shardSize, shardPadding); JavaRDD> shardedReads = SparkSharder.shard(ctx, mappedReads, GATKRead.class, sequenceDictionary, intervalShards, maxLocatableSize); @@ -119,20 +117,15 @@ public Iterator> call(Shard shard) t // get reference bases for this shard (padded) SimpleInterval paddedInterval = shard.getInterval().expandWithinContig(shardPadding, sequenceDictionary); ReferenceBases referenceBases = bReferenceSource.getValue().getReferenceBases(paddedInterval); - final IntervalsSkipList intervalsSkipList = variantsPaths == null ? variantsBroadcast.getValue() : + final OverlapDetector overlapDetector = variantsPaths == null ? variantsBroadcast.getValue() : KnownSitesCache.getVariants(variantsPaths); Iterator> transform = Iterators.transform(shard.iterator(), new Function>() { @Nullable @Override public Tuple2 apply(@Nullable GATKRead r) { - List overlappingVariants; - if (SimpleInterval.isValid(r.getContig(), r.getStart(), r.getEnd())) { - overlappingVariants = intervalsSkipList.getOverlapping(new SimpleInterval(r)); - } else { - //Sometimes we have reads that do not form valid intervals (reads that do not consume any ref bases, eg CIGAR 61S90I - //In those cases, we'll just say that nothing overlaps the read - overlappingVariants = Collections.emptyList(); - } + final List overlappingVariants = SimpleInterval.isValid(r.getContig(), r.getStart(), r.getEnd()) + ? overlapDetector.getOverlaps(r).stream().sorted(Comparator.comparingInt(GATKVariant::getStart)).collect(Collectors.toList()) + : Collections.emptyList(); return new Tuple2<>(r, new ReadContextData(referenceBases, overlappingVariants)); } }); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSparkOptimized.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSparkOptimized.java index ffc3b1e8417..2776e669720 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSparkOptimized.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/AddContextDataToReadSparkOptimized.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.engine.spark; +import htsjdk.samtools.util.OverlapDetector; import org.broadinstitute.hellbender.utils.SerializableFunction; import htsjdk.samtools.SAMRecord; @@ -20,7 +21,6 @@ import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.collections.IntervalsSkipList; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; @@ -33,10 +33,8 @@ import java.io.IOException; import java.io.Serializable; import java.security.GeneralSecurityException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; +import java.util.*; +import java.util.stream.Collectors; public final class AddContextDataToReadSparkOptimized implements Serializable { @@ -325,7 +323,7 @@ private void throwIfOutsideMargin(SAMRecordToGATKReadAdapter g, SAMRecord r) { * This happens immediately, at the caller. */ public static ArrayList fillVariants(List shardedIntervals, List variants, int margin) { - IntervalsSkipList intervals = new IntervalsSkipList<>(variants); + OverlapDetector intervals = OverlapDetector.create(variants); ArrayList ret = new ArrayList<>(); for (SimpleInterval s : shardedIntervals) { int start = Math.max(s.getStart() - margin, 1); @@ -344,7 +342,7 @@ public static ArrayList fillVariants(List shardedI // // Since the read's length is less than margin, we know that by including all the variants that overlap // with the expanded interval we are also including all the variants that overlap with all the reads in this shard. - ret.add(new ContextShard(s).withVariants(intervals.getOverlapping(expandedInterval))); + ret.add(new ContextShard(s).withVariants(intervals.getOverlaps(expandedInterval).stream().sorted(Comparator.comparingInt(GATKVariant::getStart)).collect(Collectors.toList()))); } return ret; } diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/BroadcastJoinReadsWithVariants.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/BroadcastJoinReadsWithVariants.java index b7686e30ac9..79d2dbca14b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/BroadcastJoinReadsWithVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/BroadcastJoinReadsWithVariants.java @@ -1,10 +1,10 @@ package org.broadinstitute.hellbender.engine.spark; +import htsjdk.samtools.util.OverlapDetector; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.broadcast.Broadcast; -import org.broadinstitute.hellbender.utils.collections.IntervalsSkipList; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.variant.GATKVariant; @@ -32,7 +32,7 @@ private BroadcastJoinReadsWithVariants(){} */ public static JavaPairRDD> join(final JavaRDD reads, final JavaRDD variants) { final JavaSparkContext ctx = new JavaSparkContext(reads.context()); - final Broadcast> variantsBroadcast = ctx.broadcast(new IntervalsSkipList<>(variants.collect())); + final Broadcast> variantsBroadcast = ctx.broadcast(OverlapDetector.create(variants.collect())); return reads.mapToPair(r -> getOverlapping(r, variantsBroadcast.getValue())); } @@ -48,9 +48,9 @@ public static JavaPairRDD> join(final JavaRDD getOverlapping(r, KnownSitesCache.getVariants(variantsPaths))); } - private static Tuple2> getOverlapping(final GATKRead read, final IntervalsSkipList intervalsSkipList) { + private static Tuple2> getOverlapping(final GATKRead read, final OverlapDetector overlapDetector) { if (SimpleInterval.isValid(read.getContig(), read.getStart(), read.getEnd())) { - return new Tuple2<>(read, intervalsSkipList.getOverlapping(new SimpleInterval(read))); + return new Tuple2<>(read, overlapDetector.getOverlaps(read)); } else { //Sometimes we have reads that do not form valid intervals (reads that do not consume any ref bases, eg CIGAR 61S90I //In those cases, we'll just say that nothing overlaps the read diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/KnownSitesCache.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/KnownSitesCache.java index 4f4e140a8e2..5b8a1b039f7 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/KnownSitesCache.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/KnownSitesCache.java @@ -1,10 +1,10 @@ package org.broadinstitute.hellbender.engine.spark; +import htsjdk.samtools.util.OverlapDetector; import htsjdk.variant.variantcontext.VariantContext; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.engine.FeatureDataSource; -import org.broadinstitute.hellbender.utils.collections.IntervalsSkipList; import org.broadinstitute.hellbender.utils.variant.GATKVariant; import org.broadinstitute.hellbender.utils.variant.VariantContextVariantAdapter; @@ -19,19 +19,19 @@ class KnownSitesCache { private static final Logger log = LogManager.getLogger(KnownSitesCache.class); - private static final Map, IntervalsSkipList> PATHS_TO_VARIANTS = new HashMap<>(); + private static final Map, OverlapDetector> PATHS_TO_VARIANTS = new HashMap<>(); - public static synchronized IntervalsSkipList getVariants(List paths) { + public static synchronized OverlapDetector getVariants(List paths) { if (PATHS_TO_VARIANTS.containsKey(paths)) { return PATHS_TO_VARIANTS.get(paths); } - IntervalsSkipList variants = retrieveVariants(paths); + OverlapDetector variants = retrieveVariants(paths); PATHS_TO_VARIANTS.put(paths, variants); return variants; } - private static IntervalsSkipList retrieveVariants(List paths) { - return new IntervalsSkipList<>(paths + private static OverlapDetector retrieveVariants(List paths) { + return OverlapDetector.create(paths .stream() .map(KnownSitesCache::loadFromFeatureDataSource) .flatMap(Collection::stream) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSource.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSource.java index 322d68e46ef..a5092958033 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/datasources/ReadsSparkSource.java @@ -289,7 +289,7 @@ private void setHadoopBAMConfigurationProperties(final String inputName, final S * Tests if a given SAMRecord overlaps any interval in a collection. This is only used as a fallback option for * formats that don't support query-by-interval natively at the Hadoop-BAM layer. */ - //TODO: use IntervalsSkipList, see https://github.com/broadinstitute/gatk/issues/1531 + //TODO: use OverlapDetector, see https://github.com/broadinstitute/gatk/issues/1531 private static boolean samRecordOverlaps(final SAMRecord record, final TraversalParameters traversalParameters ) { if (traversalParameters == null) { return true; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipList.java b/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipList.java deleted file mode 100644 index 02591ec720d..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipList.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.broadinstitute.hellbender.utils.collections; - -import htsjdk.samtools.util.Locatable; -import org.broadinstitute.hellbender.utils.SimpleInterval; - -import java.io.Serializable; -import java.util.*; - -/** - * Holds many intervals in memory, with an efficient operation to get - * intervals that overlap a given query interval. - * - * This version allows intervals to lie on different contigs. - */ -public final class IntervalsSkipList implements Serializable { - private static final long serialVersionUID = 1L; - - private final Map> intervals; - - /** - * Creates an IntervalsSkipList that holds a copy of the given intervals, sorted - * and indexed. - * - * @param loc Locatables, not necessarily sorted. Will be iterated over exactly once. - */ - public IntervalsSkipList(final Iterable loc) { - final Map> variantsPerContig = new LinkedHashMap<>(); - for (final T v : loc) { - final String k = v.getContig(); - variantsPerContig.putIfAbsent(k, new ArrayList<>()); - variantsPerContig.get(k).add(v); - } - intervals = new LinkedHashMap<>(); - for (String k : variantsPerContig.keySet()) { - intervals.put(k, new IntervalsSkipListOneContig<>(variantsPerContig.get(k))); - } - } - - /** - * Returns all the intervals that overlap with the query. - * The query doesn't *have* to be in the same contig as any interval we - * hold, but of course if it isn't you'll get an empty result. - * You may modify the returned list. - */ - public List getOverlapping(final SimpleInterval query) { - final String k = query.getContig(); - final IntervalsSkipListOneContig result = intervals.get(k); - if (result == null){ - return new ArrayList<>(); - } - return result.getOverlapping(query); - } - -} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContig.java b/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContig.java deleted file mode 100644 index 1503d9c85d2..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContig.java +++ /dev/null @@ -1,172 +0,0 @@ -package org.broadinstitute.hellbender.utils.collections; - -import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import htsjdk.samtools.util.Locatable; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * Holds many intervals in memory, with an efficient operation to get - * intervals that overlap a given query interval. - * - * This version assumes that all the intervals lie on the same contig. - */ -public final class IntervalsSkipListOneContig implements Serializable { - private static final long serialVersionUID = 1L; - - // approx number of buckets we're aiming for. - private static final int NUMBUCKETS = 1000; - // each bucket contains 2**shift entries. - private final int shift; - - // input intervals, sorted by start location - private final List vs; - // the contig all the intervals are in. - private final String contig; - - // reach: bucket# -> how far that bucket reaches. - // e.g. bucket 0 contains the first 2**shift locatables. reach[0] is the max over their .getEnd() - // reach[x] is the max over the .getEnd for that bucket and all the ones before it. - private final int[] reach; - private final int reachLength; - - /** - * Creates an IntervalsSkipList that holds a copy of the given intervals, sorted - * and indexed. - * - * @param loc Locatables, not necessarily sorted. Will be iterated over exactly once. - */ - public IntervalsSkipListOneContig(final Iterable loc) { - Utils.nonNull(loc); - vs = Lists.newArrayList(loc); - - final Set contigs = vs.stream().map(l -> l.getContig()).collect(Collectors.toSet()); - if (contigs.size() > 1){ - throw new IllegalArgumentException("Only one contig expected but got " + contigs); - } - - if (vs.isEmpty()) { - contig=""; - } else { - contig=vs.get(0).getContig(); - } - int bSize = vs.size() / NUMBUCKETS; - // heuristic: if we have too many small buckets then we're better off instead - // taking fewer but bigger steps, and then iterating through a few values. - // Thus, put a lower bound on bucket size. - if (bSize < 32) { - bSize = 32; - } - shift = floorLog2(bSize); - - vs.sort(Comparator.comparing(Locatable::getContig).thenComparingInt(Locatable::getStart).thenComparing(Locatable::getEnd)); - - reach = buildIndexAndCheck(); - reachLength = reach.length; - } - - /** - * Returns all the intervals that overlap with the query. - * The query doesn't *have* to be in the same contig as the intervals we - * hold, but of course if it isn't you'll get an empty result. - * You may modify the returned list. - */ - public List getOverlapping(final SimpleInterval query) { - Utils.nonNull(query); - if (!contig.equals(query.getContig())) { - // different contig, so we know no one'll overlap. - return new ArrayList<>(); - } - final List ret = new ArrayList<>(); - // use index to skip early non-overlapping entries. - int idx = firstPotentiallyReaching(query.getStart()); - if (idx<0) { - idx=0; - } - for (;idx query.getEnd()) { - break; - } - if (query.overlaps(v)) { - ret.add(v); - } - } - return ret; - } - - // returns all the intervals that overlap with the query. - // (use the optimized version instead, unless you're testing it and need something to compare against) - @VisibleForTesting - List getOverlappingIgnoringIndex(final SimpleInterval query) { - if (!contig.equals(query.getContig())) { - // different contig, so we know no one'll overlap. - return new ArrayList<>(); - } - final List ret = new ArrayList<>(); - for (final T v : vs) { - // they are sorted by start location, so if this one starts too late - // then all of the others will, too. - if (v.getStart() > query.getEnd()) { - break; - } - if (query.overlaps(v)) { - ret.add(v); - } - } - return ret; - } - - // returns an index into the vs array s.t. no entry before that index - // reaches (or extends beyond) the given position. - private int firstPotentiallyReaching(final int position) { - for (int i=0; i=position) { - return i< how far that bucket reaches - final int[] result = new int[(vs.size()>>shift)+1]; - for (Locatable v : vs) { - int k = idx>>shift; - if (k>key) { - result[key]=max; - key=k; - } - if (v.getEnd()>max) { - max=v.getEnd(); - } - idx++; - } - result[key]=max; - return result; - } - - private static int floorLog2(final int n){ - if (n <= 0) { - throw new IllegalArgumentException(); - } - // size of int is 32 bits - return 31 - Integer.numberOfLeadingZeros(n); - } - -} diff --git a/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContigUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContigUnitTest.java deleted file mode 100644 index e49b3ea0775..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListOneContigUnitTest.java +++ /dev/null @@ -1,190 +0,0 @@ -package org.broadinstitute.hellbender.utils.collections; - -import com.google.common.base.Stopwatch; -import com.google.common.collect.Lists; -import htsjdk.samtools.util.Locatable; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.TimeUnit; - -public final class IntervalsSkipListOneContigUnitTest extends GATKBaseTest { - - - @DataProvider(name="intervals") - public Object[][] intervals(){ - ArrayList input = Lists.newArrayList( - new SimpleInterval("1",10,100) - ); - ArrayList empty = new ArrayList<>(); - ArrayList manyOverlapping = Lists.newArrayList( - new SimpleInterval("1",10,100), - // special case: multiple intervals starting at the same place - new SimpleInterval("1",20,50), - new SimpleInterval("1",20,50), - new SimpleInterval("1",20,50) - ); - ArrayList mixInput = Lists.newArrayList( - // ends before query interval - new SimpleInterval("1",10,20), - // ends in query interval - new SimpleInterval("1",10,60), - // equal to query interval - new SimpleInterval("1",30,50), - // covered by query interval - new SimpleInterval("1",40,42), - // ends after query interval - new SimpleInterval("1",45,60), - // starts after query interval - new SimpleInterval("1",60,100) - ); - ArrayList mixExpected = Lists.newArrayList( - // ends in query interval - new SimpleInterval("1",10,60), - // equal to query interval - new SimpleInterval("1",30,50), - // covered by query interval - new SimpleInterval("1",40,42), - // ends after query interval - new SimpleInterval("1",45,60) - ); - // returns input single SimpleInterval, query range, expected SimpleInterval - return new Object[][]{ - // single-point boundary cases - new Object[]{input, new SimpleInterval("1", 10, 10), input}, - new Object[]{input, new SimpleInterval("1", 100, 100), input}, - new Object[]{input, new SimpleInterval("1", 9, 9), empty}, - new Object[]{input, new SimpleInterval("1", 11, 11), input}, - new Object[]{input, new SimpleInterval("1", 99, 99), input}, - new Object[]{input, new SimpleInterval("1", 101, 101), empty}, - // empty list boundary case - new Object[]{empty, new SimpleInterval("1", 101, 101), empty}, - // different contig - new Object[]{empty, new SimpleInterval("2", 101, 101), empty}, - // input exactly matches the query interval - new Object[]{input, new SimpleInterval("1", 10, 100), input}, - // multiple intervals in the same place (potential edge case for indexing) - new Object[]{manyOverlapping, new SimpleInterval("1", 20, 20), manyOverlapping}, - // input with multiple intervals - new Object[]{mixInput, new SimpleInterval("1",30,50), mixExpected} - - }; - } - - @Test(dataProvider = "intervals") - public void testOverlap(ArrayList input, SimpleInterval query, ArrayList expected) throws Exception { - IntervalsSkipListOneContig ints = new IntervalsSkipListOneContig<>(input); - List actual = ints.getOverlapping(query); - Assert.assertEquals( - actual, - expected - ); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testMultipleContigs() throws Exception { - new IntervalsSkipListOneContig<>(Arrays.asList(new SimpleInterval("1",1,2), new SimpleInterval("2",1,2))); - } - - @Test - public void testManyIntervals() throws Exception { - ArrayList si = new ArrayList<>(); - final int MAX = 10_000_000; - for (int start = 1; start ints = new IntervalsSkipListOneContig<>(si); - indexing.stop(); - - Stopwatch v1 = Stopwatch.createStarted(); - for (int start = 101; start actual = ints.getOverlappingIgnoringIndex(interval); - Assert.assertEquals(actual.size(), 3); - // the two that start from "start", plus the long one that starts from start-100. - // the one that starts from start-200 ends before our test point. - for (Locatable l : actual) { - Assert.assertTrue(interval.overlaps(l)); - } - } - v1.stop(); - Stopwatch v2 = Stopwatch.createStarted(); - for (int start = 101; start actual = ints.getOverlapping(interval); - Assert.assertEquals(actual.size(), 3); - // the two that start from "start", plus the long one that starts from start-100. - // the one that starts from start-200 ends before our test point. - for (Locatable l : actual) { - Assert.assertTrue(interval.overlaps(l)); - } - } - v2.stop(); - - System.out.println("non-indexed took "+v1.elapsed(TimeUnit.MILLISECONDS)+" ms, " - +" indexed took "+v2.elapsed(TimeUnit.MILLISECONDS)+" ms, plus "+indexing.elapsed(TimeUnit.MILLISECONDS)+" for sorting&indexing."); - } - - @Test - public void testLotsOfTinyIntervals() throws Exception { - List input = new ArrayList<>(); - int n = 1000000; - for (int i = 0; i < n; i++) { - input.add(new SimpleInterval("1", 3*i+1, 3*i+2)); //1:1-2, 1:4-5, 1:7-8 - } - final IntervalsSkipListOneContig skipList = new IntervalsSkipListOneContig<>(input); - final List overlapping = skipList.getOverlapping(new SimpleInterval("1", 1, 3 * n + 2)); - Assert.assertEquals(input, overlapping); - } - - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testNullCtorArg() throws Exception { - new IntervalsSkipListOneContig<>(null); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testNullArg() throws Exception { - List input = Arrays.asList( - new SimpleInterval("1",10,100) - ); - final IntervalsSkipListOneContig l = new IntervalsSkipListOneContig<>(input); - l.getOverlapping(null); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testNotSameContig() throws Exception { - List input = Arrays.asList( - new SimpleInterval("1",10,100), - new SimpleInterval("2",10,100) - ); - final IntervalsSkipListOneContig l = new IntervalsSkipListOneContig<>(input); - } - - @Test - public void testQquetNotSameContig() throws Exception { - List input = Arrays.asList( - new SimpleInterval("1",10,100) - ); - final IntervalsSkipListOneContig l = new IntervalsSkipListOneContig<>(input); - final List res = l.getOverlappingIgnoringIndex(new SimpleInterval("2", 10, 100)); - Assert.assertEquals(res, Collections.emptyList()); - } - @Test - public void testEmptyInput() throws Exception { - List empty = new ArrayList<>(); - final IntervalsSkipListOneContig l = new IntervalsSkipListOneContig<>(empty); - Assert.assertTrue(l.getOverlapping(new SimpleInterval("", 10, 100)).isEmpty()); //try to fool it by using empty contig - Assert.assertTrue(l.getOverlapping(new SimpleInterval("1", 10, 100)).isEmpty()); - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListUnitTest.java deleted file mode 100644 index 6589a79498b..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/utils/collections/IntervalsSkipListUnitTest.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.hellbender.utils.collections; - -import com.google.common.collect.Lists; -import htsjdk.samtools.util.Locatable; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.GATKBaseTest; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; - -public final class IntervalsSkipListUnitTest extends GATKBaseTest { - - @DataProvider(name="intervals") - public Object[][] intervals(){ - ArrayList input = Lists.newArrayList( - new SimpleInterval("1", 10, 100), - new SimpleInterval("2", 200, 300) - ); - ArrayList empty = new ArrayList<>(); - ArrayList contig1 = Lists.newArrayList( - new SimpleInterval("1", 10, 100) - ); - ArrayList contig2 = Lists.newArrayList( - new SimpleInterval("2", 200, 300) - ); - - // returns input, query range, expected SimpleIntervals - return new Object[][]{ - // we already test elsewhere that it works within a contig, so here we just have to make sure that - // it picks the correct contig and can deal with not-yet-mentioned contigs. - new Object[]{input, new SimpleInterval("1", 100, 200), contig1}, - new Object[]{input, new SimpleInterval("1", 1, 5), empty}, - new Object[]{input, new SimpleInterval("2", 100, 200), contig2}, - new Object[]{input, new SimpleInterval("3", 100, 200), empty}, - }; - } - - @Test(dataProvider = "intervals") - public void testOverlap(ArrayList input, SimpleInterval query, ArrayList expected) throws Exception { - IntervalsSkipList ints = new IntervalsSkipList<>(input); - List actual = ints.getOverlapping(query); - Assert.assertEquals( - actual, - expected - ); - } -} \ No newline at end of file