From 7274dcd41cdc49f1457d75ece2cf7d520e668278 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 6 Jan 2020 14:03:10 -0500 Subject: [PATCH] Adding a new GATKTool level argument to control which if any output variants are filtered. --- .../hellbender/engine/GATKTool.java | 33 ++++- .../tools/walkers/GenotypeGVCFs.java | 54 +++---- .../gnarlyGenotyper/GnarlyGenotyper.java | 28 +--- .../writers/IntervalFilteringVcfWriter.java | 140 ++++++++++++++++++ .../engine/GatkToolIntegrationTest.java | 72 +++++++++ .../walkers/GenotypeGVCFsIntegrationTest.java | 12 +- .../GnarlyGenotyperIntegrationTest.java | 7 +- .../variant/writers/GVCFWriterUnitTest.java | 82 +++------- .../IntervalFilteringVcfWriterUnitTest.java | 91 ++++++++++++ .../writers/SomaticGVCFWriterUnitTest.java | 6 +- .../utils/variant/writers/MockVcfWriter.java | 42 ++++++ 11 files changed, 439 insertions(+), 128 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java create mode 100644 src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java create mode 100644 src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index f5107d20df4..9fb55490c10 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -14,8 +14,11 @@ import java.time.ZonedDateTime; import java.util.*; import java.util.stream.Stream; + +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; import 
org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; @@ -42,6 +45,7 @@ import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; //TODO: //UserException overloads @@ -123,6 +127,14 @@ public abstract class GATKTool extends CommandLineProgram { doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) public boolean outputSitesOnlyVCFs = false; + public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE = "variant-output-interval-filtering-mode"; + @Argument(fullName = VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); + + /** * Master sequence dictionary to be used instead of all other dictionaries (if provided). */ @@ -413,6 +425,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { */ public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } + /** + * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. 
+ */ + public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ + return null; + } + protected List transformTraversalIntervals(final List getIntervals, final SAMSequenceDictionary sequenceDictionary) { return getIntervals; } @@ -710,12 +729,16 @@ protected void onStartup() { initializeIntervals(); // Must be initialized after reference, reads and features, since intervals currently require a sequence dictionary from another data source - if ( seqValidationArguments.performSequenceDictionaryValidation()) { + if (seqValidationArguments.performSequenceDictionaryValidation()) { validateSequenceDictionaries(); } checkToolRequirements(); + if (outputVariantIntervalFilteringMode != null && userIntervals == null){ + throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + VARIANT_OUTPUT_INTERVAL_FILTERING_MODE + " was specified."); + } + progressMeter = new ProgressMeter(secondsBetweenProgressUpdates); progressMeter.setRecordLabel(getProgressMeterRecordLabel()); } @@ -884,11 +907,17 @@ public VariantContextWriter createVCFWriter(final Path outPath) { options.add(Options.DO_NOT_WRITE_GENOTYPES); } - return GATKVariantContextUtils.createVCFWriter( + final VariantContextWriter vcfWriter = GATKVariantContextUtils.createVCFWriter( outPath, sequenceDictionary, createOutputVariantMD5, options.toArray(new Options[options.size()])); + + if(outputVariantIntervalFilteringMode != null){ + return new IntervalFilteringVcfWriter(vcfWriter, intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), outputVariantIntervalFilteringMode); + } else { + return vcfWriter; + } } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java index 9da50ccd378..be439487e22 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java @@ -7,12 +7,16 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.VariantLocusWalker; @@ -24,11 +28,22 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; -import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.utils.GenomeLoc; +import org.broadinstitute.hellbender.utils.GenomeLocParser; +import org.broadinstitute.hellbender.utils.GenomeLocSortedSet; +import org.broadinstitute.hellbender.utils.IntervalMergingRule; +import org.broadinstitute.hellbender.utils.IntervalSetRule; +import org.broadinstitute.hellbender.utils.IntervalUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; 
+import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; /** * Perform joint genotyping on one or more samples pre-called with HaplotypeCaller @@ -108,7 +123,7 @@ public final class GenotypeGVCFs extends VariantLocusWalker { /** * Import all data between specified intervals. Improves performance using large lists of intervals, as in exome * sequencing, especially if GVCF data only exists for specified intervals. Use with - * --only-output-calls-starting-in-intervals if input GVCFs contain calls outside the specified intervals. + * --{@value GATKTool#VARIANT_OUTPUT_INTERVAL_FILTERING_MODE} if input GVCFs contain calls outside the specified intervals. */ @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, @@ -149,16 +164,6 @@ public final class GenotypeGVCFs extends VariantLocusWalker { @ArgumentCollection private GenomicsDBArgumentCollection genomicsdbArgs = new GenomicsDBArgumentCollection(); - /** - * This option can only be activated if intervals are specified. 
- */ - @Advanced - @Argument(fullName= ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - - @Argument(fullName = FORCE_OUTPUT_INTERVALS_NAME, suppressFileExpansion = true, doc = "sites at which to output genotypes even if non-variant in samples", optional = true) protected final List forceOutputIntervalStrings = new ArrayList<>(); @@ -177,15 +182,14 @@ public final class GenotypeGVCFs extends VariantLocusWalker { private VariantContextWriter vcfWriter; - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - private OverlapDetector forceOutputIntervals; private boolean forceOutputIntervalsPresent; private GenotypeGVCFsEngine gvcfEngine; + + /** * Get the largest interval per contig that contains the intervals specified on the command line. * @param getIntervals intervals to be transformed @@ -248,21 +252,12 @@ public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !hasUserSuppliedIntervals()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - - intervals = hasUserSuppliedIntervals() ? 
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false, keepCombined); merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput); //methods that cannot be called in engine bc its protected - Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); + final Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); vcfWriter = createVCFWriter(outputFile); //create engine object @@ -270,7 +265,6 @@ public void onTraversalStart() { //call initialize method in engine class that creates VCFWriter object and writes a header to it vcfWriter = gvcfEngine.setupVCFWriter(defaultToolVCFHeaderLines, keepCombined, dbsnp, vcfWriter); - } @Override @@ -281,9 +275,7 @@ public void apply(final Locatable loc, List variants, ReadsConte final VariantContext regenotypedVC = gvcfEngine.callRegion(loc, variants, ref, features, merger, somaticInput, tlodThreshold, afTolerance, forceOutput); if (regenotypedVC != null) { - final SimpleInterval variantStart = new SimpleInterval(regenotypedVC.getContig(), regenotypedVC.getStart(), regenotypedVC.getStart()); - if ((inForceOutputIntervals || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC)) && - (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains (variantStart)))) { + if ((inForceOutputIntervals || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC))) { vcfWriter.add(regenotypedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java index 78b62189608..a7475d7a744 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java @@ -110,15 +110,6 @@ public final class GnarlyGenotyper extends VariantWalker { @Argument(fullName = "keep-all-sites", doc="Retain low quality and non-variant sites, applying appropriate filters", optional=true) private boolean keepAllSites = false; - /** - * This option can only be activated if intervals are specified. - */ - @Advanced - @Argument(fullName = GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, doc = "Boolean flag to read in all data in between intervals. Improves performance reading from GenomicsDB " + @@ -146,9 +137,6 @@ public final class GnarlyGenotyper extends VariantWalker { private final RMSMappingQuality mqCalculator = RMSMappingQuality.getInstance(); private final Set> allAlleleSpecificAnnotations = new HashSet<>(); - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - @Override public boolean requiresReference() { return true; @@ -183,14 +171,6 @@ protected GenomicsDBOptions getGenomicsDBOptions() { public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !intervalArgumentCollection.intervalsSpecified()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - intervals = intervalArgumentCollection.intervalsSpecified() ? 
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); setupVCFWriter(inputVCFHeader, samples); @@ -266,11 +246,11 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) { @SuppressWarnings({"unchecked", "rawtypes"}) @Override public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) { - SimpleInterval variantStart = new SimpleInterval(variant.getContig(), variant.getStart(), variant.getStart()); //return early if there's no non-symbolic ALT since GDB already did the merging if ( !variant.isVariant() || !GATKVariantContextUtils.isProperlyPolymorphic(variant) - || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 - || (onlyOutputCallsStartingInIntervals && !intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 ) + // TODO: this change is a slight de-optimization since we will now process some sites which were previously ignored + { if (keepAllSites) { VariantContextBuilder builder = new VariantContextBuilder(mqCalculator.finalizeRawMQ(variant)); //don't fill in QUAL here because there's no alt data builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME); @@ -297,7 +277,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r finalizedVC = genotyperEngine.finalizeGenotype(variant); } //could return null if the variant didn't pass the genotyping arg calling/emission threshold - if (finalizedVC != null && (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + if (finalizedVC != null) { vcfWriter.add(finalizedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java
b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java new file mode 100644 index 00000000000..6f81da1efd6 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -0,0 +1,140 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.List; +import java.util.Set; + +/** + * A {@link VariantContextWriter} decorator which filters out variants that don't match a given set of intervals. + */ +public class IntervalFilteringVcfWriter implements VariantContextWriter { + + /** + * Comparison modes which allow matching intervals in different ways. + */ + public enum Mode { + + /** + * Matches if the query starts within any of the given intervals. 
+ */ + STARTS_IN{ + @Override + boolean test(OverlapDetector detector, final VariantContext query) { + final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); + return detector.overlapsAny(startPosition); + } + }, + + /** + * Matches if the query ends within any of the given intervals + */ + ENDS_IN{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final SimpleInterval endPosition = new SimpleInterval(query.getContig(), query.getEnd(), query.getEnd()); + return detector.overlapsAny(endPosition); + } + }, + + /** + * Matches if any part of the query overlaps any one of the given intervals + */ + OVERLAPS{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return detector.overlapsAny(query); + } + }, + + /** + * Matches if the entirety of the query is contained within one of the intervals + */ + CONTAINED { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final Set overlaps = detector.getOverlaps(query); + for( final Locatable loc : overlaps){ + if(loc.contains(query)){ + return true; + } + } + return false; + } + }, + + /** + * Always matches; may be used to perform no filtering. + */ + ANYWHERE { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return true; + } + }; + + /** + * @param detector The OverlapDetector to compare against + * @param query The variant being tested + * @return true iff the variant matches the given intervals + */ + abstract boolean test(OverlapDetector detector, VariantContext query); + } + + private final VariantContextWriter writer; + private final OverlapDetector detector; + private final Mode mode; + + /** + * @param writer the writer to wrap + * @param intervals the intervals to compare against, note that these are not merged so if they should be merged then the input list should be preprocessed + 
* @param mode the matching mode to use + */ + public IntervalFilteringVcfWriter(final VariantContextWriter writer, List intervals, Mode mode) { + Utils.nonNull(writer); + Utils.nonEmpty(intervals); + Utils.nonNull(mode); + + this.writer = writer; + this.detector = OverlapDetector.create(intervals); + this.mode = mode; + } + + @Override + public void writeHeader(final VCFHeader header) { + writer.writeHeader(header); + } + + @Override + public void setHeader(final VCFHeader header) { + writer.setHeader(header); + } + + @Override + public void close() { + writer.close(); + } + + @Override + public boolean checkError() { + return writer.checkError(); + } + + /** + * Add the given variant to the writer and output it if it matches. + * @param vc the variant to potentially write + */ + @Override + public void add(final VariantContext vc) { + if(mode.test(detector, vc)) { + writer.add(vc); + } + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index 9a31953df66..514212cfb14 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -1,16 +1,26 @@ package org.broadinstitute.hellbender.engine; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; +import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import 
org.broadinstitute.hellbender.cmdline.TestProgramGroup; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -64,4 +74,66 @@ public void testBrokenReferenceDictionaryErrorMessage() throws IOException { runCommandLine(Arrays.asList(args), Mutect2.class.getSimpleName()); } + + @CommandLineProgramProperties(summary = "testTool which emits specific variants", + oneLineSummary = "Test tool", + programGroup = TestProgramGroup.class) + public static class VariantEmitter extends GATKTool{ + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) + File output; + + @Override + public void traverse() { + //nope + } + + @Override + public void onTraversalStart() { + try(final VariantContextWriter vcfWriter = createVCFWriter(output)){ + vcfWriter.writeHeader(new VCFHeader()); + final VariantContextBuilder vcb = new VariantContextBuilder(); + vcb.alleles("AAAAAA", "A").chr("1"); + + vcfWriter.add(vcb.start(10).stop(15).make()); + vcfWriter.add(vcb.start(100).stop(105).make()); + vcfWriter.add(vcb.start(1000).stop(1005).make()); + vcfWriter.add(vcb.start(10000).stop(10005).make()); + + vcb.chr("2"); + vcfWriter.add(vcb.start(20).stop(25).make()); + vcfWriter.add(vcb.start(200).stop(205).make()); + vcfWriter.add(vcb.start(2000).stop(2005).make()); + vcfWriter.add(vcb.start(20000).stop(20005).make()); + } + } + } + + @DataProvider + public Object[][] getIntervalsAndOverlapMode(){ + return new Object[][]{ + 
{Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ANYWHERE, 8}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.OVERLAPS, 6}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.STARTS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ENDS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.CONTAINED, 2}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), null, 8}, + }; + } + + @Test(dataProvider = "getIntervalsAndOverlapMode") + public void testVcfOutputFilterMode(List intervals, IntervalFilteringVcfWriter.Mode mode, int variantsIncluded){ + final ArgumentsBuilder args = new ArgumentsBuilder(); + final File out = createTempFile("out", ".vcf"); + args.addOutput(out); + intervals.forEach(args::addInterval); + args.addReference(b37Reference); + if( mode != null) { + args.addArgument(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, mode); + } + + runCommandLine(args, VariantEmitter.class.getSimpleName()); + final Pair> vcfHeaderListPair = VariantContextTestUtils.readEntireVCFIntoMemory(out.toString()); + + Assert.assertEquals(vcfHeaderListPair.getRight().size(), variantsIncluded); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java index a4cd31611dc..a733e70ccd1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java @@ -13,6 +13,7 @@ import 
org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; @@ -24,6 +25,7 @@ import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.runtime.ProcessController; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -74,7 +76,7 @@ public Object[][] gvcfsToGenotype() { return new Object[][]{ //combine not supported yet, see https://github.com/broadinstitute/gatk/issues/2429 and https://github.com/broadinstitute/gatk/issues/2584 //{"combine.single.sample.pipeline.1.vcf", null, Arrays.asList("-V", getTestFile("combine.single.sample.pipeline.2.vcf").toString() , "-V", getTestFile("combine.single.sample.pipeline.3.vcf").toString()), b37_reference_20_21}, - {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME), b37_reference_20_21}, + {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b37_reference_20_21}, {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionExpected.vcf"), Arrays.asList("-L", "20:69512-69513"), b37_reference_20_21}, 
{getTestFile(BASE_PAIR_GVCF), getTestFile( BASE_PAIR_EXPECTED), NO_EXTRA_ARGS, b37_reference_20_21}, //base pair level gvcf {getTestFile("testUpdatePGT.gvcf"), getTestFile( "testUpdatePGT.gatk3.7_30_ga4f720357.output.vcf"), NO_EXTRA_ARGS, b37_reference_20_21}, //testUpdatePGT @@ -251,7 +253,7 @@ public void assertMatchingGenotypesFromTileDB(File input, File expected, Locatab } @Test(dataProvider = "getGVCFsForGenomicsDBOverMultipleIntervals") - public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) throws IOException { + public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) { final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, intervals, true); final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB); @@ -262,8 +264,8 @@ public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, Li .add("V", genomicsDBUri) .addOutput(output); intervals.forEach(args::addInterval); - args.addRaw("--" + GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME); - args.addRaw("--only-output-calls-starting-in-intervals"); //note that this will restrict calls to just the specified intervals + args.add(GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, true); + args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); //note that this will restrict calls to just the specified intervals Utils.resetRandomGenerator(); runCommandLine(args); @@ -381,7 +383,7 @@ public void testIntervalsAndOnlyOutputCallsStartingInIntervalsAreMutuallyRequire .addVCF(getTestFile("leadingDeletion.g.vcf")) .addReference(new File(b37_reference_20_21)) .addOutput( createTempFile("tmp",".vcf")) - .add(GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, true); + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); 
Assert.assertThrows(CommandLineException.MissingArgument.class, () -> runCommandLine(args)); args.add("L", "20:69512-69513"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index f25b4a9145b..5242c82d750 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -5,12 +5,14 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -36,12 +38,11 @@ public Object[][] getVCFdata() { // Simple Test, spanning deletions; standard calling confidence //No variants outside requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 
252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, // Same as above, but with GenomicsDB using VCFCodec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals", "--genomicsdb-use-vcf-codec"), b38_reference_20_21}, - //lower calling confidence //same as above except (different intervals and) with SNPs with 40 < QUAL < 60 and INDELs with 49 < QUAL < 69 {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, @@ -134,7 +135,7 @@ public void testOnHailOutput() { args.addReference(new File(hg38Reference)) .add("V", input) .add("L", "chr20:10000000-10030000") - .add("only-output-calls-starting-in-intervals", true) + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN) .add("keep-all-sites", true) .addOutput(output) .add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java index 9752bddddc8..e48ee1196e8 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java @@ -38,53 +38,17 @@ public class GVCFWriterUnitTest extends GATKBaseTest { private static final String CHR1 = "1"; private static final String CHR2 = "2"; - private static final Allele REF = Allele.create("G", true); - private static final Allele ALT = Allele.create("A"); - private static final List ALLELES = ImmutableList.of(REF, Allele.NON_REF_ALLELE); + private static final List ALLELES = ImmutableList.of(Allele.REF_G, Allele.NON_REF_ALLELE); private static final String SAMPLE_NAME = "XXYYZZ"; - static final class MockWriter implements VariantContextWriter { - final List emitted = new ArrayList<>(); - boolean headerWritten = false; - boolean closed = false; - boolean error = false; - boolean headerSet = false; - - @Override - public void writeHeader(VCFHeader header) { - headerSet = true; - headerWritten = true; - } - - @Override - public void close() { - closed = true; - } - - @Override - public boolean checkError() { - return error; - } - - @Override - public void add(VariantContext vc) { - emitted.add(vc); - } - - @Override - public void setHeader(VCFHeader header) { - headerSet = true; - } - } - private static final List standardPartition = ImmutableList.of(1, 10, 20); private static final List highConfLowConf = ImmutableList.of(20,100); @Test public void testHeaderWriting() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.writeHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -93,7 +57,7 @@ public void testHeaderWriting() { @Test public void testHeaderSetting(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final 
GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.setHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -102,17 +66,15 @@ public void testHeaderSetting(){ @Test public void testClose() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.close(); Assert.assertTrue(mockWriter.closed); } - - @Test public void testCloseEmitsLastVariant() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -125,7 +87,7 @@ public void testCloseEmitsLastVariant() { @Test public void testCloseDoesntEmitsLastVariantWhenNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeNonRef(CHR1, 1)); @@ -138,7 +100,7 @@ public void testCloseDoesntEmitsLastVariantWhenNonRef() { @Test public void testCrossingContigBoundaryRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -155,7 +117,7 @@ public void testCrossingContigBoundaryRef() { @Test public void testCrossingContigBoundaryToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(30)); @@ -172,7 +134,7 @@ public void 
testCrossingContigBoundaryToLowerPositionsRef() { @Test public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeNonRef(CHR1, 20)); @@ -188,7 +150,7 @@ public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { @Test public void testCrossingContigBoundaryNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -202,7 +164,7 @@ public void testCrossingContigBoundaryNonRef() { @Test public void testCrossingContigBoundaryNonRefThenNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeNonRef(CHR1, 1)); @@ -251,7 +213,7 @@ private static void assertGoodVC(final VariantContext vc, final String contig, f @Test public void testVariantForcesNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -269,7 +231,7 @@ public void testVariantForcesNonRef() { @Test public void testEmittingTwoBands() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -286,14 +248,14 @@ public void testEmittingTwoBands() { @Test public void testBandingUsingPP() { - final 
MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); int[] PPs1 = {0,63,128}; int[] PPs2 = {0,67,145}; writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10000, 10000, - ALLELES), Arrays.asList(REF, REF), 2, PPs1)); - writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(REF, REF), 21, PPs2)); + ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 2, PPs1)); + writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 21, PPs2)); writer.close(); Assert.assertEquals(mockWriter.emitted.size(), 1); assertGoodVCwithPPs(mockWriter.emitted.get(0), CHR1, 10000, 10001, false); @@ -302,7 +264,7 @@ public void testBandingUsingPP() { @Test public void testNonContiguousBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -317,7 +279,7 @@ public void testNonContiguousBlocks() { @Test public void testInputBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, highConfLowConf, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef("20", 1, 16, 600)); @@ -329,7 +291,7 @@ public void testInputBlocks() { @Test public void testDeletion() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -348,7 +310,7 @@ public void testDeletion() { @Test public void testHomRefAlt() { - 
final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef(1)); @@ -407,7 +369,7 @@ public void testBadPartitionsThrowException(final List partitions){ @Test public void testCheckError(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter gvcfWriter = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); mockWriter.error = false; Assert.assertEquals(gvcfWriter.checkError(), mockWriter.checkError()); @@ -637,7 +599,7 @@ public void testOverlappingDeletions() { final VariantContext block1 = reblocker.lowQualVariantToGQ0HomRef(deletion1, deletion1); final VariantContext block2 = reblocker.lowQualVariantToGQ0HomRef(deletion2, deletion2); - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, Arrays.asList(20,100), 2); writer.add(deletion1); writer.add(block2); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java new file mode 100644 index 00000000000..f96e937c7c4 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java @@ -0,0 +1,91 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.SimpleInterval; 
+import org.broadinstitute.hellbender.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class IntervalFilteringVcfWriterUnitTest extends GATKBaseTest { + + + @DataProvider + public Object[][] getIntervalsAndMode(){ + final VariantContext noOverlap = new VariantContextBuilder("test", "1", 200, 300, Arrays.asList(Allele.create(Utils.repeatChars('A', 101), true), Allele.ALT_A)).make(); + final VariantContext contained = new VariantContextBuilder("test", "1", 101, 104, Arrays.asList(Allele.create(Utils.repeatChars('A', 4), true), Allele.ALT_A)).make(); + final VariantContext overlaps = new VariantContextBuilder("test", "1", 90, 120, Arrays.asList(Allele.create(Utils.repeatChars('A', 31), true), Allele.ALT_A)).make(); + final VariantContext startsIn = new VariantContextBuilder("test", "1", 103, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 38), true), Allele.ALT_A)).make(); + final VariantContext endsIn = new VariantContextBuilder("test", "1", 90, 103, Arrays.asList(Allele.create(Utils.repeatChars('A', 14), true), Allele.ALT_A)).make(); + final VariantContext anotherContig = new VariantContextBuilder("test", "2", 90, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 51), true), Allele.ALT_A)).make(); + final List<VariantContext> vcs = Arrays.asList(noOverlap, contained, overlaps, startsIn, endsIn, anotherContig); + + final SimpleInterval interval = new SimpleInterval("1", 100, 105); + + return new Object[][]{ + // no overlap, contained, overlaps, starts in, ends in, another contig + {interval, vcs, IntervalFilteringVcfWriter.Mode.ANYWHERE, new boolean[]{ true, true, true, true, true, true}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.CONTAINED, new boolean[]{ false, true, false, false, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.OVERLAPS, new boolean[]{ false, true, true, true,
true, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.STARTS_IN, new boolean[]{ false, true, false, true, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.ENDS_IN, new boolean[]{ false, true, false, false, true, false}}, + }; + } + + @Test(dataProvider = "getIntervalsAndMode") + public void testModes(SimpleInterval interval, List<VariantContext> vcs, IntervalFilteringVcfWriter.Mode mode, boolean[] expected) { + final OverlapDetector<SimpleInterval> detector = OverlapDetector.create(Collections.singletonList(interval)); + for(int i = 0; i < expected.length; i++){ + Assert.assertEquals(mode.test(detector,vcs.get(i)), expected[i], "mode " + mode + " mismatches at " + i); + } + } + + @Test + public void testHeaderWriting() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List<SimpleInterval> intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.writeHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + Assert.assertTrue(mockWriter.headerWritten); + } + + @Test + public void testHeaderSetting(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List<SimpleInterval> intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.setHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + Assert.assertFalse(mockWriter.headerWritten); + } + + @Test + public void testClose() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List<SimpleInterval> intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); +
writer.close(); + Assert.assertTrue(mockWriter.closed); + } + + @Test + public void testCheckError(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List<SimpleInterval> intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + Assert.assertFalse(writer.checkError()); + mockWriter.error = true; + Assert.assertTrue(writer.checkError()); + } + +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java index 1fb3522a4a6..4b51da5f2fe 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java @@ -22,7 +22,7 @@ public class SomaticGVCFWriterUnitTest { @Test public void testValueBinning() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, standardPartition); //derives partitionPrecision 1 from standardPartition values Assert.assertTrue(writer.convertLODtoInt(2.3) == 23); @@ -46,7 +46,7 @@ public void testValueBinning() { @Test public void testAddingAndMerging() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, standardPartition); final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF, REF)); int pos = 1; @@ -89,7 +89,7 @@ public void testAddingAndMerging() { @Test public void testPrecision() { - final
GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, precisionTwoPartition); Assert.assertTrue(((SomaticGVCFBlockCombiner)writer.gvcfBlockCombiner).partitionPrecision == 2); diff --git a/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java new file mode 100644 index 00000000000..4cf3137b4cf --- /dev/null +++ b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java @@ -0,0 +1,42 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; + +import java.util.ArrayList; +import java.util.List; + +final class MockVcfWriter implements VariantContextWriter { + final List<VariantContext> emitted = new ArrayList<>(); + boolean headerWritten = false; + boolean closed = false; + boolean error = false; + boolean headerSet = false; + + @Override + public void writeHeader(VCFHeader header) { + headerSet = true; + headerWritten = true; + } + + @Override + public void close() { + closed = true; + } + + @Override + public boolean checkError() { + return error; + } + + @Override + public void add(VariantContext vc) { + emitted.add(vc); + } + + @Override + public void setHeader(VCFHeader header) { + headerSet = true; + } +}