From ceea71369023e758a055bf83fbdc0784bb419869 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 6 Jan 2020 14:03:10 -0500 Subject: [PATCH] Adding a new GATKTool level argument to control which if any output variants are filtered. --- .../hellbender/engine/GATKTool.java | 55 +++++-- .../tools/walkers/GenotypeGVCFs.java | 52 +++---- .../gnarlyGenotyper/GnarlyGenotyper.java | 28 +--- .../writers/IntervalFilteringVcfWriter.java | 140 ++++++++++++++++++ .../engine/GatkToolIntegrationTest.java | 73 +++++++++ .../walkers/GenotypeGVCFsIntegrationTest.java | 12 +- .../GnarlyGenotyperIntegrationTest.java | 8 +- .../variantutils/ReblockGVCFUnitTest.java | 7 +- .../variant/writers/GVCFWriterUnitTest.java | 82 +++------- .../IntervalFilteringVcfWriterUnitTest.java | 91 ++++++++++++ .../writers/SomaticGVCFWriterUnitTest.java | 6 +- .../utils/variant/writers/MockVcfWriter.java | 42 ++++++ 12 files changed, 457 insertions(+), 139 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java create mode 100644 src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java create mode 100644 src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index a046b332a3c..57664c3c61a 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -16,8 +16,12 @@ import java.util.*; import java.util.stream.Stream; + +import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; +import 
org.broadinstitute.barclay.argparser.CommandLineException; import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; @@ -45,6 +49,11 @@ import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; + +//TODO: +//UserException overloads +//VCF outs /** * Base class for all GATK tools. Tool authors that wish to write a "GATK" tool but not use one of @@ -127,6 +136,14 @@ public abstract class GATKTool extends CommandLineProgram { doc = "If true, don't emit genotype fields when writing vcf file output.", optional = true) public boolean outputSitesOnlyVCFs = false; + public static final String VARIANT_OUTPUT_INTERVAL_FILTERING_MODE = "variant-output-interval-filtering-mode"; + @Argument(fullName = VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, + doc = "Restrict the output variants to ones that match the specified intervals according to the specified matching mode.", + optional = true) + @Advanced + public IntervalFilteringVcfWriter.Mode outputVariantIntervalFilteringMode = getDefaultVariantOutputFilterMode(); + + /** * Master sequence dictionary to be used instead of all other dictionaries (if provided). */ @@ -417,6 +434,13 @@ public int getDefaultCloudIndexPrefetchBufferSize() { */ public String getProgressMeterRecordLabel() { return ProgressMeter.DEFAULT_RECORD_LABEL; } + /** + * @return Default interval filtering mode for variant output. Subclasses may override this to set a different default. 
+ */ + public IntervalFilteringVcfWriter.Mode getDefaultVariantOutputFilterMode(){ + return null; + } + protected List transformTraversalIntervals(final List getIntervals, final SAMSequenceDictionary sequenceDictionary) { return getIntervals; } @@ -600,7 +624,7 @@ public boolean requiresIntervals() { /** * Does this tool want to disable the progress meter? If so, override here to return true - * + * * @return true if this tools wants to disable progress meter output, otherwise false */ public boolean disableProgressMeter() { @@ -727,12 +751,16 @@ protected void onStartup() { initializeIntervals(); // Must be initialized after reference, reads and features, since intervals currently require a sequence dictionary from another data source - if ( seqValidationArguments.performSequenceDictionaryValidation()) { + if (seqValidationArguments.performSequenceDictionaryValidation()) { validateSequenceDictionaries(); } checkToolRequirements(); + if (outputVariantIntervalFilteringMode != null && userIntervals == null){ + throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + VARIANT_OUTPUT_INTERVAL_FILTERING_MODE + " was specified."); + } + initializeProgressMeter(getProgressMeterRecordLabel()); } @@ -911,20 +939,27 @@ public VariantContextWriter createVCFWriter(final Path outPath) { if (outputSitesOnlyVCFs) { options.add(Options.DO_NOT_WRITE_GENOTYPES); } - + final VariantContextWriter unfilteredWriter; if (maxVariantsPerShard > 0) { - return new ShardingVCFWriter( + unfilteredWriter = new ShardingVCFWriter( outPath, maxVariantsPerShard, sequenceDictionary, createOutputVariantMD5, - options.toArray(new Options[options.size()])); + options.toArray(new Options[0])); + } else { + unfilteredWriter = GATKVariantContextUtils.createVCFWriter( + outPath, + sequenceDictionary, + createOutputVariantMD5, + options.toArray(new Options[0])); } - return GATKVariantContextUtils.createVCFWriter( - outPath, - sequenceDictionary, - createOutputVariantMD5, - 
options.toArray(new Options[options.size()])); + + return outputVariantIntervalFilteringMode== null ? + unfilteredWriter : + new IntervalFilteringVcfWriter(unfilteredWriter, + intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()), + outputVariantIntervalFilteringMode); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java index eb87d041913..7c29f251b90 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java @@ -7,12 +7,16 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineException; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; @@ -25,10 +29,21 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; import 
org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; -import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.utils.GenomeLoc; +import org.broadinstitute.hellbender.utils.GenomeLocParser; +import org.broadinstitute.hellbender.utils.GenomeLocSortedSet; +import org.broadinstitute.hellbender.utils.IntervalMergingRule; +import org.broadinstitute.hellbender.utils.IntervalSetRule; +import org.broadinstitute.hellbender.utils.IntervalUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; /** * Perform joint genotyping on one or more samples pre-called with HaplotypeCaller @@ -108,7 +123,7 @@ public final class GenotypeGVCFs extends VariantLocusWalker { /** * Import all data between specified intervals. Improves performance using large lists of intervals, as in exome * sequencing, especially if GVCF data only exists for specified intervals. Use with - * --only-output-calls-starting-in-intervals if input GVCFs contain calls outside the specified intervals. + * --{@value GATKTool#VARIANT_OUTPUT_INTERVAL_FILTERING_MODE} if input GVCFs contain calls outside the specified intervals. */ @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, @@ -149,16 +164,6 @@ public final class GenotypeGVCFs extends VariantLocusWalker { @ArgumentCollection private GenomicsDBArgumentCollection genomicsdbArgs = new GenomicsDBArgumentCollection(); - /** - * This option can only be activated if intervals are specified. 
- */ - @Advanced - @Argument(fullName= ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - - @Argument(fullName = FORCE_OUTPUT_INTERVALS_NAME, suppressFileExpansion = true, doc = "sites at which to output genotypes even if non-variant in samples", optional = true) protected final List forceOutputIntervalStrings = new ArrayList<>(); @@ -177,9 +182,6 @@ public final class GenotypeGVCFs extends VariantLocusWalker { private VariantContextWriter vcfWriter; - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - private OverlapDetector forceOutputIntervals; private boolean forceOutputIntervalsPresent; @@ -249,21 +251,12 @@ public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !hasUserSuppliedIntervals()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - - intervals = hasUserSuppliedIntervals() ? 
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), dbsnp.dbsnp, Collections.emptyList(), false, keepCombined); merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput, false, true); //methods that cannot be called in engine bc its protected - Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); + final Set defaultToolVCFHeaderLines = getDefaultToolVCFHeaderLines(); vcfWriter = createVCFWriter(outputFile); //create engine object @@ -271,7 +264,6 @@ public void onTraversalStart() { //call initialize method in engine class that creates VCFWriter object and writes a header to it vcfWriter = gvcfEngine.setupVCFWriter(defaultToolVCFHeaderLines, keepCombined, dbsnp, vcfWriter); - } @Override @@ -282,9 +274,7 @@ public void apply(final Locatable loc, List variants, ReadsConte final VariantContext regenotypedVC = gvcfEngine.callRegion(loc, variants, ref, features, merger, somaticInput, tlodThreshold, afTolerance, forceOutput); if (regenotypedVC != null) { - final SimpleInterval variantStart = new SimpleInterval(regenotypedVC.getContig(), regenotypedVC.getStart(), regenotypedVC.getStart()); - if ((forceOutput || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC)) && - (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains (variantStart)))) { + if ((forceOutput || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC))) { vcfWriter.add(regenotypedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java index cd4917c178f..72c03b01de9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java @@ -111,15 +111,6 @@ public final class GnarlyGenotyper extends VariantWalker { @Argument(fullName = "keep-all-sites", doc="Retain low quality and non-variant sites, applying appropriate filters", optional=true) private boolean keepAllSites = false; - /** - * This option can only be activated if intervals are specified. - */ - @Advanced - @Argument(fullName = GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, - doc="Restrict variant output to sites that start within provided intervals", - optional=true) - private boolean onlyOutputCallsStartingInIntervals = false; - @Argument(fullName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, shortName = GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, doc = "Boolean flag to read in all data in between intervals. Improves performance reading from GenomicsDB " + @@ -147,9 +138,6 @@ public final class GnarlyGenotyper extends VariantWalker { private final RMSMappingQuality mqCalculator = RMSMappingQuality.getInstance(); private final Set> allAlleleSpecificAnnotations = new HashSet<>(); - /** these are used when {@link #onlyOutputCallsStartingInIntervals) is true */ - private List intervals; - @Override public boolean requiresReference() { return true; @@ -183,14 +171,6 @@ protected GenomicsDBOptions getGenomicsDBOptions() { public void onTraversalStart() { final VCFHeader inputVCFHeader = getHeaderForVariants(); - if(onlyOutputCallsStartingInIntervals) { - if( !intervalArgumentCollection.intervalsSpecified()) { - throw new CommandLineException.MissingArgument("-L or -XL", "Intervals are required if --" + GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME + " was specified."); - } - } - intervals = intervalArgumentCollection.intervalsSpecified() ? 
intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : - Collections.emptyList(); - final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); setupVCFWriter(inputVCFHeader, samples); @@ -266,11 +246,11 @@ private void setupVCFWriter(VCFHeader inputVCFHeader, SampleList samples) { @SuppressWarnings({"unchecked", "rawtypes"}) @Override public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) { - SimpleInterval variantStart = new SimpleInterval(variant.getContig(), variant.getStart(), variant.getStart()); //return early if there's no non-symbolic ALT since GDB already did the merging if ( !variant.isVariant() || !GATKVariantContextUtils.isProperlyPolymorphic(variant) - || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 - || (onlyOutputCallsStartingInIntervals && !intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + || variant.getAttributeAsInt(VCFConstants.DEPTH_KEY,0) == 0 ) + // todo this change is a slight de-optimization since we will now process some sites which were previously ignored + { if (keepAllSites) { VariantContextBuilder builder = new VariantContextBuilder(mqCalculator.finalizeRawMQ(variant)); //don't fill in QUAL here because there's no alt data builder.filter(GATKVCFConstants.LOW_QUAL_FILTER_NAME); @@ -297,7 +277,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r finalizedVC = genotyperEngine.finalizeGenotype(variant); } //could return null if the variant didn't pass the genotyping arg calling/emission threshold - if (finalizedVC != null && (!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains(variantStart)))) { + if (finalizedVC != null) { vcfWriter.add(finalizedVC); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java
b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java new file mode 100644 index 00000000000..6f81da1efd6 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriter.java @@ -0,0 +1,140 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.Locatable; +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.List; +import java.util.Set; + +/** + * A {@link VariantContextWriter} decorator which filters out variants that don't match a given set of intervals. + */ +public class IntervalFilteringVcfWriter implements VariantContextWriter { + + /** + * Comparison modes which allow matching intervals in different ways. + */ + public enum Mode { + + /** + * Matches if the query starts within any of the given intervals. 
+ */ + STARTS_IN{ + @Override + boolean test(OverlapDetector detector, final VariantContext query) { + final SimpleInterval startPosition = new SimpleInterval(query.getContig(), query.getStart(), query.getStart()); + return detector.overlapsAny(startPosition); + } + }, + + /** + * Matches if the query ends within any of the given intervals + */ + ENDS_IN{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final SimpleInterval endPosition = new SimpleInterval(query.getContig(), query.getEnd(), query.getEnd()); + return detector.overlapsAny(endPosition); + } + }, + + /** + * Matches if any part of the query overlaps any one of the given intervals + */ + OVERLAPS{ + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return detector.overlapsAny(query); + } + }, + + /** + * Matches if the entirety of the query is contained within one of the intervals + */ + CONTAINED { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + final Set overlaps = detector.getOverlaps(query); + for( final Locatable loc : overlaps){ + if(loc.contains(query)){ + return true; + } + } + return false; + } + }, + + /** + * Always matches; may be used to disable filtering entirely. + */ + ANYWHERE { + @Override + boolean test(final OverlapDetector detector, final VariantContext query) { + return true; + } + }; + + /** + * @param detector The OverlapDetector to compare against + * @param query The variant being tested + * @return true iff the variant matches the given intervals + */ + abstract boolean test(OverlapDetector detector, VariantContext query); + } + + private final VariantContextWriter writer; + private final OverlapDetector detector; + private final Mode mode; + + /** + * @param writer the writer to wrap + * @param intervals the intervals to compare against, note that these are not merged so if they should be merged then the input list should be preprocessed +
* @param mode the matching mode to use + */ + public IntervalFilteringVcfWriter(final VariantContextWriter writer, List intervals, Mode mode) { + Utils.nonNull(writer); + Utils.nonEmpty(intervals); + Utils.nonNull(mode); + + this.writer = writer; + this.detector = OverlapDetector.create(intervals); + this.mode = mode; + } + + @Override + public void writeHeader(final VCFHeader header) { + writer.writeHeader(header); + } + + @Override + public void setHeader(final VCFHeader header) { + writer.setHeader(header); + } + + @Override + public void close() { + writer.close(); + } + + @Override + public boolean checkError() { + return writer.checkError(); + } + + /** + * Add the given variant to the writer and output it if it matches. + * @param vc the variant to potentially write + */ + @Override + public void add(final VariantContext vc) { + if(mode.test(detector, vc)) { + writer.add(vc); + } + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java index 7e1164b89c9..6031808e4e0 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/GatkToolIntegrationTest.java @@ -1,18 +1,29 @@ package org.broadinstitute.hellbender.engine; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; +import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.FileExtensions; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.CommandLineProgramTest; import 
org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.TestProgramGroup; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.walkers.mutect.Mutect2; import org.broadinstitute.hellbender.tools.walkers.variantutils.SelectVariants; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -91,4 +102,66 @@ public void testSharding() { Assert.assertTrue(Files.exists(Paths.get(firstShard + FileExtensions.COMPRESSED_VCF_INDEX))); Assert.assertTrue(Files.exists(Paths.get(secondShard + FileExtensions.COMPRESSED_VCF_INDEX))); } + + @CommandLineProgramProperties(summary = "testTool which emits specific variants", + oneLineSummary = "Test tool", + programGroup = TestProgramGroup.class) + public static class VariantEmitter extends GATKTool{ + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) + File output; + + @Override + public void traverse() { + //nope + } + + @Override + public void onTraversalStart() { + try(final VariantContextWriter vcfWriter = createVCFWriter(output)){ + vcfWriter.writeHeader(new VCFHeader()); + final VariantContextBuilder vcb = new VariantContextBuilder(); + vcb.alleles("AAAAAA", "A").chr("1"); + + vcfWriter.add(vcb.start(10).stop(15).make()); + vcfWriter.add(vcb.start(100).stop(105).make()); + vcfWriter.add(vcb.start(1000).stop(1005).make()); + vcfWriter.add(vcb.start(10000).stop(10005).make()); + + vcb.chr("2"); + 
vcfWriter.add(vcb.start(20).stop(25).make()); + vcfWriter.add(vcb.start(200).stop(205).make()); + vcfWriter.add(vcb.start(2000).stop(2005).make()); + vcfWriter.add(vcb.start(20000).stop(20005).make()); + } + } + } + + @DataProvider + public Object[][] getIntervalsAndOverlapMode(){ + return new Object[][]{ + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ANYWHERE, 8}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.OVERLAPS, 6}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.STARTS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.ENDS_IN, 4}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), IntervalFilteringVcfWriter.Mode.CONTAINED, 2}, + {Arrays.asList(new SimpleInterval("1", 101, 10001), new SimpleInterval("2", 201, 20001)), null, 8}, + }; + } + + @Test(dataProvider = "getIntervalsAndOverlapMode") + public void testVcfOutputFilterMode(List intervals, IntervalFilteringVcfWriter.Mode mode, int variantsIncluded){ + final ArgumentsBuilder args = new ArgumentsBuilder(); + final File out = createTempFile("out", ".vcf"); + args.addOutput(out); + intervals.forEach(args::addInterval); + args.addReference(b37Reference); + if( mode != null) { + args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, mode); + } + + runCommandLine(args, VariantEmitter.class.getSimpleName()); + final Pair> vcfHeaderListPair = VariantContextTestUtils.readEntireVCFIntoMemory(out.toString()); + + Assert.assertEquals(vcfHeaderListPair.getRight().size(), variantsIncluded); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java index 41b0b2f126e..3b0938135af 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsIntegrationTest.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.GATKPlugin.DefaultGATKVariantAnnotationArgumentCollection; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; @@ -31,6 +32,7 @@ import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.runtime.ProcessController; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -87,7 +89,7 @@ public Object[][] gvcfsToGenotype() { return new Object[][]{ //combine not supported yet, see https://github.com/broadinstitute/gatk/issues/2429 and https://github.com/broadinstitute/gatk/issues/2584 //{"combine.single.sample.pipeline.1.vcf", null, Arrays.asList("-V", getTestFile("combine.single.sample.pipeline.2.vcf").toString() , "-V", getTestFile("combine.single.sample.pipeline.3.vcf").toString()), b37_reference_20_21}, - {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME), b37_reference_20_21}, + {getTestFile("leadingDeletion.g.vcf"), 
getTestFile("leadingDeletionRestrictToStartExpected.vcf"), Arrays.asList("-L", "20:69512-69513", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b37_reference_20_21}, {getTestFile("leadingDeletion.g.vcf"), getTestFile("leadingDeletionExpected.vcf"), Arrays.asList("-L", "20:69512-69513"), b37_reference_20_21}, {getTestFile(BASE_PAIR_GVCF), getTestFile( BASE_PAIR_EXPECTED), NO_EXTRA_ARGS, b37_reference_20_21}, //base pair level gvcf {getTestFile("testUpdatePGT.gvcf"), getTestFile( "testUpdatePGT.gatk3.7_30_ga4f720357.output.vcf"), NO_EXTRA_ARGS, b37_reference_20_21}, //testUpdatePGT @@ -280,7 +282,7 @@ private void runAndCheckGenomicsDBOutput(final ArgumentsBuilder args, final File } @Test(dataProvider = "getGVCFsForGenomicsDBOverMultipleIntervals") - public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) throws IOException { + public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, List intervals, String reference) { final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, intervals, true); final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB); @@ -291,8 +293,8 @@ public void testGenotypeGVCFsMultiIntervalGDBQuery(File input, File expected, Li .add("V", genomicsDBUri); args.addOutput(output); intervals.forEach(args::addInterval); - args.addRaw("--" + GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME); - args.addRaw("--only-output-calls-starting-in-intervals"); //note that this will restrict calls to just the specified intervals + args.add(GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, true); + args.add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); //note that this will restrict calls to just the specified intervals runAndCheckGenomicsDBOutput(args, expected, output); @@ -409,7 +411,7 @@ public void 
testIntervalsAndOnlyOutputCallsStartingInIntervalsAreMutuallyRequire .addVCF(getTestFile("leadingDeletion.g.vcf")) .addReference(new File(b37_reference_20_21)) .addOutput( createTempFile("tmp",".vcf")) - .add(GenotypeGVCFs.ONLY_OUTPUT_CALLS_STARTING_IN_INTERVALS_FULL_NAME, true); + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN); Assert.assertThrows(CommandLineException.MissingArgument.class, () -> runCommandLine(args)); args.add("L", "20:69512-69513"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java index 7b591771691..d838a41ac40 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/GnarlyGenotyperIntegrationTest.java @@ -8,6 +8,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality; import org.broadinstitute.hellbender.utils.IntervalUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -15,6 +16,7 @@ import org.broadinstitute.hellbender.testutils.GenomicsDBTestUtils; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.variant.writers.IntervalFilteringVcfWriter; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -56,10 +58,10 @@ public Object[][] getVCFdata() { // Simple Test, spanning deletions; standard calling confidence //No variants outside 
requested intervals; no SNPs with QUAL < 60, no INDELs with QUAL < 69?; has star alleles after deletion at chr20:263497; has AC, AF, AN, DP, ExcessHet, FS, MQ, (MQRankSum), (ReadPosRankSum), SOR, QD; has called genotypes {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString()), b38_reference_20_21}, // Same as above, but with GenomicsDB using BCF2Codec for interchange {new File[]{getTestFile("sample1.vcf"), getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, - getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals", "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, + getTestFile("fiveSampleTest.vcf"), null, Arrays.asList(new SimpleInterval("chr20", 251370, 252000), new SimpleInterval("chr20", 263000, 265600)), Arrays.asList("--merge-input-intervals", "--"+GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN.toString(), "--genomicsdb-use-bcf-codec"), b38_reference_20_21}, //lower calling confidence //same as above except (different intervals and) with SNPs with 40 < QUAL < 60 and INDELs with 49 < QUAL < 69 {new File[]{getTestFile("sample1.vcf"), 
getTestFile("sample2.vcf"), getTestFile("sample3.vcf"), getTestFile("sample4.vcf"), getTestFile("sample5.vcf")}, @@ -162,7 +164,7 @@ public void testOnHailOutput() { args.addReference(new File(hg38Reference)) .add("V", input) .add("L", "chr20:10000000-10030000") - .add("only-output-calls-starting-in-intervals", true) + .add(GATKTool.VARIANT_OUTPUT_INTERVAL_FILTERING_MODE, IntervalFilteringVcfWriter.Mode.STARTS_IN) .add("keep-all-sites", true) .addOutput(outputPath) .add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java index c083a620891..473e193f867 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCFUnitTest.java @@ -24,6 +24,7 @@ import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriterUnitTest; +import org.broadinstitute.hellbender.utils.variant.writers.MockVcfWriter; import org.broadinstitute.hellbender.utils.variant.writers.ReblockingGVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.ReblockingOptions; import org.testng.Assert; @@ -50,7 +51,7 @@ public void testCleanUpHighQualityVariant() { //We need an annotation engine for cleanUpHighQualityVariant() reblocker.createAnnotationEngine(); //...and a vcfwriter - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.dropLowQuals = true; 
reblocker.doQualApprox = true; @@ -93,7 +94,7 @@ public void testCleanUpHighQualityVariant() { @Test public void testLowQualVariantToGQ0HomRef() { final ReblockGVCF reblocker = new ReblockGVCF(); - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.dropLowQuals = true; final Genotype g = VariantContextTestUtils.makeG("sample1", LONG_REF, Allele.NON_REF_ALLELE, 200, 100, 200, 11, 0, 37); @@ -179,7 +180,7 @@ public void testBadCalls() { @Test public void testPosteriors() { final ReblockGVCF reblocker = new ReblockGVCF(); - reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); + reblocker.vcfWriter = new ReblockingGVCFWriter(new MockVcfWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions()); reblocker.posteriorsKey = "GP"; final GenotypeBuilder gb = new GenotypeBuilder("sample1", Arrays.asList(LONG_REF, LONG_REF)); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java index 9abea785e40..b971a109e8b 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/GVCFWriterUnitTest.java @@ -37,53 +37,17 @@ public class GVCFWriterUnitTest extends GATKBaseTest { private static final String CHR1 = "1"; private static final String CHR2 = "2"; - private static final Allele REF = Allele.create("G", true); - private static final Allele ALT = Allele.create("A"); - private static final List ALLELES = ImmutableList.of(REF, Allele.NON_REF_ALLELE); + private static final List ALLELES = 
ImmutableList.of(Allele.REF_G, Allele.NON_REF_ALLELE); private static final String SAMPLE_NAME = "XXYYZZ"; - static public final class MockWriter implements VariantContextWriter { - final List emitted = new ArrayList<>(); - boolean headerWritten = false; - boolean closed = false; - boolean error = false; - boolean headerSet = false; - - @Override - public void writeHeader(VCFHeader header) { - headerSet = true; - headerWritten = true; - } - - @Override - public void close() { - closed = true; - } - - @Override - public boolean checkError() { - return error; - } - - @Override - public void add(VariantContext vc) { - emitted.add(vc); - } - - @Override - public void setHeader(VCFHeader header) { - headerSet = true; - } - } - private static final List standardPartition = ImmutableList.of(1, 10, 20); private static final List highConfLowConf = ImmutableList.of(20,100); @Test public void testHeaderWriting() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.writeHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -92,7 +56,7 @@ public void testHeaderWriting() { @Test public void testHeaderSetting(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.setHeader(new VCFHeader()); Assert.assertTrue(mockWriter.headerSet); @@ -101,17 +65,15 @@ public void testHeaderSetting(){ @Test public void testClose() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.close(); Assert.assertTrue(mockWriter.closed); } - - @Test public void testCloseEmitsLastVariant() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); 
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -124,7 +86,7 @@ public void testCloseEmitsLastVariant() { @Test public void testCloseDoesntEmitsLastVariantWhenNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 1)); @@ -137,7 +99,7 @@ public void testCloseDoesntEmitsLastVariantWhenNonRef() { @Test public void testCrossingContigBoundaryRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -154,7 +116,7 @@ public void testCrossingContigBoundaryRef() { @Test public void testCrossingContigBoundaryToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(30)); @@ -171,7 +133,7 @@ public void testCrossingContigBoundaryToLowerPositionsRef() { @Test public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 20)); @@ -187,7 +149,7 @@ public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() { @Test public void testCrossingContigBoundaryNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -201,7 +163,7 @@ public void testCrossingContigBoundaryNonRef() { @Test public void testCrossingContigBoundaryNonRefThenNonRef() { - final MockWriter 
mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeNonRef(CHR1, 1)); @@ -250,7 +212,7 @@ private static void assertGoodVC(final VariantContext vc, final String contig, f @Test public void testVariantForcesNonRef() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -268,7 +230,7 @@ public void testVariantForcesNonRef() { @Test public void testEmittingTwoBands() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -285,14 +247,14 @@ public void testEmittingTwoBands() { @Test public void testBandingUsingPP() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); int[] PPs1 = {0,63,128}; int[] PPs2 = {0,67,145}; writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10000, 10000, - ALLELES), Arrays.asList(REF, REF), 2, PPs1)); - writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(REF, REF), 21, PPs2)); + ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 2, PPs1)); + writer.add(makeVariantContext(new VariantContextBuilder("test", CHR1, 10001, 10001, ALLELES), Arrays.asList(Allele.REF_G, Allele.REF_G), 21, PPs2)); writer.close(); Assert.assertEquals(mockWriter.emitted.size(), 1); assertGoodVCwithPPs(mockWriter.emitted.get(0), CHR1, 10000, 10001, false); @@ -301,7 +263,7 @@ public void testBandingUsingPP() { @Test public void testNonContiguousBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new 
MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -316,7 +278,7 @@ public void testNonContiguousBlocks() { @Test public void testInputBlocks() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, highConfLowConf); writer.add(makeHomRef("20", 1, 16, 600)); @@ -328,7 +290,7 @@ public void testInputBlocks() { @Test public void testDeletion() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -347,7 +309,7 @@ public void testDeletion() { @Test public void testHomRefAlt() { - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition); writer.add(makeHomRef(1)); @@ -406,7 +368,7 @@ public void testBadPartitionsThrowException(final List partitions){ @Test public void testCheckError(){ - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter gvcfWriter = new GVCFWriter(mockWriter, standardPartition); mockWriter.error = false; Assert.assertEquals(gvcfWriter.checkError(), mockWriter.checkError()); @@ -636,7 +598,7 @@ public void testOverlappingDeletions() { .genotypes(gb.make()).attribute(VCFConstants.END_KEY, 10025); final VariantContext block2 = (new HomRefBlock(vcb.make(), 20, 100, 2).toVariantContext(SAMPLE_NAME, false)); - final MockWriter mockWriter = new MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final GVCFWriter writer = new GVCFWriter(mockWriter, Arrays.asList(20,100)); writer.add(deletion1); writer.add(block2); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java new file mode 100644 index 00000000000..f96e937c7c4 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/IntervalFilteringVcfWriterUnitTest.java @@ -0,0 +1,91 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.samtools.util.OverlapDetector; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFHeader; +import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class IntervalFilteringVcfWriterUnitTest extends GATKBaseTest { + + + @DataProvider + public Object[][] getIntervalsAndMode(){ + final VariantContext noOverlap = new VariantContextBuilder("test", "1", 200, 300, Arrays.asList(Allele.create(Utils.repeatChars('A', 101), true), Allele.ALT_A)).make(); + final VariantContext contained = new VariantContextBuilder("test", "1", 101, 104, Arrays.asList(Allele.create(Utils.repeatChars('A', 4), true), Allele.ALT_A)).make(); + final VariantContext overlaps = new VariantContextBuilder("test", "1", 90, 120, Arrays.asList(Allele.create(Utils.repeatChars('A', 31), true), Allele.ALT_A)).make(); + final VariantContext startsIn = new VariantContextBuilder("test", "1", 103, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 38), true), Allele.ALT_A)).make(); + final VariantContext endsIn = new VariantContextBuilder("test", "1", 90, 103, Arrays.asList(Allele.create(Utils.repeatChars('A', 14), true), Allele.ALT_A)).make(); + final VariantContext anotherContig = new 
VariantContextBuilder("test", "2", 90, 140, Arrays.asList(Allele.create(Utils.repeatChars('A', 51), true), Allele.ALT_A)).make(); + final List vcs = Arrays.asList(noOverlap, contained, overlaps, startsIn, endsIn, anotherContig); + + final SimpleInterval interval = new SimpleInterval("1", 100, 105); + + return new Object[][]{ + // no overlap, contained, overlaps, starts in, ends in, another contig + {interval, vcs, IntervalFilteringVcfWriter.Mode.ANYWHERE, new boolean[]{ true, true, true, true, true, true}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.CONTAINED, new boolean[]{ false, true, false, false, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.OVERLAPS, new boolean[]{ false, true, true, true, true, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.STARTS_IN, new boolean[]{ false, true, false, true, false, false}}, + {interval, vcs, IntervalFilteringVcfWriter.Mode.ENDS_IN, new boolean[]{ false, true, false, false, true, false}}, + }; + } + + @Test(dataProvider = "getIntervalsAndMode") + public void testModes(SimpleInterval interval, List vcs, IntervalFilteringVcfWriter.Mode mode, boolean[] expected) { + final OverlapDetector detector = OverlapDetector.create(Collections.singletonList(interval)); + for(int i = 0; i < expected.length; i++){ + Assert.assertEquals(mode.test(detector,vcs.get(i)), expected[i], "mode " + mode + " mismatches at " + i); + } + } + + @Test + public void testHeaderWriting() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.writeHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + Assert.assertTrue(mockWriter.headerWritten); + } + + @Test + public void testHeaderSetting(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); 
+ final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.setHeader(new VCFHeader()); + Assert.assertTrue(mockWriter.headerSet); + Assert.assertFalse(mockWriter.headerWritten); + } + + @Test + public void testClose() { + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + writer.close(); + Assert.assertTrue(mockWriter.closed); + } + + @Test + public void testCheckError(){ + final MockVcfWriter mockWriter = new MockVcfWriter(); + final List intervals = Arrays.asList(new SimpleInterval("1", 10, 100), new SimpleInterval("2", 100, 500)); + final IntervalFilteringVcfWriter writer = new IntervalFilteringVcfWriter(mockWriter, intervals, IntervalFilteringVcfWriter.Mode.OVERLAPS); + Assert.assertFalse(writer.checkError()); + mockWriter.error = true; + Assert.assertTrue(writer.checkError()); + } + +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java index 1fb3522a4a6..4b51da5f2fe 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/variant/writers/SomaticGVCFWriterUnitTest.java @@ -22,7 +22,7 @@ public class SomaticGVCFWriterUnitTest { @Test public void testValueBinning() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter 
writer = new SomaticGVCFWriter(mockWriter, standardPartition); //derives partitionPrecision 1 from standardPartition values Assert.assertTrue(writer.convertLODtoInt(2.3) == 23); @@ -46,7 +46,7 @@ public void testValueBinning() { @Test public void testAddingAndMerging() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); final SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, standardPartition); final GenotypeBuilder gb = new GenotypeBuilder(SAMPLE_NAME, Arrays.asList(REF, REF)); int pos = 1; @@ -89,7 +89,7 @@ public void testAddingAndMerging() { @Test public void testPrecision() { - final GVCFWriterUnitTest.MockWriter mockWriter = new GVCFWriterUnitTest.MockWriter(); + final MockVcfWriter mockWriter = new MockVcfWriter(); SomaticGVCFWriter writer = new SomaticGVCFWriter(mockWriter, precisionTwoPartition); Assert.assertTrue(((SomaticGVCFBlockCombiner)writer.gvcfBlockCombiner).partitionPrecision == 2); diff --git a/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java new file mode 100644 index 00000000000..4009c710530 --- /dev/null +++ b/src/testUtils/java/org/broadinstitute/hellbender/utils/variant/writers/MockVcfWriter.java @@ -0,0 +1,42 @@ +package org.broadinstitute.hellbender.utils.variant.writers; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFHeader; + +import java.util.ArrayList; +import java.util.List; + +public final class MockVcfWriter implements VariantContextWriter { + final List emitted = new ArrayList<>(); + boolean headerWritten = false; + boolean closed = false; + boolean error = false; + boolean headerSet = false; + + @Override + public void writeHeader(VCFHeader header) { + headerSet = true; + headerWritten = true; 
+ } + + @Override + public void close() { + closed = true; + } + + @Override + public boolean checkError() { + return error; + } + + @Override + public void add(VariantContext vc) { + emitted.add(vc); + } + + @Override + public void setHeader(VCFHeader header) { + headerSet = true; + } +}