-
Notifications
You must be signed in to change notification settings - Fork 592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding argument to GenotypeGVCFs to keep only RAW_GT_COUNT #7996
Changes from 10 commits
9ec8ca4
4a8b3cf
522cd29
470c858
bf9fffc
263d2d8
7462183
aaad296
65e4309
27b3994
fdccc7b
8d314f6
f7b8aa9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,8 @@ | |
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import org.broadinstitute.barclay.argparser.*; | ||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor; | ||
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; | ||
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup; | ||
|
@@ -28,6 +30,7 @@ | |
import org.broadinstitute.hellbender.tools.walkers.mutect.M2ArgumentCollection; | ||
import org.broadinstitute.hellbender.utils.*; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; | ||
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation; | ||
|
||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
@@ -132,17 +135,17 @@ public final class GenotypeGVCFs extends VariantLocusWalker { | |
doc = "LOD threshold to emit variant to VCF.") | ||
protected double tlodThreshold = 3.5; //allow for some lower quality variants | ||
|
||
|
||
/** | ||
* Margin of error in allele fraction to consider a somatic variant homoplasmic, i.e. if there is less than a 0.1% reference allele fraction, those reads are likely errors | ||
*/ | ||
@Argument(fullName=CombineGVCFs.ALLELE_FRACTION_DELTA_LONG_NAME, doc = "Margin of error in allele fraction to consider a somatic variant homoplasmic") | ||
protected double afTolerance = 1e-3; //based on Q30 as a "good" base quality score | ||
|
||
/** | ||
* If specified, keep the combined raw annotations (e.g. AS_SB_TABLE) after genotyping. This is applicable to Allele-Specific annotations | ||
* If specified, keep all the combined raw annotations (e.g. AS_SB_TABLE) after genotyping. This is applicable to Allele-Specific annotations. See {@link ReducibleAnnotation} | ||
*/ | ||
@Argument(fullName=KEEP_COMBINED_LONG_NAME, shortName = KEEP_COMBINED_SHORT_NAME, doc = "If specified, keep the combined raw annotations") | ||
@Argument(fullName=KEEP_COMBINED_LONG_NAME, shortName = KEEP_COMBINED_SHORT_NAME, doc = "If specified, keep the combined raw annotations", | ||
mutex = {GenotypeGVCFsAnnotationArgumentCollection.KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_LONG_NAME}) | ||
protected boolean keepCombined = false; | ||
|
||
@ArgumentCollection | ||
|
@@ -172,6 +175,9 @@ public final class GenotypeGVCFs extends VariantLocusWalker { | |
@ArgumentCollection | ||
private final DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); | ||
|
||
// @ArgumentCollection deliberately omitted since this is passed to the annotation plugin | ||
final GenotypeGVCFsAnnotationArgumentCollection genotypeGVCFsAnnotationArgs = new GenotypeGVCFsAnnotationArgumentCollection(); | ||
|
||
// the annotation engine | ||
private VariantAnnotatorEngine annotationEngine; | ||
|
||
|
@@ -221,6 +227,16 @@ protected GenomicsDBOptions getGenomicsDBOptions() { | |
@Override | ||
public boolean useVariantAnnotations() { return true;} | ||
|
||
@Override | ||
public List<? extends CommandLinePluginDescriptor<?>> getPluginDescriptors() { | ||
GATKReadFilterPluginDescriptor readFilterDescriptor = new GATKReadFilterPluginDescriptor(getDefaultReadFilters()); | ||
return useVariantAnnotations()? | ||
Arrays.asList(readFilterDescriptor, new GATKAnnotationPluginDescriptor( | ||
genotypeGVCFsAnnotationArgs, | ||
getDefaultVariantAnnotations(), getDefaultVariantAnnotationGroups())): | ||
Collections.singletonList(readFilterDescriptor); | ||
} | ||
|
||
@Override | ||
public List<Class<? extends Annotation>> getDefaultVariantAnnotationGroups() { | ||
return Arrays.asList(StandardAnnotation.class); | ||
|
@@ -261,8 +277,9 @@ public void onTraversalStart() { | |
intervals = hasUserSuppliedIntervals() ? intervalArgumentCollection.getIntervals(getBestAvailableSequenceDictionary()) : | ||
Collections.emptyList(); | ||
|
||
Collection<Annotation> variantAnnotations = makeVariantAnnotations(); | ||
annotationEngine = new VariantAnnotatorEngine(variantAnnotations, dbsnp.dbsnp, Collections.emptyList(), false, keepCombined); | ||
final Collection<Annotation> variantAnnotations = makeVariantAnnotations(); | ||
final Set<Annotation> annotationsToKeep = getAnnotationsToKeep(); | ||
annotationEngine = new VariantAnnotatorEngine(variantAnnotations, dbsnp.dbsnp, Collections.emptyList(), false, keepCombined, annotationsToKeep); | ||
|
||
merger = new ReferenceConfidenceVariantContextMerger(annotationEngine, getHeaderForVariants(), somaticInput, false, true); | ||
|
||
|
@@ -279,6 +296,13 @@ public void onTraversalStart() { | |
|
||
} | ||
|
||
private Set<Annotation> getAnnotationsToKeep() { | ||
final GATKAnnotationPluginDescriptor pluginDescriptor = getCommandLineParser().getPluginDescriptor(GATKAnnotationPluginDescriptor.class); | ||
final List<String> annotationStringsToKeep = genotypeGVCFsAnnotationArgs.getKeepSpecifiedCombinedAnnotationNames(); | ||
final Map<String, Annotation> resolvedInstancesMap = pluginDescriptor.getResolvedInstancesMap(); | ||
return annotationStringsToKeep.stream().map(resolvedInstancesMap::get).collect(Collectors.toSet()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you need to be a bit more defensive in how you reconcile the "resolved" entries returned by For instance, I don't think there is anything that will detect that the user made up some non-existent annotation name and specified it as a keeper, since they're just strings up until this point. Also, it's possible for the user to have both excluded an annotation AND specified it as a keeper (in which case it won't be in the resolved list). This code needs to detect and reject those cases so it doesn't wind up handing the annotator engine null annotations, preferably with some tests to ensure they're rejected properly. You probably won't be able to tell exactly WHY a null is returned here, but I think rejecting that case with an error message saying something like, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I moved the "missing annotation" check here and added a test. |
||
} | ||
|
||
@Override | ||
public void apply(final Locatable loc, List<VariantContext> variants, ReadsContext reads, ReferenceContext ref, FeatureContext features) { | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package org.broadinstitute.hellbender.tools.walkers; | ||
|
||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.hellbender.cmdline.GATKPlugin.DefaultGATKVariantAnnotationArgumentCollection; | ||
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
public class GenotypeGVCFsAnnotationArgumentCollection extends DefaultGATKVariantAnnotationArgumentCollection { | ||
private static final long serialVersionUID = 1L; | ||
|
||
public static final String KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_LONG_NAME = "keep-specific-combined-raw-annotation"; | ||
public static final String KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_SHORT_NAME = "keep-specific-combined"; | ||
|
||
/** | ||
* Keep only the specific combined raw annotations specified. Cannot be used with --keep-combined-raw-annotations which saves all raw annotations. | ||
* Duplicate values will be ignored. See {@link ReducibleAnnotation} for more information on raw annotations. | ||
*/ | ||
@Argument(fullName= KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_LONG_NAME, shortName = KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_SHORT_NAME, optional = true, | ||
mutex = {GenotypeGVCFs.KEEP_COMBINED_LONG_NAME}, | ||
doc="Keep only the specific combined raw annotations specified (removing the other raw annotations). Duplicate values will be ignored.") | ||
protected List<String> keepSpecifiedCombined = new ArrayList<>(); | ||
|
||
public List<String> getKeepSpecifiedCombinedAnnotationNames() {return Collections.unmodifiableList(keepSpecifiedCombined);} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,12 +4,14 @@ | |
import htsjdk.variant.vcf.*; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.engine.FeatureContext; | ||
import org.broadinstitute.hellbender.engine.FeatureDataSource; | ||
import org.broadinstitute.hellbender.engine.FeatureInput; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.tools.walkers.GenotypeGVCFsAnnotationArgumentCollection; | ||
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation; | ||
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
|
@@ -43,6 +45,7 @@ public final class VariantAnnotatorEngine { | |
private boolean expressionAlleleConcordance; | ||
private final boolean useRawAnnotations; | ||
private final boolean keepRawCombinedAnnotations; | ||
private final List<String> rawAnnotationsToKeep; | ||
|
||
private final static Logger logger = LogManager.getLogger(VariantAnnotatorEngine.class); | ||
private final static OneShotLogger jumboAnnotationsLogger = new OneShotLogger(VariantAnnotatorEngine.class); | ||
|
@@ -59,17 +62,21 @@ public final class VariantAnnotatorEngine { | |
* @param useRaw When this is set to true, the annotation engine will call {@link ReducibleAnnotation#annotateRawData(ReferenceContext, VariantContext, AlleleLikelihoods)} | ||
* on annotations that extend {@link ReducibleAnnotation}, instead of {@link InfoFieldAnnotation#annotate(ReferenceContext, VariantContext, AlleleLikelihoods)}, | ||
* @param keepCombined If true, retain the combined raw annotation values instead of removing them after finalizing | ||
* @param rawAnnotationsToKeep List of raw annotations to keep even when others are removed | ||
*/ | ||
public VariantAnnotatorEngine(final Collection<Annotation> annotationList, | ||
final FeatureInput<VariantContext> dbSNPInput, | ||
final List<FeatureInput<VariantContext>> featureInputs, | ||
final boolean useRaw, | ||
boolean keepCombined){ | ||
final boolean keepCombined, | ||
final Collection<Annotation> rawAnnotationsToKeep){ | ||
Utils.nonNull(featureInputs, "comparisonFeatureInputs is null"); | ||
infoAnnotations = new ArrayList<>(); | ||
genotypeAnnotations = new ArrayList<>(); | ||
jumboInfoAnnotations = new ArrayList<>(); | ||
jumboGenotypeAnnotations = new ArrayList<>(); | ||
final List<String> variantAnnotationKeys = new ArrayList<>(); | ||
final List<String> rawVariantAnnotationKeysToKeep = new ArrayList<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One other minor nit: why introduce this intermediate alias ? To me having the additional alias with a slightly different name just obfuscates whats going on - I'd suggest just allocating and using the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, good catch. I fixed this and removed |
||
for (Annotation annot : annotationList) { | ||
if (annot instanceof InfoFieldAnnotation) { | ||
infoAnnotations.add((InfoFieldAnnotation) annot); | ||
|
@@ -82,11 +89,19 @@ public VariantAnnotatorEngine(final Collection<Annotation> annotationList, | |
} else { | ||
throw new GATKException.ShouldNeverReachHereException("Unexpected annotation type: " + annot.getClass().getName()); | ||
} | ||
variantAnnotationKeys.addAll(((VariantAnnotation) annot).getKeyNames()); | ||
} | ||
variantOverlapAnnotator = initializeOverlapAnnotator(dbSNPInput, featureInputs); | ||
reducibleKeys = new LinkedHashSet<>(); | ||
useRawAnnotations = useRaw; | ||
keepRawCombinedAnnotations = keepCombined; | ||
for (final Annotation rawAnnot : rawAnnotationsToKeep) { | ||
if (!annotationList.contains(rawAnnot)) { | ||
throw new UserException("Requested --" + GenotypeGVCFsAnnotationArgumentCollection.KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_LONG_NAME + ": " + rawAnnot + " is not available. Add requested annotation with --" + StandardArgumentDefinitions.ANNOTATION_LONG_NAME + "."); | ||
} | ||
rawVariantAnnotationKeysToKeep.addAll(((VariantAnnotation) rawAnnot).getKeyNames()); | ||
} | ||
this.rawAnnotationsToKeep = rawVariantAnnotationKeysToKeep; | ||
for (InfoFieldAnnotation annot : infoAnnotations) { | ||
if (annot instanceof ReducibleAnnotation) { | ||
for (final String rawKey : ((ReducibleAnnotation) annot).getRawKeyNames()) { | ||
|
@@ -96,6 +111,14 @@ public VariantAnnotatorEngine(final Collection<Annotation> annotationList, | |
} | ||
} | ||
|
||
public VariantAnnotatorEngine(final Collection<Annotation> annotationList, | ||
final FeatureInput<VariantContext> dbSNPInput, | ||
final List<FeatureInput<VariantContext>> featureInputs, | ||
final boolean useRaw, | ||
boolean keepCombined){ | ||
this(annotationList, dbSNPInput, featureInputs, useRaw, keepCombined, Collections.emptyList()); | ||
} | ||
|
||
private VariantOverlapAnnotator initializeOverlapAnnotator(final FeatureInput<VariantContext> dbSNPInput, final List<FeatureInput<VariantContext>> featureInputs) { | ||
final Map<FeatureInput<VariantContext>, String> overlaps = new LinkedHashMap<>(); | ||
for ( final FeatureInput<VariantContext> fi : featureInputs) { | ||
|
@@ -253,6 +276,14 @@ public Map<String, Object> combineAnnotations(final List<Allele> allelesList, Ma | |
public VariantContext finalizeAnnotations(VariantContext vc, VariantContext originalVC) { | ||
final Map<String, Object> variantAnnotations = new LinkedHashMap<>(vc.getAttributes()); | ||
|
||
//save annotations that have been requested to be kept | ||
final Map<String, Object> savedRawAnnotations = new LinkedHashMap<>(); | ||
for(final String rawAnnot : rawAnnotationsToKeep) { | ||
if (variantAnnotations.containsKey(rawAnnot)) { | ||
savedRawAnnotations.put(rawAnnot, variantAnnotations.get(rawAnnot)); | ||
} | ||
} | ||
|
||
// go through all the requested info annotationTypes | ||
for (final InfoFieldAnnotation annotationType : infoAnnotations) { | ||
if (annotationType instanceof ReducibleAnnotation) { | ||
|
@@ -280,6 +311,8 @@ public VariantContext finalizeAnnotations(VariantContext vc, VariantContext orig | |
variantAnnotations.remove(GATKVCFConstants.VARIANT_DEPTH_KEY); | ||
variantAnnotations.remove(GATKVCFConstants.RAW_GENOTYPE_COUNT_KEY); | ||
} | ||
//add back raw annotations that have specifically been requested to keep | ||
variantAnnotations.putAll(savedRawAnnotations); | ||
|
||
// generate a new annotated VC | ||
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(variantAnnotations); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,8 +43,6 @@ | |
import java.security.NoSuchAlgorithmException; | ||
import java.util.*; | ||
import java.util.function.BiConsumer; | ||
import java.util.function.Consumer; | ||
import java.util.function.IntUnaryOperator; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
|
@@ -930,7 +928,7 @@ public void testRawGtCountAnnotation() { | |
args.addReference(b37_reference_20_21) | ||
.addVCF(reblockedGVCF) | ||
.addOutput(output) | ||
.add("keep-combined-raw-annotations", true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this replaces the previous test in favor of this new one. Perhaps we should consider just adding a new test, so that we cover both 1) keeping all combined annotations, and 2) keeping only the specified raw annotations? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We've now made these two arguments (keep all combined and keeping only specified) mutually exclusive (not at the time of your review, but since then based on other comments). So I'm going to leave this test as is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, just to be clear, you mean you'll revert this test to its original behavior, right? In that case, will you add another test for the keep-specified behavior? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The original behavior of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, by "original behavior" I mean the current behavior of the test as it is in master. This test sets In contrast, you modify the test in your branch so that only Probably not anything to sweat about, but that's all that I wanted to point out in my original comment. I'll leave it up to you to decide if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh! Sorry, I see what you're saying now. I think I originally added this test as a way of testing |
||
.add(GenotypeGVCFsAnnotationArgumentCollection.KEEP_SPECIFIED_RAW_COMBINED_ANNOTATION_LONG_NAME, "RawGtCount") | ||
.add("A", "RawGtCount"); | ||
runCommandLine(args); | ||
|
||
|
@@ -942,6 +940,7 @@ public void testRawGtCountAnnotation() { | |
Assert.assertEquals(rawGtCount.get(0), "."); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's going on with the hom-ref count? I see the TODO in the RawGtCount class but I don't understand the fundamental limitation. Incidentally, perhaps we can go ahead and expand the header-line documentation of this annotation to explicitly give the hom-ref/het/hom-var order. Maybe this is obvious to everyone else, but why not document it for dummies like me? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue is that Also I added the documentation to the header line. |
||
Assert.assertEquals(rawGtCount.get(1), "2"); | ||
Assert.assertEquals(rawGtCount.get(2), "0"); | ||
Assert.assertFalse(vc.getAttributes().containsKey(GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY)); | ||
} | ||
|
||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the "Should be used...." warning can be removed. That comment was intended for a previous revision where you were exposing
allDiscoveredAnnotations
.