From fdbaf98e8d369b742a3038377d8b4772337b204d Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Tue, 1 Aug 2023 14:25:36 -0400 Subject: [PATCH 1/4] Allow users to turn off fix from 4.1.9.0 for consistency's sake --- .../walkers/mutect/M2ArgumentCollection.java | 10 +++++++++- .../tools/walkers/mutect/Mutect2Engine.java | 15 ++++++++++++--- .../walkers/mutect/Mutect2EngineUnitTest.java | 4 +++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java index 7de109846aa..692e025e74b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java @@ -9,7 +9,6 @@ import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureInput; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.FlowBasedAlignmentArgumentCollection; -import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls; @@ -50,6 +49,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection public static final String CALLABLE_DEPTH_LONG_NAME = "callable-depth"; public static final String PCR_SNV_QUAL_LONG_NAME = "pcr-snv-qual"; public static final String PCR_INDEL_QUAL_LONG_NAME = "pcr-indel-qual"; + public static final String USER_DEFINED_BASE_QUAL_CORRECTION = "base-qual-correction-factor"; public static final String F1R2_TAR_GZ_NAME = "f1r2-tar-gz"; public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8; @@ -249,6 +249,14 @@ public double getInitialLogOdds() { @Argument(fullName = PCR_INDEL_QUAL_LONG_NAME, optional = true, doc = "Phred-scaled PCR indel qual for overlapping fragments") public int pcrIndelQual = 40; + /** + * A scale factor to modify the base qualities reported by the sequencer and used in the Mutect2 substitution error model. + * Set to zero to turn off the error model changes included in GATK 4.1.9.0. + */ + @Advanced + @Argument(fullName = USER_DEFINED_BASE_QUAL_CORRECTION, optional = true, doc = "Set to zero to turn off the error model changes included in GATK 4.1.9.0.") + public double userDefinedBaseQualCorrection = 1; + /** * In tumor-only mode, we discard variants with population allele frequencies greater than this threshold. */ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 9f0bb90345c..6b64f74dbce 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -122,8 +122,8 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator, AutoCloseab private final Optional f1R2CountsCollector; - private PileupQualBuffer tumorPileupQualBuffer = new PileupQualBuffer(); - private PileupQualBuffer normalPileupQualBuffer = new PileupQualBuffer(); + private PileupQualBuffer tumorPileupQualBuffer; + private PileupQualBuffer normalPileupQualBuffer; /** * Create and initialize a new HaplotypeCallerEngine given a collection of HaplotypeCaller arguments, a reads header, @@ -144,6 +144,9 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, AssemblyRegionArgumentColl aligner = SmithWatermanAligner.getAligner(MTAC.smithWatermanImplementation); samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(header))); + tumorPileupQualBuffer = new PileupQualBuffer(MTAC.userDefinedBaseQualCorrection); + normalPileupQualBuffer = new PileupQualBuffer(MTAC.userDefinedBaseQualCorrection); + // optimize set operations for the common cases of no normal and one normal if (MTAC.normalSamples.isEmpty()) { normalSamples = Collections.emptySet(); @@ -677,11 +680,17 @@ private static class PileupQualBuffer { // which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual. private static final int ONE_THIRD_QUAL_CORRECTION = 5; + private static double USER_DEFINED_QUAL_CORRECTION = 1; + // indices 0-3 are A,C,G,T; 4 is other substitution (just in case it's some exotic protocol); 5 is indel private List buffers = IntStream.range(0,6).mapToObj(n -> new ByteArrayList()).collect(Collectors.toList()); public PileupQualBuffer() { } + public PileupQualBuffer(final double userDefinedQualCorrection) { + USER_DEFINED_QUAL_CORRECTION = userDefinedQualCorrection; + } + public void accumulateQuals(final ReadPileup pileup, final byte refBase, final int pcrErrorQual) { clear(); final int position = pileup.getLocation().getStart(); @@ -731,7 +740,7 @@ private void accumulateSubstitution(final byte base, final byte qual) { if (index == -1) { // -1 is the hard-coded value for non-simple bases in BaseUtils buffers.get(OTHER_SUBSTITUTION).add(qual); } else { - buffers.get(index).add((byte) FastMath.min(qual + ONE_THIRD_QUAL_CORRECTION, QualityUtils.MAX_QUAL)); + buffers.get(index).add((byte) FastMath.min(qual + ONE_THIRD_QUAL_CORRECTION * USER_DEFINED_QUAL_CORRECTION, QualityUtils.MAX_QUAL)); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java index 18e2915897f..73010131141 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java @@ -119,8 +119,10 @@ public void testSmallNumAltExact(final int numRef, final double errorRate) { Assert.assertEquals(calculated, expected, precision); } + } - + @Test + public void testOldErrorModelBehavior() { } From 2f9bb344b625615762e4955971eb11a91015cacd Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Fri, 4 Aug 2023 16:01:00 -0400 Subject: [PATCH 2/4] David R says advanced args don't need tests --- .../tools/walkers/mutect/Mutect2EngineUnitTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java index 73010131141..cb24556e4b6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2EngineUnitTest.java @@ -121,11 +121,6 @@ public void testSmallNumAltExact(final int numRef, final double errorRate) { } } - @Test - public void testOldErrorModelBehavior() { - - } - @DataProvider(name = "fewAltData") public Object[][] getFewAltData() { return new Object[][] { From 44231ca5245f0fcf14dfe75c4c8768747bba3252 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Thu, 10 Aug 2023 10:21:26 -0400 Subject: [PATCH 3/4] Implement rename/refactor suggestion --- .../tools/walkers/mutect/M2ArgumentCollection.java | 6 +++--- .../tools/walkers/mutect/Mutect2Engine.java | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java index 692e025e74b..16fed8f001b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java @@ -49,7 +49,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection public static final String CALLABLE_DEPTH_LONG_NAME = "callable-depth"; public static final String PCR_SNV_QUAL_LONG_NAME = "pcr-snv-qual"; public static final String PCR_INDEL_QUAL_LONG_NAME = "pcr-indel-qual"; - public static final String USER_DEFINED_BASE_QUAL_CORRECTION = "base-qual-correction-factor"; + public static final String MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = "base-qual-correction-factor"; public static final String F1R2_TAR_GZ_NAME = "f1r2-tar-gz"; public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8; @@ -254,8 +254,8 @@ public double getInitialLogOdds() { * Set to zero to turn off the error model changes included in GATK 4.1.9.0. */ @Advanced - @Argument(fullName = USER_DEFINED_BASE_QUAL_CORRECTION, optional = true, doc = "Set to zero to turn off the error model changes included in GATK 4.1.9.0.") - public double userDefinedBaseQualCorrection = 1; + @Argument(fullName = MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION, optional = true, doc = "Set to zero to turn off the error model changes included in GATK 4.1.9.0.") + public long activeRegionMultipleSubstitutionBaseQualCorrection = Math.round(10 * Math.log(3)); /** * In tumor-only mode, we discard variants with population allele frequencies greater than this threshold. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 6b64f74dbce..2a6efbcc3e5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -144,8 +144,8 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, AssemblyRegionArgumentColl aligner = SmithWatermanAligner.getAligner(MTAC.smithWatermanImplementation); samplesList = new IndexedSampleList(new ArrayList<>(ReadUtils.getSamplesFromHeader(header))); - tumorPileupQualBuffer = new PileupQualBuffer(MTAC.userDefinedBaseQualCorrection); - normalPileupQualBuffer = new PileupQualBuffer(MTAC.userDefinedBaseQualCorrection); + tumorPileupQualBuffer = new PileupQualBuffer(MTAC.activeRegionMultipleSubstitutionBaseQualCorrection); + normalPileupQualBuffer = new PileupQualBuffer(MTAC.activeRegionMultipleSubstitutionBaseQualCorrection); // optimize set operations for the common cases of no normal and one normal if (MTAC.normalSamples.isEmpty()) { @@ -678,17 +678,15 @@ private static class PileupQualBuffer { // as the *particular* alt base, whereas the qual actually means the probability of *any* substitution error. // since there are three possible substitutions for each ref base we must divide the error probability by three // which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual. - private static final int ONE_THIRD_QUAL_CORRECTION = 5; - - private static double USER_DEFINED_QUAL_CORRECTION = 1; + private static double MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = 5; // indices 0-3 are A,C,G,T; 4 is other substitution (just in case it's some exotic protocol); 5 is indel private List buffers = IntStream.range(0,6).mapToObj(n -> new ByteArrayList()).collect(Collectors.toList()); public PileupQualBuffer() { } - public PileupQualBuffer(final double userDefinedQualCorrection) { - USER_DEFINED_QUAL_CORRECTION = userDefinedQualCorrection; + public PileupQualBuffer(final double multipleSubstitutionQualCorrection) { + MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = multipleSubstitutionQualCorrection; } public void accumulateQuals(final ReadPileup pileup, final byte refBase, final int pcrErrorQual) { @@ -740,7 +738,7 @@ private void accumulateSubstitution(final byte base, final byte qual) { if (index == -1) { // -1 is the hard-coded value for non-simple bases in BaseUtils buffers.get(OTHER_SUBSTITUTION).add(qual); } else { - buffers.get(index).add((byte) FastMath.min(qual + ONE_THIRD_QUAL_CORRECTION * USER_DEFINED_QUAL_CORRECTION, QualityUtils.MAX_QUAL)); + buffers.get(index).add((byte) FastMath.min(qual + MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION, QualityUtils.MAX_QUAL)); } } From 1c3b0624e8e1f13ee0fc9565b36dcba411681c01 Mon Sep 17 00:00:00 2001 From: Laura Gauthier Date: Thu, 10 Aug 2023 15:14:07 -0400 Subject: [PATCH 4/4] Remove constant from engine and move documentation to arg colelction --- .../tools/walkers/mutect/M2ArgumentCollection.java | 6 +++++- .../hellbender/tools/walkers/mutect/Mutect2Engine.java | 8 +------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java index 16fed8f001b..d25c7a33fc5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java @@ -252,10 +252,14 @@ public double getInitialLogOdds() { /** * A scale factor to modify the base qualities reported by the sequencer and used in the Mutect2 substitution error model. * Set to zero to turn off the error model changes included in GATK 4.1.9.0. + * Our pileup likelihoods models assume that the base quality (qual) corresponds to the probability that a ref base is misread + * as the *particular* alt base, whereas the qual actually means the probability of *any* substitution error. + * Since there are three possible substitutions for each ref base we must divide the error probability by three + * which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual. */ @Advanced @Argument(fullName = MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION, optional = true, doc = "Set to zero to turn off the error model changes included in GATK 4.1.9.0.") - public long activeRegionMultipleSubstitutionBaseQualCorrection = Math.round(10 * Math.log(3)); + public int activeRegionMultipleSubstitutionBaseQualCorrection = (int)Math.round(10 * Math.log10(3)); /** * In tumor-only mode, we discard variants with population allele frequencies greater than this threshold. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 2a6efbcc3e5..1d2096f3c2b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -674,17 +674,11 @@ private static class PileupQualBuffer { private static final int OTHER_SUBSTITUTION = 4; private static final int INDEL = 5; - // our pileup likelihoods models assume that the qual corresponds to the probability that a ref base is misread - // as the *particular* alt base, whereas the qual actually means the probability of *any* substitution error. - // since there are three possible substitutions for each ref base we must divide the error probability by three - // which corresponds to adding 10*log10(3) = 4.77 ~ 5 to the qual. - private static double MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = 5; + private static double MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION; // indices 0-3 are A,C,G,T; 4 is other substitution (just in case it's some exotic protocol); 5 is indel private List buffers = IntStream.range(0,6).mapToObj(n -> new ByteArrayList()).collect(Collectors.toList()); - public PileupQualBuffer() { } - public PileupQualBuffer(final double multipleSubstitutionQualCorrection) { MULTIPLE_SUBSTITUTION_BASE_QUAL_CORRECTION = multipleSubstitutionQualCorrection; }