From 1d4cbfd53934ef3af6bd8a31ba7c8a2fda66c4f3 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 12 Apr 2024 10:18:51 +0300 Subject: [PATCH 1/5] Fixed a bug that prevented filtering by SOR in many cases --- .../tools/walkers/haplotypecaller/AlleleFiltering.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java index ebc0f61d3f6..11eb04b2c91 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java @@ -303,9 +303,11 @@ private List identifyBadAlleles(final List collectedRPLs, final //we then add alleles with high SOR. Note that amongh all allleles with the SOR higher than the SOR_THRESHOLD //we will first filter the one with the lowest QUAL. logger.debug(() -> String.format("SHA:: Have %d candidates with low QUAL", rplCount)); - for (int i = sorIndices.length-1 ; (i >= 0) && (collectedSORs.get(sorIndices[i])>SOR_THRESHOLD) ; i--) { - if (!result.contains(alleles.get(sorIndices[i]))) { - result.add(alleles.get(sorIndices[i])); + for (int i = sorIndices.length-1 ; (i >= 0) ; i--) { + if (collectedSORs.get(sorIndices[i])>SOR_THRESHOLD){ + if (!result.contains(alleles.get(sorIndices[i]))) { + result.add(alleles.get(sorIndices[i])); + } } } logger.debug(() -> String.format("SHA:: Have %d candidates with high SOR", result.size() - rplCount)); From 82e8b105ca4d0979888ed70c5669f002d6eb8337 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 12 Apr 2024 16:46:18 +0300 Subject: [PATCH 2/5] Updated .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e95e5ab6094..a73810708a6 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ funcotator_tmp #Test generated dot files test*.dot +.vscode/ From e60afd52348e0ce2edc8a4ea2156220983ed6777 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sun, 14 Apr 2024 15:55:09 +0300 Subject: [PATCH 3/5] Test added --- .../AlleleFilteringUnitTest.java | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java index c57dcc343c0..31f949aacab 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java @@ -298,5 +298,70 @@ public void testNotFilterLoneWeakAllele(){ } + @Test + public void testFilterDistantHindelSor() { + + // create haplotypes + List haplotypeList = new ArrayList<>(); + final byte[] fullReferenceWithPadding = "CAGGCATG".getBytes(); + Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); + haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); + haplotypeList.add(haplotype); + + haplotype = new Haplotype("CAGGCATTG".getBytes(), false, 0, TextCigarCodec.decode("7M1I1M")); + haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 109)); + + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); + haplotypeList.add(haplotype); + haplotype = new Haplotype("CAGGCATTTG".getBytes(), false, 0, TextCigarCodec.decode("7M2I1M")); + haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 110)); + + haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); + haplotypeList.add(haplotype); + + AlleleList haplotypes = new IndexedAlleleList<>(haplotypeList); + SampleList samples = new IndexedSampleList(Arrays.asList("sm1")); + + List readList = new ArrayList<>(30); + Map> ebs = new HashMap<>(); + ebs.put("sm1", readList); + + for (int i = 0; i < 40; i++) { + readList.add(ArtificialReadUtils.createArtificialRead("20M")); + } + + + double[][] values = { + { 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, + 0, 3, 0, 3, 0, + 3 }, + { 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, + 3, 0, 3, 0, 3, + 0 }, + { 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 10, 0, 0,0,10,0,0,0,10,0,0,0} + }; + for (int i = 0; i < 40; i++) { + if (i % 4 == 0) { + readList.get(i).setIsReverseStrand(true); + } + } + + AlleleLikelihoods lks = new AlleleLikelihoods<>(samples, haplotypes, ebs); + LikelihoodMatrix lkm = lks.sampleMatrix(0); + for (int i = 0; i < lks.numberOfAlleles(); i++) { + for (int j = 0; j < lkm.evidenceCount(); j++) { + lkm.set(i, j, values[i][j]); + } + } + + HaplotypeCallerArgumentCollection hcArgs = new HaplotypeCallerArgumentCollection(); + HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, + !hcArgs.doNotRunPhysicalPhasing, false); + + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 0, new HashSet<>()); + Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0, 2)); + } } From 96cb51464c68dee8749873693d2e1acb17ec6dfd Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sun, 14 Apr 2024 15:59:03 +0300 Subject: [PATCH 4/5] Test added --- .../tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java index 31f949aacab..346d1b4e1ac 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java @@ -298,7 +298,7 @@ public void testNotFilterLoneWeakAllele(){ } - @Test + @Test //check that we filter strong allele with high SOR public void testFilterDistantHindelSor() { // create haplotypes From cfc01cee744947a2058123e7b5d07ee864739dd1 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sun, 28 Apr 2024 17:24:24 +0300 Subject: [PATCH 5/5] Added direct test for identifyBadAlleles --- .../haplotypecaller/AlleleFiltering.java | 5 +++- .../AlleleFilteringUnitTest.java | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java index 11eb04b2c91..3a350228128 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java @@ -29,6 +29,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import com.google.common.annotations.VisibleForTesting; + /** * Filtering haplotypes that contribute weak alleles to the genotyping. * @@ -278,7 +280,8 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A * @param sorThreshold only variants with SOR above threshold will be considered * @return list of alleles that can be removed */ - private List identifyBadAlleles(final List collectedRPLs, final List collectedSORs, + @VisibleForTesting + List identifyBadAlleles(final List collectedRPLs, final List collectedSORs, final List alleles, final double qualThreshold, final double sorThreshold) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java index 346d1b4e1ac..5d91a3ea918 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java @@ -1,12 +1,15 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import com.google.common.collect.ImmutableList; import htsjdk.samtools.TextCigarCodec; +import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleFiltering; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleFilteringHC; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingEngine; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.genotyper.*; +import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.EventMap; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; @@ -364,4 +367,30 @@ public void testFilterDistantHindelSor() { Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0, 2)); } + @Test + public void testIdentifyBadAlleles(){ + Event a = new Event("chr1", 10, Allele.create("A",true), Allele.create("T", false)); + Event b = new Event("chr1", 10, Allele.create("T",true), Allele.create("G", false)); + Event c = new Event("chr1", 10, Allele.create("C", true), Allele.create("G", false)); + + List events = List.of(a,b,c); + List rpls = List.of(10,20,0); + List sors = List.of(0.0,1.0,3.5); + HaplotypeCallerGenotypingEngine ge = new HaplotypeCallerGenotypingEngine(new HaplotypeCallerArgumentCollection(), + SampleList.singletonSampleList("test"), false, false); + AlleleFiltering af = new AlleleFilteringHC(null, null,ge); + List badAlleles = af.identifyBadAlleles(rpls, sors, events, 30, 3); + Assert.assertEquals(badAlleles, List.of(b, a, c)); + rpls = List.of(-100, -200, 0); + sors = List.of(0.0,1.0,3.5); + badAlleles = af.identifyBadAlleles(rpls, sors, events, 30, 3); + Assert.assertEquals(badAlleles, List.of(c)); + + rpls = List.of(-100, -200, -300); + sors = List.of(0.0,1.0,3.5); + badAlleles = af.identifyBadAlleles(rpls, sors, events, 30, 3); + Assert.assertEquals(badAlleles, List.of(c)); + + + } }