From fdff6dbad70de2aa1674063c99ecc0e83d3a6609 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 24 Jan 2022 15:25:05 -0500 Subject: [PATCH] Huge simplication of genotyping likelihoods calculations -- no change in output --- .../models/AlleleFractionLikelihoods.java | 10 +- .../copynumber/models/CopyRatioSamplers.java | 6 +- ...aiveHeterozygousPileupGenotypingUtils.java | 2 +- ...ferenceConfidenceVariantContextMerger.java | 19 +- .../contamination/ContaminationModel.java | 2 +- .../genotyper/AlleleSubsettingUtils.java | 83 +-- .../genotyper/DRAGENGenotypesModel.java | 38 +- .../genotyper/GenotypeAlleleCounts.java | 179 ++--- .../genotyper/GenotypeIndexCalculator.java | 206 ++++++ .../GenotypeLikelihoodCalculator.java | 625 ++++-------------- .../GenotypeLikelihoodCalculatorDRAGEN.java | 146 ++-- .../GenotypeLikelihoodCalculators.java | 418 ------------ .../walkers/genotyper/GenotypesCache.java | 89 +++ .../walkers/genotyper/GenotypingEngine.java | 5 +- .../IndependentSampleGenotypesModel.java | 42 +- .../afcalc/AlleleFrequencyCalculator.java | 86 ++- .../GnarlyGenotyperEngine.java | 26 +- .../HaplotypeCallerGenotypingEngine.java | 2 +- .../ReferenceConfidenceModel.java | 12 +- .../graphs/KBestHaplotype.java | 2 +- .../tools/walkers/mutect/Mutect2Engine.java | 3 +- .../walkers/variantutils/ReblockGVCF.java | 19 +- .../hellbender/utils/GenotypeUtils.java | 51 +- .../hellbender/utils/IndexRange.java | 14 + .../hellbender/utils/Log10Cache.java | 13 - .../hellbender/utils/Log10FactorialCache.java | 20 - .../hellbender/utils/MannWhitneyU.java | 18 +- .../hellbender/utils/MathUtils.java | 93 +-- .../hellbender/utils/NaturalLogUtils.java | 5 +- .../genotyper/GenotypePriorCalculator.java | 35 +- .../utils/recalibration/RecalDatum.java | 42 +- .../variant/GATKVariantContextUtils.java | 19 +- .../ModelSegmentsIntegrationTest.java | 7 +- .../GenotypeAlleleCountsUnitTest.java | 66 +- .../GenotypeIndexCalculatorUnitTest.java | 148 +++++ 
.../GenotypeLikelihoodCalculatorUnitTest.java | 132 +--- ...GenotypeLikelihoodCalculatorsUnitTest.java | 103 --- .../genotyper/GenotypesCacheUnitTest.java | 55 ++ ...dependentSampleGenotypesModelUnitTest.java | 2 +- .../AlleleFrequencyCalculatorUnitTest.java | 13 +- .../hellbender/utils/IndexRangeUnitTest.java | 10 + .../hellbender/utils/MathUtilsUnitTest.java | 98 +-- .../genotyper/ReadLikelihoodsUnitTester.java | 2 +- ...ocumentationGenerationIntegrationTest.java | 3 +- .../recalibration/RecalDatumUnitTest.java | 30 +- .../multiple-sample-ac-nac-tumor-1.af.igv.seg | 3 +- .../multiple-sample-ac-nac-tumor-1.cr.igv.seg | 3 +- .../multiple-sample-ac-nac-tumor-1.cr.seg | 3 +- ...-sample-ac-nac-tumor-1.modelBegin.af.param | 6 +- ...tiple-sample-ac-nac-tumor-1.modelBegin.seg | 382 +++++------ ...-sample-ac-nac-tumor-1.modelFinal.af.param | 6 +- ...tiple-sample-ac-nac-tumor-1.modelFinal.seg | 3 +- .../multiple-sample-ac-tumor-1.af.igv.seg | 3 +- .../multiple-sample-ac-tumor-1.cr.igv.seg | 3 +- .../multiple-sample-ac-tumor-1.cr.seg | 3 +- ...iple-sample-ac-tumor-1.modelBegin.af.param | 6 +- .../multiple-sample-ac-tumor-1.modelBegin.seg | 576 ++++++++-------- ...iple-sample-ac-tumor-1.modelFinal.af.param | 6 +- .../multiple-sample-ac-tumor-1.modelFinal.seg | 3 +- ...ltiple-sample-cr-ac-nac-tumor-1.af.igv.seg | 14 +- ...mple-cr-ac-nac-tumor-1.modelBegin.af.param | 6 +- ...le-sample-cr-ac-nac-tumor-1.modelBegin.seg | 156 ++--- ...mple-cr-ac-nac-tumor-1.modelFinal.af.param | 4 +- ...le-sample-cr-ac-nac-tumor-1.modelFinal.seg | 24 +- .../multiple-sample-cr-ac-tumor-1.af.igv.seg | 6 +- ...e-sample-cr-ac-tumor-1.modelBegin.af.param | 6 +- ...ltiple-sample-cr-ac-tumor-1.modelBegin.seg | 222 +++---- ...e-sample-cr-ac-tumor-1.modelFinal.af.param | 6 +- ...ltiple-sample-cr-ac-tumor-1.modelFinal.seg | 10 +- .../single-sample-ac-nac.af.igv.seg | 20 +- .../single-sample-ac-nac.cr.igv.seg | 10 +- .../single-sample-ac-nac.cr.seg | 10 +- .../single-sample-ac-nac.modelBegin.af.param | 6 +- 
.../single-sample-ac-nac.modelBegin.seg | 92 +-- .../single-sample-ac-nac.modelFinal.af.param | 6 +- .../single-sample-ac-nac.modelFinal.seg | 20 +- .../single-sample-ac.modelBegin.af.param | 6 +- .../single-sample-ac.modelBegin.seg | 136 ++-- .../single-sample-ac.modelFinal.af.param | 6 +- .../single-sample-cr-ac-nac.af.igv.seg | 16 +- ...ingle-sample-cr-ac-nac.modelBegin.af.param | 6 +- .../single-sample-cr-ac-nac.modelBegin.seg | 24 +- ...ingle-sample-cr-ac-nac.modelFinal.af.param | 6 +- .../single-sample-cr-ac-nac.modelFinal.seg | 16 +- .../single-sample-cr-ac.af.igv.seg | 10 +- .../single-sample-cr-ac.modelBegin.af.param | 4 +- .../single-sample-cr-ac.modelBegin.seg | 12 +- .../single-sample-cr-ac.modelFinal.af.param | 6 +- .../single-sample-cr-ac.modelFinal.seg | 12 +- 89 files changed, 1942 insertions(+), 2941 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculator.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCache.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/utils/Log10Cache.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/utils/Log10FactorialCache.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculatorUnitTest.java delete mode 100644 src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorsUnitTest.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCacheUnitTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/AlleleFractionLikelihoods.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/AlleleFractionLikelihoods.java index 11b2b8c1931..43cddbe0be4 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/AlleleFractionLikelihoods.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/AlleleFractionLikelihoods.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.tools.copynumber.models; import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.commons.math3.util.FastMath; import org.broadinstitute.hellbender.utils.NaturalLogUtils; @@ -8,8 +9,6 @@ import java.util.stream.IntStream; import static org.apache.commons.math3.util.FastMath.sqrt; -import static org.broadinstitute.hellbender.utils.MathUtils.log10Factorial; -import static org.broadinstitute.hellbender.utils.MathUtils.log10ToLog; /** * Contains likelihood methods for the allele-fraction model. @@ -87,10 +86,7 @@ static double hetLogLikelihood(final AlleleFractionGlobalParameters parameters, - n * log(majorFraction + minorFraction * lambda0RefMinor); final double refMinorLogLikelihood = logNotPi + logcRefMinor + Gamma.logGamma(rhoRefMinor) - rhoRefMinor * log(tauRefMinor); - // changing the factorial implementation below may introduce non-negligible numerical differences; - // note https://github.com/broadinstitute/gatk/pull/7652 - final double outlierLogLikelihood = logPi + log10ToLog(log10Factorial(a) + log10Factorial(r) - log10Factorial(a + r + 1)); - + final double outlierLogLikelihood = logPi - Math.log(a + r + 1) - CombinatoricsUtils.binomialCoefficientLog(a+r,a); return NaturalLogUtils.logSumExp(altMinorLogLikelihood, refMinorLogLikelihood, outlierLogLikelihood); } @@ -165,6 +161,6 @@ private static double biasPosteriorEffectiveBeta(final double lambda0, final dou } private static double log(final double x) { - return FastMath.log(Math.max(EPSILON, x)); + return Math.log(Math.max(EPSILON, x)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/CopyRatioSamplers.java 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/CopyRatioSamplers.java index 3b5ea04b819..dc94a1d5d5c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/CopyRatioSamplers.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/CopyRatioSamplers.java @@ -162,11 +162,11 @@ public CopyRatioState.OutlierIndicators sample(final RandomGenerator rng, final CopyRatioSegmentedData data) { logger.debug("Sampling outlier indicators..."); final double outlierUnnormalizedLogProbability = - FastMath.log(state.outlierProbability()) + outlierUniformLogLikelihood; + Math.log(state.outlierProbability()) + outlierUniformLogLikelihood; // final double notOutlierUnnormalizedLogProbabilityPrefactor = -// FastMath.log(1. - state.outlierProbability()) - 0.5 * FastMath.log(2 * Math.PI * state.variance()); +// Math.log(1. - state.outlierProbability()) - 0.5 * Math.log(2 * Math.PI * state.variance()); final double notOutlierUnnormalizedLogProbabilityPrefactor = - FastMath.log((1. - state.outlierProbability()) / FastMath.sqrt(2 * Math.PI * state.variance())); + Math.log((1. 
- state.outlierProbability()) / FastMath.sqrt(2 * Math.PI * state.variance())); final List indicators = new ArrayList<>(data.getNumPoints()); for (int segmentIndex = 0; segmentIndex < data.getNumSegments(); segmentIndex++) { final List indexedCopyRatiosInSegment = data.getIndexedCopyRatiosInSegment(segmentIndex); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/genotyping/NaiveHeterozygousPileupGenotypingUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/genotyping/NaiveHeterozygousPileupGenotypingUtils.java index 97b46ea33ba..be1369fabe0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/genotyping/NaiveHeterozygousPileupGenotypingUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/genotyping/NaiveHeterozygousPileupGenotypingUtils.java @@ -254,6 +254,6 @@ private static double calculateHomozygousLogRatio(final AllelicCount allelicCoun final double betaOneMinusError = Beta.regularizedBeta(1 - genotypingBaseErrorRate, r + 1, n - r + 1); final double betaHom = betaError + betaAll - betaOneMinusError; final double betaHet = betaOneMinusError - betaError; - return FastMath.log(betaHom) - FastMath.log(betaHet); + return Math.log(betaHom) - Math.log(betaHet); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java index f6d01b321cb..f57d50a7cad 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/ReferenceConfidenceVariantContextMerger.java @@ -11,9 +11,7 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AlleleSpecificAnnotationData; import 
org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData; -import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; +import org.broadinstitute.hellbender.tools.walkers.genotyper.*; import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine; import org.broadinstitute.hellbender.utils.GenotypeUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -34,7 +32,6 @@ @SuppressWarnings({"rawtypes","unchecked"}) //TODO fix uses of untyped Comparable. public final class ReferenceConfidenceVariantContextMerger { - private static final GenotypeLikelihoodCalculators calculators = new GenotypeLikelihoodCalculators(); private static VCFHeader vcfInputHeader = null; protected final VariantAnnotatorEngine annotatorEngine; private final boolean doSomaticMerge; @@ -571,7 +568,6 @@ private GenotypesContext mergeRefConfidenceGenotypes(final VariantContext vc, // the map is different depending on the ploidy, so in order to keep this method flexible (mixed ploidies) // we need to get a map done (lazily inside the loop) for each ploidy, up to the maximum possible. 
final int[][] genotypeIndexMapsByPloidy = new int[maximumPloidy + 1][]; - final int maximumAlleleCount = Math.max(remappedAlleles.size(),targetAlleles.size()); for ( final Genotype g : vc.getGenotypes() ) { final String name; @@ -584,20 +580,17 @@ private GenotypesContext mergeRefConfidenceGenotypes(final VariantContext vc, final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g); if (!doSomaticMerge) { if (g.hasPL() || g.hasAD()) { - int[] perSampleIndexesOfRelevantAlleles = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(remappedAlleles, targetAlleles, vc.getStart(), g, false); + int[] perSampleIndexesOfRelevantAlleles = AlleleSubsettingUtils.getIndexesOfRelevantAllelesForGVCF(remappedAlleles, targetAlleles, vc.getStart(), g, false); if (g.hasPL()) { - // lazy initialization of the genotype index map by ploidy. final int[] genotypeIndexMapByPloidy = genotypeIndexMapsByPloidy[ploidy] == null - ? calculators.getInstance(ploidy, maximumAlleleCount).genotypeIndexMap(perSampleIndexesOfRelevantAlleles, calculators) //probably horribly slow + ? 
GenotypeIndexCalculator.newToOldGenotypeMap(ploidy, perSampleIndexesOfRelevantAlleles) //probably horribly slow : genotypeIndexMapsByPloidy[ploidy]; - final int[] PLs = generatePL(g, genotypeIndexMapByPloidy); - genotypeBuilder.PL(PLs); + genotypeBuilder.PL(generatePL(g, genotypeIndexMapByPloidy)); } if (g.hasAD()) { - final int[] AD = AlleleSubsettingUtils.generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles); - genotypeBuilder.AD(AD); + genotypeBuilder.AD(AlleleSubsettingUtils.generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles)); } - // clean up low confidence hom refs for better annotations later + //clean up low confidence hom refs for better annotations later } else if (GenotypeGVCFsEngine.excludeFromAnnotations(g)) { genotypeBuilder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/contamination/ContaminationModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/contamination/ContaminationModel.java index 2fa40d8545a..44b07f6fcfe 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/contamination/ContaminationModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/contamination/ContaminationModel.java @@ -277,7 +277,7 @@ private static double probability(final PileupSummary site, final double contami } private static double segmentLogLikelihood(final List segment, final double contamination, final double errorRate, final double minorAlleleFraction) { - return segment.stream().mapToDouble(site -> FastMath.log(MathUtils.sum(genotypeLikelihoods(site, contamination, errorRate, minorAlleleFraction)))).sum(); + return segment.stream().mapToDouble(site -> Math.log(MathUtils.sum(genotypeLikelihoods(site, contamination, errorRate, minorAlleleFraction)))).sum(); } private static double modelLogLikelihood(final List> segments, final double contamination, final double errorRate, final List mafs) { diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java index 9422472b9fe..60f1ef7b205 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/AlleleSubsettingUtils.java @@ -4,15 +4,17 @@ import com.google.common.primitives.Doubles; import com.google.common.primitives.Ints; import htsjdk.variant.variantcontext.*; -import htsjdk.variant.vcf.*; +import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFFormatHeaderLine; +import htsjdk.variant.vcf.VCFHeader; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; -import org.broadinstitute.hellbender.utils.genotyper.GenotypePriorCalculator; import org.broadinstitute.hellbender.tools.walkers.ReferenceConfidenceVariantContextMerger; +import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.collections.Permutation; +import org.broadinstitute.hellbender.utils.genotyper.GenotypePriorCalculator; import org.broadinstitute.hellbender.utils.genotyper.IndexedAlleleList; import org.broadinstitute.hellbender.utils.logging.OneShotLogger; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; @@ -37,8 +39,6 @@ private AlleleSubsettingUtils() {} // prevent instantiation private static final OneShotLogger attributesRemovedOneShotLogger = new OneShotLogger(AlleleSubsettingUtils.class); - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - public static GenotypesContext subsetAlleles(final 
GenotypesContext originalGs, final int defaultPloidy, final List originalAlleles, final List allelesToKeep, @@ -47,6 +47,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs, //TODO: if other usages of this method should update or remove A,R, or G length annotations then header parsing is necessary and the method below should be used return subsetAlleles(originalGs, defaultPloidy, originalAlleles, allelesToKeep, gpc, assignmentMethod, Collections.emptyList()); } + /** * Create the new GenotypesContext with the subsetted PLs and ADs * @@ -399,12 +400,10 @@ static double[] calculateLikelihoodSums(final VariantContext vc, final int defau final double GLDiffBetweenRefAndBestVariantGenotype = Math.abs(glsVector[indexOfMostLikelyVariantGenotype] - glsVector[PL_INDEX_OF_HOM_REF]); final int ploidy = genotype.getPloidy() > 0 ? genotype.getPloidy() : defaultPloidy; - final int[] alleleCounts = new GenotypeLikelihoodCalculators() - .getInstance(ploidy, vc.getNAlleles()).genotypeAlleleCountsAt(indexOfMostLikelyVariantGenotype) - .alleleCountsByIndex(vc.getNAlleles() - 1); + final GenotypeAlleleCounts mostLikelyGenotypeAlleleCounts = GenotypesCache.get(ploidy, indexOfMostLikelyVariantGenotype); - for (int allele = 1; allele < alleleCounts.length; allele++) { - if (alleleCounts[allele] > 0) { + for (int allele = 1; allele < vc.getNAlleles(); allele++) { + if (mostLikelyGenotypeAlleleCounts.containsAllele(allele)) { likelihoodSums[allele] += GLDiffBetweenRefAndBestVariantGenotype; } } @@ -428,10 +427,7 @@ public static int[] subsettedPLIndices(final int ploidy, final List orig final int[] result = new int[GenotypeLikelihoods.numLikelihoods(newAlleles.size(), ploidy)]; final Permutation allelePermutation = new IndexedAlleleList<>(originalAlleles).permutation(new IndexedAlleleList<>(newAlleles)); - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, originalAlleles.size()); - for (int oldPLIndex = 0; oldPLIndex < 
glCalc.genotypeCount(); oldPLIndex++) { - final GenotypeAlleleCounts oldAlleleCounts = glCalc.genotypeAlleleCountsAt(oldPLIndex); - + for (final GenotypeAlleleCounts oldAlleleCounts : GenotypeAlleleCounts.iterable(ploidy, originalAlleles.size())) { final boolean containsOnlyNewAlleles = IntStream.range(0, oldAlleleCounts.distinctAlleleCount()) .map(oldAlleleCounts::alleleIndexAt).allMatch(allelePermutation::isKept); @@ -441,8 +437,8 @@ public static int[] subsettedPLIndices(final int ploidy, final List orig final int[] newAlleleCounts = IntStream.range(0, newAlleles.size()).flatMap(newAlleleIndex -> IntStream.of(newAlleleIndex, oldAlleleCounts.alleleCountFor(allelePermutation.fromIndex(newAlleleIndex)))).toArray(); - final int newPLIndex = glCalc.alleleCountsToIndex(newAlleleCounts); - result[newPLIndex] = oldPLIndex; + final int newPLIndex = GenotypeIndexCalculator.alleleCountsToIndex(newAlleleCounts); + result[newPLIndex] = oldAlleleCounts.index(); } } return result; @@ -492,39 +488,6 @@ public static int[] getIndexesOfRelevantAllelesForGVCF(final List remapp return indexMapping; } - public static int[] getIndexesOfRelevantAlleles(final List remappedAlleles, final List targetAlleles, final int position, final Genotype g) { - Utils.nonEmpty(remappedAlleles); - Utils.nonEmpty(targetAlleles); - - final int[] indexMapping = new int[targetAlleles.size()]; - - // the reference likelihoods should always map to each other (even if the alleles don't) - indexMapping[0] = 0; - - for ( int i = 1; i < targetAlleles.size(); i++ ) { - // if there's more than 1 spanning deletion (*) allele then we need to use the best one - if (targetAlleles.get(i) == Allele.SPAN_DEL && g.hasPL()) { - final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL); - if (occurrences > 1) { - final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy()); - if (indexOfBestDel == -1) { - throw new IllegalArgumentException("At position " + position + " 
targetAlleles contains a spanning deletion, but remappedAlleles does not."); - } - indexMapping[i] = indexOfBestDel; - continue; - } - } - - final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i)); - if (indexOfRemappedAllele == -1) { - throw new IllegalArgumentException("At position " + position + " targetAlleles contains a " + targetAlleles.get(i) + " allele, but remappedAlleles does not."); - } - indexMapping[i] = indexOfRemappedAllele; - } - - return indexMapping; - } - /** * Returns the index of the best spanning deletion allele based on AD counts * @@ -539,7 +502,8 @@ private static int indexOfBestDel(final List alleles, final int[] PLs, f for ( int i = 0; i < alleles.size(); i++ ) { if ( alleles.get(i) == Allele.SPAN_DEL ) { - final int homAltIndex = findHomIndex(GL_CALCS.getInstance(ploidy, alleles.size()), i, ploidy); + //In the canonical order, the homozygous genotype of the ith allele is immediately followed by the first genotype containing the (i+1)th allele. 
+ final int homAltIndex = (int) GenotypeIndexCalculator.indexOfFirstGenotypeWithAllele(ploidy, i +1) - 1; final int PL = PLs[homAltIndex]; if ( PL < bestPL ) { bestIndex = i; @@ -551,25 +515,6 @@ private static int indexOfBestDel(final List alleles, final int[] PLs, f return bestIndex; } - /** //TODO simplify these methods - * Returns the index of the PL that represents the homozygous genotype of the given i'th allele - * - * @param i the index of the allele with the list of alleles - * @param ploidy the ploidy of the sample - * @return the hom index - */ - private static int findHomIndex(final GenotypeLikelihoodCalculator calculator, final int i, final int ploidy) { - // some quick optimizations for the common case - if ( ploidy == 2 ) - return GenotypeLikelihoods.calculatePLindex(i, i); - if ( ploidy == 1 ) - return i; - - final int[] alleleIndexes = new int[ploidy]; - Arrays.fill(alleleIndexes, i); - return calculator.allelesToIndex(alleleIndexes); - } - /** * Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current * alleles from the original AD. 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/DRAGENGenotypesModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/DRAGENGenotypesModel.java index c64f926aa5c..718f0b2662f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/DRAGENGenotypesModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/DRAGENGenotypesModel.java @@ -37,10 +37,6 @@ public class DRAGENGenotypesModel implements GenotypingModel { public static final double FLAT_SNP_HET_PRIOR = 34.77; public static final double BQD_HOMOPOLYMER_PHRED_ADJUSTMENT_FACTOR = 5.0; - private final int cacheAlleleCountCapacity; - private final int cachePloidyCapacity; - private GenotypeLikelihoodCalculatorDRAGEN[][] likelihoodCalculators; - private final GenotypeLikelihoodCalculators calculators; private final boolean computeBQD; private final boolean computeFRD; private final int allelePadding; @@ -58,10 +54,6 @@ public DRAGENGenotypesModel(final boolean useBQDModel, final boolean useFRDModel public DRAGENGenotypesModel(final int calculatorCachePloidyCapacity, final int calculatorCacheAlleleCapacity, final boolean useBQDModel, final boolean useFRDModel, final int allelePadding, final int maxEffectiveDepthAdjustment, final DragstrParams dragstrParams) { - cachePloidyCapacity = calculatorCachePloidyCapacity; - cacheAlleleCountCapacity = calculatorCacheAlleleCapacity; - likelihoodCalculators = new GenotypeLikelihoodCalculatorDRAGEN[calculatorCachePloidyCapacity][calculatorCacheAlleleCapacity]; - calculators = new GenotypeLikelihoodCalculators(); this.computeBQD = useBQDModel; this.computeFRD = useFRDModel; this.allelePadding = allelePadding; @@ -105,7 +97,6 @@ public GenotypingLikelihoods calculateLikelihoods(final Al final int alleleCount = genotypingAlleles.numberOfAlleles(); final int variantOffset = data.readLikelihoods().getVariantCallingSubsetApplied().getStart() + allelePadding; - 
GenotypeLikelihoodCalculatorDRAGEN likelihoodsCalculator = getLikelihoodsCalculator(ploidyModel.samplePloidy(0), alleleCount); //TODO this needs to change for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) { /////////////////////////////////////////////////////////////////////////// @@ -139,14 +130,9 @@ public GenotypingLikelihoods calculateLikelihoods(final Al // Compute default likelihoods as normal (before we go ahead and alter the likelihoods for the call) final int samplePloidy = ploidyModel.samplePloidy(sampleIndex); - // get a new likelihoodsCalculator if this sample's ploidy differs from the previous sample's - if (samplePloidy != likelihoodsCalculator.ploidy()) { - likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy, alleleCount); - } - // this is the data array for the read likelihoods without any trouble final LikelihoodMatrix sampleLikelihoods = alleleLikelihoodMatrixMapper.mapAlleles(data.readLikelihoods().sampleMatrix(sampleIndex)); - final double[] ploidyModelGenotypeLikelihoods = likelihoodsCalculator.rawGenotypeLikelihoods(sampleLikelihoods); + final double[] ploidyModelGenotypeLikelihoods = GenotypeLikelihoodCalculator.computeLog10GenotypeLikelihoods(samplePloidy, sampleLikelihoods); if (HaplotypeCallerGenotypingDebugger.isEnabled()) { HaplotypeCallerGenotypingDebugger.println("\n Standard Genotyping Likelihoods Results:"); @@ -155,14 +141,14 @@ public GenotypingLikelihoods calculateLikelihoods(final Al if (computeBQD) { applyLikelihoodsAdjusmentToBaseline(ploidyModelGenotypeLikelihoods, "BQD", - likelihoodsCalculator.calculateBQDLikelihoods(sampleLikelihoods, strandForward, strandReverse, - paddedReference, offsetForRefIntoEvent, calculators)); + GenotypeLikelihoodCalculatorDRAGEN.calculateBQDLikelihoods(samplePloidy, sampleLikelihoods, strandForward, strandReverse, + paddedReference, offsetForRefIntoEvent)); } if (computeFRD) { applyLikelihoodsAdjusmentToBaseline(ploidyModelGenotypeLikelihoods, "FRD", - 
likelihoodsCalculator.calculateFRDLikelihoods(sampleLikelihoods, ploidyModelGenotypeLikelihoods, + GenotypeLikelihoodCalculatorDRAGEN.calculateFRDLikelihoods(samplePloidy, sampleLikelihoods, ploidyModelGenotypeLikelihoods, Stream.of(strandForward, strandReverse).flatMap(Collection::stream).collect(Collectors.toList()), // We filter out the HMM filtered reads as they do not apply to FRD - FLAT_SNP_HET_PRIOR, api, maxEffectiveDepthAdjustment, calculators)); + FLAT_SNP_HET_PRIOR, api, maxEffectiveDepthAdjustment)); } // this is what the work actually is, after we have computed a few things @@ -187,20 +173,6 @@ private void applyLikelihoodsAdjusmentToBaseline(final double[] initialLikelihoo } - private GenotypeLikelihoodCalculatorDRAGEN getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) { - if (samplePloidy >= cachePloidyCapacity || alleleCount >= cacheAlleleCountCapacity) { - return calculators.getInstanceDRAGEN(samplePloidy, alleleCount); - } - final GenotypeLikelihoodCalculatorDRAGEN cachedResult = likelihoodCalculators[samplePloidy][alleleCount]; - if (cachedResult != null) { - return cachedResult; - } else { - final GenotypeLikelihoodCalculatorDRAGEN newOne = calculators.getInstanceDRAGEN(samplePloidy, alleleCount); - likelihoodCalculators[samplePloidy][alleleCount] = newOne; - return newOne; - } - } - /** * This helper class is used to store the necessary data in order to sort a read based on its BQD "feather end" as * well as information relevant to re-associate the read with its position in the AlleleLikelihoods object arrays. 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCounts.java index aa485933da0..61a26fee7fc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCounts.java @@ -2,6 +2,7 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -10,6 +11,7 @@ import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.function.IntConsumer; import java.util.stream.Collectors; @@ -18,6 +20,9 @@ /** * Collection of allele counts for a genotype. It encompasses what alleles are present in the genotype and in what number.

* + * Also, it stores its index within the canonical ordering of genotypes and can efficiently generate the next genotype in that order, which is used + * to iterate over all genotypes of a given ploidy and allele count. + * *

Alleles are represented herein by their indices running from 0 to N-1 where N is the number of alleles.

* *

Genotypes are represented as a single array of alternating alleles and counts, where only alleles with non-zero counts are included: @@ -52,18 +57,18 @@ * 20/1/1 * 31/1/1 * 40/0/2 - * 60/1/2 - * 71/1/2 - * 80/2/2 - * 91/2/2 - * 102/2/2 - * 110/0/3 - * 120/1/3 - * 131/1/3 - * 140/2/3 - * 151/2/3 - * 162/2/3 - * 170/3/3 + * 50/1/2 + * 61/1/2 + * 70/2/2 + * 81/2/2 + * 92/2/2 + * 100/0/3 + * 110/1/3 + * 121/1/3 + * 130/2/3 + * 141/2/3 + * 152/2/3 + * 160/3/3 * ...... * * @@ -79,7 +84,7 @@ public final class GenotypeAlleleCounts implements Comparable iterator(final int ploidy, final int alleleCount) { + return new Iterator() { + private int index = 0; + private int numGenotypes = GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount); + private GenotypeAlleleCounts alleleCounts = first(ploidy); + + @Override + public boolean hasNext() { + return index < numGenotypes; + } + + @Override + public GenotypeAlleleCounts next() { + if (index++ > 0) { + alleleCounts.increase(); + } + return alleleCounts; + } + }; + } + + /** + * Iterate over all GenotypeAlleleCounts for a given ploidy and allele count in the canonical order. + * + * This is the preferred way to access all GenotypeAlleleCounts in sequence, such as when computing genotype likelihoods. + * Thanks to the efficiency of the increase() method this iteration is extremely fast. + */ + public static Iterable iterable(final int ploidy, final int alleleCount) { + return new Iterable() { + private final int p = ploidy; + private final int a = alleleCount; + + @Override + public Iterator iterator() { + return GenotypeAlleleCounts.iterator(p,a); + } + }; + } + /** * Increases the allele counts a number of times. * *

* This method must not be invoked on cached genotype-allele-counts that are meant to remain constant, - * such as the ones contained in {@link GenotypeLikelihoodCalculators#genotypeTableByPloidy}. + * such as the ones contained in {@link GenotypesCache#genotypeTableByPloidy}. *

* * @param times the number of times to increase. @@ -162,17 +206,17 @@ protected void increase(final int times) { } /** - * Updates the genotype counts to match the next genotype according to the canonical ordering of PLs. + * Returns the next genotype allele counts object in the canonical ordering of genotypes. * *

* This method must not be invoked on cached genotype-allele-counts that are meant to remain constant, - * such as the ones contained in {@link GenotypeLikelihoodCalculators#genotypeTableByPloidy} + * such as the ones contained in {@link GenotypesCache#genotypeTableByPloidy} *

*/ - protected void increase() { + protected GenotypeAlleleCounts increase() { // if the ploidy is zero there is only one possible genotype. if (distinctAlleleCount == 0) { - return; + return this; } // Worth make this case faster. @@ -239,6 +283,7 @@ protected void increase() { } index++; log10CombinationCount = -1; + return this; } /** @@ -303,17 +348,15 @@ public int distinctAlleleCount() { } /** - * Gets the log10 combination count, computing it if uninitialized. Note that the invoked MathUtils method uses fast cached - * log10 values of integers for any reasonable ploidy. + * Gets the log10 combination count, computing it if uninitialized. * - * This method should be invoked on instances of {@link GenotypeAlleleCounts} cached in {@link GenotypeLikelihoodCalculators::genotypeTableByPloidy}. + * This method should be invoked on instances of {@link GenotypeAlleleCounts} cached in {@link GenotypesCache}. * Such usage allows the result of this computation to be cached once for an entire run of HaplotypeCaller. - * @return */ public double log10CombinationCount() { if (log10CombinationCount == UNCOMPUTED_LOG_10_COMBINATION_COUNT) { - log10CombinationCount = MathUtils.log10Factorial(ploidy) - - new IndexRange(0, distinctAlleleCount).sum(n -> MathUtils.log10Factorial(sortedAlleleCounts[2*n+1])); + log10CombinationCount = MathUtils.logToLog10(CombinatoricsUtils.factorialLog(ploidy) + - new IndexRange(0, distinctAlleleCount).sum(n -> CombinatoricsUtils.factorialLog(sortedAlleleCounts[2*n+1]))); } return log10CombinationCount; } @@ -512,90 +555,6 @@ public int alleleCountFor(final int index) { return rank < 0 ? 0 : alleleCountAt(rank); } - /** - * Returns the allele counts for each allele index to maximum. - * @param maximumAlleleIndex the maximum allele index required. - * @throws IllegalArgumentException if {@code maximumAlleleIndex} is less than 0. 
- * @return never {@code null}, an array of exactly {@code maximumAlleleIndex + 1} positions with the counts - * of each allele where the position in the array is equal to its index. - */ - public int[] alleleCountsByIndex(final int maximumAlleleIndex) { - Utils.validateArg(maximumAlleleIndex >= 0, "the requested allele count cannot be less than 0"); - final int[] result = new int[maximumAlleleIndex + 1]; - copyAlleleCountsByIndex(result, 0, 0, maximumAlleleIndex); - return result; - } - - - private void copyAlleleCountsByIndex(final int[] dest, final int offset, final int minimumAlleleIndex, final int maximumAlleleIndex) { - - // First we determine what section of the sortedAlleleCounts array contains the counts of interest, - // By the present allele rank range of interest. - final int minimumAlleleRank = alleleRankFor(minimumAlleleIndex); - final int maximumAlleleRank = alleleRankFor(maximumAlleleIndex); - - // If the min or max allele index are absent (returned rank < 0) we note where the would be inserted; that - // way we avoid going through the rest of positions in the sortedAlleleCounts array. - // The range of interest is then [startRank,endRank]. - final int startRank = minimumAlleleRank < 0 ? - minimumAlleleRank - 1 : minimumAlleleRank; - final int endRank = maximumAlleleRank < 0 ? - maximumAlleleRank - 2 : maximumAlleleRank; - - // Iteration variables: - int nextIndex = minimumAlleleIndex; // next index that we want to output the count for. - int nextRank = startRank; // next rank to query in sortedAlleleCounts. - int nextSortedAlleleCountsOffset = nextRank << 1; // offset in sortedAlleleCounts where the info is present for the next rank. - int nextDestOffset = offset; // next offset in destination array where to set the count for the nextIndex. - - while (nextRank++ <= endRank) { - final int alleleIndex = sortedAlleleCounts[nextSortedAlleleCountsOffset++]; - // fill non-present allele counts with 0s. 
- while (alleleIndex > nextIndex) { - dest[nextDestOffset++] = 0; - nextIndex++; - } - // It is guaranteed that at this point alleleIndex == nextIndex - // thanks to the condition of the enclosing while: there must be at least one index of interest that - // is present in the remaining (nextRank,endRank] interval as otherwise endRank would be less than nextRank. - dest[nextDestOffset++] = sortedAlleleCounts[nextSortedAlleleCountsOffset++]; - nextIndex++; - } - // Finally we take care of trailing requested allele indices. - while (nextIndex++ <= maximumAlleleIndex) { - dest[nextDestOffset++] = 0; - } - } - - /** - * Copies the sorted allele counts into an array. - * - *

- * Sorted allele counts are disposed as an even-sized array where even positions indicate the allele index and - * the following odd positions the number of copies of that allele in this genotype allele count: - *

- *

-     *     [ allele_0, freq_0, allele_1, freq_1 ... ]
-     * 

- * - *

- * With {@code offset} you can indicate an alternative first position in the destination array. - *

- * - * @param dest where to copy the counts. - * @param offset starting position. - * - * @throws IllegalArgumentException if {@code dest} is {@code null}, {@code offset} is less than 0 - * or {@code dest} is not large enough considering the number of alleles present in this genotype - * allele counts and the {@code offset} provided. A total of - * {@link #distinctAlleleCount()} * 2 positions - * are required for the job. - */ - public void copyAlleleCounts(final int[] dest, final int offset) { - Utils.nonNull(dest, "the destination cannot be null"); - Utils.validateArg(offset >= 0, "the offset cannot be negative"); - final int sortedAlleleCountsLength = distinctAlleleCount << 1; - Utils.validateArg(offset + sortedAlleleCountsLength <= dest.length, "the input array does not have enough capacity"); - System.arraycopy(sortedAlleleCounts, 0, dest, offset, sortedAlleleCountsLength); - } /** * Instantiates the first genotype possible provided a total ploidy. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculator.java new file mode 100644 index 00000000000..07845ef52ab --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculator.java @@ -0,0 +1,206 @@ +package org.broadinstitute.hellbender.tools.walkers.genotyper; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.commons.math3.util.CombinatoricsUtils; +import org.apache.commons.math3.util.FastMath; +import org.broadinstitute.hellbender.exceptions.GATKException; +import org.broadinstitute.hellbender.utils.IndexRange; +import org.broadinstitute.hellbender.utils.MathUtils; +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.Arrays; + +/** + * Utilities class for calculations involving the canonical enumeration of (unphased) genotypes. 
+ * + * For diploid genotypes with alleles A, B, C. . . this ordering is AA, AB, BB, AC, BC, CC. . . + * + * For triploid genotypes it is AAA, AAB, ABB, BBB, AAC, ABC, BBC, ACC, BCC, CCC. . . + * + * Note that we may define the ordering recursively. Letting g = {g_1,g_2,g_3, . . ., g_N} and h = {h_1,h_2,h_3, . . ., h_N} + * be genotypes comprising alleles g_1,g_2,g_3, . . . , g_N and h_1,h_2,h_3, . . ., h_N, respectively: + * (i) the order of haploid genotypes is simply the allele ordering + * (ii) if g_N < h_N then g < h + * (iii) if g_N = h_N then the order is that of the first N-1 alleles + * + * Note also that whenever possible it is best to traverse all genotypes in the canonical order without the random index calculations + * provided here. However, when subsetting, reordering, merging, and adding alleles it is necessary to translate indices from + * one basis of alleles to another. In such cases efficient index calculations are important. + */ +public class GenotypeIndexCalculator { + + private GenotypeIndexCalculator() {} + + /** + * How many genotypes with given ploidy appear in the standard order before a given allele is reached. + * + * For example, considering alleles A, B, C, D, etc ... (indexed 0, 1, 2, ... respectively): + * f(3,A) = f(3,0) = 0 as the first genotype AAA contains A. + * f(3,B) = f(3,1) = 1 as the second genotype AAB contains B. + * f(3,C) = f(3,2) = 4 as the first genotype that contains C, AAC follows: AAA AAB ABB BBB + * f(4,D) = f(4,3) = 15 as AAAD follows AAAA AAAB AABB ABBB BBBB AAAC AABC ABBC BBBC AACC ABCC BBCC ACCC BCCC CCCC + * + * There is a simple closed-form expression for this. Any genotype with ploidy p and a alleles can be encoded + * by p 'x's and a - 1 '/'s, where each x represents one allele count and each slash divides between consecutive alleles. + * For example, with p = 3 and a = 3 we have xxx// representing AAA, //xxx representing CCC, x/x/x representing ABC, + * and xx//x representing AAC. 
It is easy to see that any such string corresponds to a genotype, and the number of such + * strings is given by the number of places to put the a-1 slashes within the p+a-1 total characters, which is + * simply the binomial coefficient (p+a-1)C(a-1). Considering that allele indices are zero-based, we also have + * f(p,a) = (p+a-1)C(a-1). + * + * See discussion at https://genome.sph.umich.edu/wiki/Relationship_between_Ploidy,_Alleles_and_Genotypes + */ + public static long indexOfFirstGenotypeWithAllele(final int ploidy, final int allele) { + return allele == 0 ? 0 : CombinatoricsUtils.binomialCoefficient(ploidy + allele - 1, allele - 1); + } + + /** + * Returns the number of possible genotypes given the ploidy and number of different alleles. + * @param ploidy the requested ploidy. + * @param alleleCount the requested number of alleles. + * + * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative or + * the number of genotypes is too large (more than {@link Integer#MAX_VALUE}). + * + * @return the number of genotypes given ploidy and allele count (0 or greater). + */ + public static int genotypeCount(final int ploidy, final int alleleCount) { + final long result = indexOfFirstGenotypeWithAllele(ploidy, alleleCount); + Utils.validateArg(result != MathUtils.LONG_OVERFLOW && result < Integer.MAX_VALUE, () -> + String.format("the number of genotypes is too large for ploidy %d and %d alleles: approx. %.0f", ploidy, alleleCount, + CombinatoricsUtils.binomialCoefficientDouble(ploidy + alleleCount - 1, alleleCount - 1))); + return (int) result; + } + + /** + * Given a list of alleles, returns the likelihood array index. + * + * @param alleles the indices of the alleles in the genotype, there should be as many repetitions of an + * index as copies of that allele in the genotype. Allele indices do not need to be sorted in + * any particular way. For example, {A,A,B}, {A,B,A}, {B,A,A} are all valid inputs. + * + * @return the likelihood array index of the genotype (0 if the ploidy is 0).
+ */ + public static int allelesToIndex(final int... alleles) { + final int ploidy = alleles.length; + return ploidy == 0 ? 0 : calculateIndex(Arrays.copyOf(alleles, ploidy)); + } + + /** + * Returns the genotype index given the allele counts in format (allele1, count1, allele2, count2. . . ) + * + * @param alleleCountArray the query allele counts. + * + * @throws IllegalArgumentException if {@code alleleCountArray} is null, has odd length, contains negative counts, + * or has a total allele count different from the ploidy. + */ + public static int alleleCountsToIndex(final int ... alleleCountArray) { + Utils.nonNull(alleleCountArray, "the allele counts cannot be null"); + Utils.validateArg((alleleCountArray.length & 1) == 0, "the allele counts array cannot have odd length"); + int ploidy = 0; + for (int i = 0; i < alleleCountArray.length; i += 2) { + ploidy += alleleCountArray[i+1]; + } + final int[] alleleContainer = new int[ploidy]; + + + int n = 0; + for (int i = 0; i < alleleCountArray.length; i += 2) { + final int allele = alleleCountArray[i]; + final int count = alleleCountArray[i+1]; + Utils.validateArg(count >= 0, "no allele count can be less than 0"); + for (int j = 0; j < count; j++, n++) { + alleleContainer[n] = allele; + } + } + return calculateIndex(alleleContainer); + } + + /** + * Calculate the "old" genotype index given a GenotypeAlleleCounts + * object in some new basis of alleles and an int -> int map (in the form of an array) to translate from new allele + * indices to the "old" allele indices.
+ */ + public static int alleleCountsToIndex(final GenotypeAlleleCounts newGAC, final int[] newToOldAlleleMap) { + final int[] alleleContainer = new int[newGAC.ploidy()]; + final MutableInt n = new MutableInt(0); + newGAC.forEachAlleleIndexAndCount((newAllele, count) -> { + final int oldAllele = newToOldAlleleMap[newAllele]; + new IndexRange(0, count).forEach(k -> alleleContainer[n.getAndIncrement()] = oldAllele); + }); + + return calculateIndex(alleleContainer); + } + + /** + * Example: suppose our genotype is ABC. Then the index is the sum of (1) the number of ploidy 3 genotypes before + * reaching C in the third position, (2) the number of ploidy 2 genotypes before reaching B in the 2nd position, and + * (3) the number of ploidy 1 genotypes before reaching A in the 1st position. + */ + private static int calculateIndex(final int[] alleles) { + final int ploidy = alleles.length; + + // traverse alleles from highest to lowest index + Arrays.sort(alleles); + return new IndexRange(0, ploidy).sumInt(n -> { + final int allele = alleles[ploidy - n - 1]; + return (int) indexOfFirstGenotypeWithAllele(ploidy - n, allele); + }); + } + + /** + * Compute the maximally acceptable allele count (ref allele included) given the maximally acceptable genotype count. + * @param ploidy sample ploidy + * @param maxGenotypeCount maximum genotype count, used to calculate the upper bound on the number of alleles given ploidy + * @throws IllegalArgumentException if {@code ploidy} is negative.
+ * @return the maximally acceptable allele count given ploidy and maximum number of genotypes acceptable + */ + public static int computeMaxAcceptableAlleleCount(final int ploidy, final int maxGenotypeCount){ + Utils.validateArg(ploidy >= 0, () -> "negative ploidy " + ploidy); + + if (ploidy == 1) { + return maxGenotypeCount; + } + final double logMaxGenotypeCount = Math.log(maxGenotypeCount); + + // Math explanation: genotype count is determined by ${P+A-1 \choose A-1}$, this leads to constraint + // $\log(\frac{(P+A-1)!}{(A-1)!}) \le \log(P!G)$, + // where $P$ is ploidy, $A$ is allele count, and $G$ is maxGenotypeCount + // The upper and lower bounds of the left hand side of the constraint are $P \log(A-1+P)$ and $P \log(A)$ + // which require $A$ to be searched in interval $[\exp(\log(P!G)/P) - (P-1), \exp(\log(P!G)/P)]$ + // Denote $\exp(\log(P!G)/P)$ as $x$ in the code. + + final double x = FastMath.exp((CombinatoricsUtils.factorialLog(ploidy) + logMaxGenotypeCount)/ploidy ); + final int lower = (int)Math.floor(x) - ploidy - 1; + final int upper = (int)Math.ceil(x); + for(int a=upper; a>=lower; --a){// check one by one + + final double logGTCnt = CombinatoricsUtils.binomialCoefficientLog(ploidy+a-1, a-1); + if(logMaxGenotypeCount >= logGTCnt) { + return a; + } + } + throw new GATKException.ShouldNeverReachHereException("This method must have implemented its search wrong."); + } + + /** + * Composes a genotype index map given an allele index recoding such that result[i] is the index of the old + * genotype corresponding to the ith new genotype. + * + * @param newToOldAlleleMap allele recoding such that newToOldAlleleMap[i] is the index of the old allele + * corresponding to the ith new allele + * + * @return never {@code null}.
+ */ + public static int[] newToOldGenotypeMap(final int ploidy, final int[] newToOldAlleleMap) { + Utils.nonNull(newToOldAlleleMap); + final int newAlleleCount = newToOldAlleleMap.length; + + final int[] result = new int[genotypeCount(ploidy, newAlleleCount)]; + for (final GenotypeAlleleCounts newGAC : GenotypeAlleleCounts.iterable(ploidy, newAlleleCount)) { + result[newGAC.index()] = alleleCountsToIndex(newGAC, newToOldAlleleMap); + } + + return result; + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java index 66c1fd241a1..d1a99280019 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculator.java @@ -2,550 +2,151 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.GenotypeLikelihoods; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.math3.util.FastMath; +import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.genotyper.LikelihoodMatrix; -import java.util.Comparator; -import java.util.PriorityQueue; - +import java.util.Arrays; + +/** + * This class has a single responsibility: calculating genotype likelihoods given allele likelihoods through the formula: + * + * Prob(reads | genotype) = product_{all reads} [[sum_{alleles in genotype} Prob(read | allele)]/ploidy] + * + * Note that this applies to non-somatic variant calling, where ploidy is a known integer and genotypes are given by the + * number of copies of each allele. 
+ * + * COMPUTATIONAL NOTE + * In the multiallelic calculation we accumulate the likelihood contribution of each read one allele at a time. That is, + * for genotype containing alleles A, B, C, we first fill an array with the likelihood contributions from allele A, then + * we make a second pass and add the contributions from allele B, then allele C. Traversing all the reads in each + * allele row of the likelihoods array in this manner is cache-friendly and makes an enormous difference in runtime. + * + * The difference in performance comes from the fact that we index likelihoods first by allele, then by read. Because of this, + * likelihoods of consecutive reads with the same allele are adjacent in memory while likelihoods of consecutive alleles with the same read + * are not. In the former case looking up new likelihoods almost always results in a cache hit since many reads of the same allele + * are loaded on the same cache page. + * + * If the cache-friendliness of this class is broken, it will show up as a severe regression in the runtime of its unit tests + * for larger ploidies and allele counts. + */ public class GenotypeLikelihoodCalculator { - /** - * Offset table for this calculator. - * - *

- * This is a shallow copy of {@link GenotypeLikelihoodCalculators#alleleFirstGenotypeOffsetByPloidy} when the calculator was created - * thus it follows the same format as that array. Please refer to its documentation. - *

- * - *

You can assume that this offset table contain at least (probably more) the numbers corresponding to the allele count and ploidy for this calculator. - * However since it might have more than that and so you must use {@link #alleleCount} and {@link #ploidy} when - * iterating through this array rather that its length or the length of its components.

. - */ - private final int[][] alleleFirstGenotypeOffsetByPloidy; - /** - * Genotype table for this calculator. - * - *

It is ensure that it contains all the genotypes for this calculator ploidy and allele count, maybe more. For - * that reason you must use {@link #genotypeCount} when iterating through this array and not relay on its length.

- */ - private final GenotypeAlleleCounts[] genotypeAlleleCounts; - /** - * Number of genotypes given this calculator {@link #ploidy} and {@link #alleleCount}. - */ - final int genotypeCount; - /** - * Number of genotyping alleles for this calculator. - */ - final int alleleCount; - /** - * Ploidy for this calculator. - */ - final int ploidy; - /** - * Max-heap for integers used for this calculator internally. - */ - private final PriorityQueue alleleHeap; - /** - * Buffer used as a temporary container for likelihood components for genotypes stratified by reads. - * - *

- * It is indexed by genotype index and then by read index. The read capacity is increased as needed by calling - * {@link #ensureReadCapacity(int) ensureReadCapacity}. - *

- */ - final double[][] readLikelihoodsByGenotypeIndex; - /** - * Buffer field use as a temporal container for sorted allele counts when calculating the likelihood of a - * read in a genotype. - *

- * This array follows the same format as {@link GenotypeAlleleCounts#sortedAlleleCounts}. Each component in the - * genotype takes up two positions in the array where the first indicate the allele index and the second its frequency in the - * genotype. Only non-zero frequency alleles are represented, taking up the first positions of the array. - *

- * - *

- * This array is sized so that it can accommodate the maximum possible number of distinct alleles in any - * genotype supported by the calculator, value stored in {@link #maximumDistinctAllelesInGenotype}. - *

- */ - private final int[] genotypeAllelesAndCounts; - /** - * Maximum number of components (or distinct alleles) for any genotype with this calculator ploidy and allele count. - */ - private int maximumDistinctAllelesInGenotype; - /** - * Cache of the last genotype-allele-count requested using {@link #genotypeAlleleCountsAt(int)}, when it - * goes beyond the maximum genotype-allele-count static capacity. Check on that method documentation for details. - */ - private GenotypeAlleleCounts lastOverheadCounts; - /** - * Buffer used as a temporary container for likelihood components for genotypes stratified by alleles, allele frequency and reads. - * - *

To improve performance we use a 1-dimensional array to implement a 3-dimensional one as some of those dimension - * have typically very low depths (allele and allele frequency)

- * - *

- * The value contained in position [a][f][r] == log10Lk(read[r] | allele[a]) + log10(f) . Exception is - * for f == 0 whose value is undefined (in practice 0.0) and never used. - *

- * - *

- * It is indexed by read, then by allele and then by the number of copies of the allele. For the latter - * there are as many entries as the ploidy of the calculator + 1 (to accommodate zero copies although is - * never used in practice). - *

- */ - double[] readAlleleLikelihoodByAlleleCount = null; - /** - * Indicates how many reads the calculator supports. - * - *

This figure is increased dynamically as per the - * calculation request calling {@link #ensureReadCapacity(int) ensureReadCapacity}.

- */ - private int readCapacity = -1; - /** - * Buffer field use as a temporal container for component likelihoods when calculating the likelihood of a - * read in a genotype. It is stratified by read and the allele component of the genotype likelihood... that is - * the part of the likelihood sum that correspond to a particular allele in the genotype. - * - *

- * It is implemented in a 1-dimensional array since typically one of the dimensions is rather small. Its size - * is equal to {@link #readCapacity} times {@link #maximumDistinctAllelesInGenotype}. - *

- * - *

- * More concretely [r][i] == log10Lk(read[r] | allele[i]) + log(freq[i]) where allele[i] is the ith allele - * in the genotype of interest and freq[i] is the number of times it occurs in that genotype. - *

- */ - private double[] readGenotypeLikelihoodComponents; - public GenotypeLikelihoodCalculator(final int ploidy, final int alleleCount, final int[][] alleleFirstGenotypeOffsetByPloidy, - final GenotypeAlleleCounts[][] genotypeTableByPloidy) { - maximumDistinctAllelesInGenotype = Math.min(ploidy, alleleCount); - this.alleleFirstGenotypeOffsetByPloidy = alleleFirstGenotypeOffsetByPloidy; - genotypeAlleleCounts = genotypeTableByPloidy[ploidy]; - genotypeCount = this.alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount]; - this.alleleCount = alleleCount; - this.ploidy = ploidy; - alleleHeap = new PriorityQueue<>(ploidy, Comparator.naturalOrder().reversed()); - readLikelihoodsByGenotypeIndex = new double[genotypeCount][]; - genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype * 2]; - } + protected GenotypeLikelihoodCalculator() { } /** - * Makes sure that temporal arrays and matrices are prepared for a number of reads to process. - * @param requestedCapacity number of read that need to be processed. - */ - public void ensureReadCapacity(final int requestedCapacity) { - Utils.validateArg(requestedCapacity >= 0, "capacity may not be negative"); - if (readCapacity == -1) { // first time call. - final int minimumCapacity = Math.max(requestedCapacity, 10); // Never go too small, 10 is the minimum. 
- readAlleleLikelihoodByAlleleCount = new double[minimumCapacity * alleleCount * (ploidy+1)]; - for (int i = 0; i < genotypeCount; i++) { - readLikelihoodsByGenotypeIndex[i] = new double[minimumCapacity]; - } - readGenotypeLikelihoodComponents = new double[ploidy * minimumCapacity]; - readCapacity = minimumCapacity; - } else if (readCapacity < requestedCapacity) { - final int doubleCapacity = (requestedCapacity << 1); - readAlleleLikelihoodByAlleleCount = new double[doubleCapacity * alleleCount * (ploidy+1)]; - for (int i = 0; i < genotypeCount; i++) { - readLikelihoodsByGenotypeIndex[i] = new double[doubleCapacity]; - } - readGenotypeLikelihoodComponents = new double[maximumDistinctAllelesInGenotype * doubleCapacity]; - readCapacity = doubleCapacity; - } - } - - /** - * Give a list of alleles, returns the likelihood array index. - * @param alleleIndices the indices of the alleles in the genotype, there should be as many repetition of an - * index as copies of that allele in the genotype. Allele indices do not need to be sorted in - * any particular way. - * - * @return never {@code null}. - */ - public int allelesToIndex(final int... alleleIndices) { - // Special case ploidy == 0. - if (ploidy == 0) { - return 0; - } - - alleleHeap.clear(); - for (int i = 0; i < alleleIndices.length; i++) { - alleleHeap.add(alleleIndices[i]); - } - return alleleHeapToIndex(); - } - - /** - * Returns the number of possible genotypes given ploidy and the maximum allele index. - * @return never {@code null}. - */ - public int genotypeCount() { - return genotypeCount; - } - - /** - * Returns the genotype associated to a particular likelihood index. - * - *

If {@code index} is larger than {@link GenotypeLikelihoodCalculators#MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY}, - * this method will reconstruct that genotype-allele-count iteratively from the largest strongly referenced count available. - * or the last requested index genotype. - *

+ * Calculate the log10AlleleLikelihoods given the list of alleles and the likelihood map. * - *

Therefore if you are iterating through all genotype-allele-counts you should do sequentially and incrementally, to - * avoid a large efficiency drop

. - * - * @param index query likelihood-index. - * @return never {@code null}. - */ - public GenotypeAlleleCounts genotypeAlleleCountsAt(final int index) { - Utils.validateArg(index >= 0 && index < genotypeCount, () -> "invalid likelihood index: " + index + " >= " + genotypeCount - + " (genotype count for nalleles = " + alleleCount + " and ploidy " + ploidy); - if (index < GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY) { - return genotypeAlleleCounts[index]; - } else if (lastOverheadCounts == null || lastOverheadCounts.index() > index) { - final GenotypeAlleleCounts result = genotypeAlleleCounts[GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY - 1].copy(); - result.increase(index - GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY + 1); - lastOverheadCounts = result; - return result.copy(); - } else { - lastOverheadCounts.increase(index - lastOverheadCounts.index()); - return lastOverheadCounts.copy(); - } - } - - /** - * Calculate the likelihoods given the list of alleles and the likelihood map. + * @param log10AlleleLikelihoods the likelihood matrix all alleles vs all reads. * - * @param likelihoods the likelihood matrix all alleles vs all reads. - * - * @throws IllegalArgumentException if {@code alleleList} is {@code null} or {@code likelihoods} is {@code null} + * @throws IllegalArgumentException if {@code alleleList} is {@code null} or {@code log10AlleleLikelihoods} is {@code null} * or the alleleList size does not match the allele-count of this calculator, or there are missing allele vs - * read combinations in {@code likelihoods}. - * - * @return never {@code null}. 
- */ - public GenotypeLikelihoods genotypeLikelihoods(final LikelihoodMatrix likelihoods) { - final double[] readLikelihoodsByGenotypeIndex = getReadRawReadLikelihoodsByGenotypeIndex(likelihoods); - return GenotypeLikelihoods.fromLog10Likelihoods(readLikelihoodsByGenotypeIndex); - } - - /** - * A helper method that actually does the matrix operations but returns the raw values. - * - * @return the raw array (in log10 likelihoods space) of the GL for each genotype - */ - double[] getReadRawReadLikelihoodsByGenotypeIndex(final LikelihoodMatrix likelihoods) { - Utils.nonNull(likelihoods); - Utils.validateArg(likelihoods.numberOfAlleles() == alleleCount, "mismatch between allele list and alleleCount"); - final int readCount = likelihoods.evidenceCount(); - ensureReadCapacity(readCount); - - /// [x][y][z] = z * LnLk(Read_x | Allele_y) - final double[] readLikelihoodComponentsByAlleleCount - = readLikelihoodComponentsByAlleleCount(likelihoods); - final double[][] genotypeLikelihoodByRead = genotypeLikelihoodByRead(readLikelihoodComponentsByAlleleCount,readCount); - return genotypeLikelihoods(genotypeLikelihoodByRead, readCount); - } - - /** - * Calculates the final genotype likelihood array out of the likelihoods for each genotype per read. - * - * @param readLikelihoodsByGenotypeIndex [g][r] likelihoods for each genotype g and r. - * @param readCount number of reads in the input likelihood arrays in {@code genotypeLikelihoodByRead}. - * @return never {@code null}, one position per genotype where the i entry is the likelihood of the ith - * genotype (0-based). 
- */ - double[] genotypeLikelihoods(final double[][] readLikelihoodsByGenotypeIndex, final int readCount) { - final double[] result = new double[genotypeCount]; - final double denominator = readCount * MathUtils.log10(ploidy); - // instead of dividing each read likelihood by ploidy ( so subtract log10(ploidy) ) - // we multiply them all and the divide by ploidy^readCount (so substract readCount * log10(ploidy) ) - for (int g = 0; g < genotypeCount; g++) { - result[g] = MathUtils.sum(readLikelihoodsByGenotypeIndex[g], 0, readCount) - denominator; - } - return result; - } - - /** - * Calculates the likelihood component of each read on each genotype. - * - * NOTE: this is not actually the read likelihood component for each genotype, it is the sum of the log read likelihoods components - * for each genotype without having been normalized by the the denominator of the ploidy, that happens in the final step + * read combinations in {@code log10AlleleLikelihoods}. * - * @param readLikelihoodComponentsByAlleleCount [a][f][r] likelihood stratified by allele a, frequency in genotype f and - * read r. - * @param readCount number of reads in {@code readLikelihoodComponentsByAlleleCount}. * @return never {@code null}. */ - protected double[][] genotypeLikelihoodByRead(final double[] readLikelihoodComponentsByAlleleCount, final int readCount) { - - // Here we don't use the convenience of {@link #genotypeAlleleCountsAt(int)} within the loop to spare instantiations of - // GenotypeAlleleCounts class when we are dealing with many genotypes. 
- GenotypeAlleleCounts alleleCounts = genotypeAlleleCounts[0]; - - for (int genotypeIndex = 0; genotypeIndex < genotypeCount; genotypeIndex++) { - final double[] readLikelihoods = this.readLikelihoodsByGenotypeIndex[genotypeIndex]; - final int componentCount = alleleCounts.distinctAlleleCount(); - switch (componentCount) { - case 1: // - singleComponentGenotypeLikelihoodByRead(alleleCounts, readLikelihoods, readLikelihoodComponentsByAlleleCount, readCount); - break; - case 2: - twoComponentGenotypeLikelihoodByRead(alleleCounts,readLikelihoods,readLikelihoodComponentsByAlleleCount, readCount); - break; - default: - manyComponentGenotypeLikelihoodByRead(alleleCounts,readLikelihoods,readLikelihoodComponentsByAlleleCount, readCount); - } - if (genotypeIndex < genotypeCount - 1) { - alleleCounts = nextGenotypeAlleleCounts(alleleCounts); + public static GenotypeLikelihoods log10GenotypeLikelihoods(final int ploidy, final LikelihoodMatrix log10AlleleLikelihoods) { + final double[] log10GenotypeLikelihoods = computeLog10GenotypeLikelihoods(ploidy, log10AlleleLikelihoods); + return GenotypeLikelihoods.fromLog10Likelihoods(log10GenotypeLikelihoods); + } + + /** + * Compute the genotype log10 likelihoods as an array in the canonical genotype order. 
That is, result[i] = Pr(reads | ith genotype) + * + * @param log10AlleleLikelihoods log 10 likelihood matrix indexed by allele, then read + * @return the log 10 likelihood of each genotype as an array + */ + protected static double[] computeLog10GenotypeLikelihoods(final int ploidy, final LikelihoodMatrix log10AlleleLikelihoods) { + Utils.nonNull(log10AlleleLikelihoods); + final int alleleCount = log10AlleleLikelihoods.numberOfAlleles(); + final int readCount = log10AlleleLikelihoods.evidenceCount(); + + final double[][] log10LikelihoodsByAlleleAndRead = log10AlleleLikelihoods.asRealMatrix().getData(); + + final boolean triallelicGenotypesPossible = alleleCount > 2 && ploidy > 2; + final double[] perReadBuffer = triallelicGenotypesPossible ? new double[readCount] : null; + + // non-log space log10AlleleLikelihoods for multiallelic computation requires rescaling for stability when we + // exponentiate away the log, and we store the scaling factor to bring back later + final Pair rescaledNonLogLikelihoodsAndCorrection = !triallelicGenotypesPossible ? null : + rescaledNonLogLikelihoods(log10AlleleLikelihoods); + + final double[] result = new double[GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount)]; + + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(ploidy, alleleCount)) { + final int componentCount = gac.distinctAlleleCount(); + final int genotypeIndex = gac.index(); + if (componentCount == 1) { + // homozygous case: log P(reads|AAAAA. . .) 
= sum_{reads} log P(read|A) + final int allele = gac.alleleIndexAt(0); + result[genotypeIndex] = MathUtils.sum(log10LikelihoodsByAlleleAndRead[allele]); + } else if (componentCount == 2) { + // biallelic het case: log P(reads | nA copies of A, nB copies of B) = sum_{reads} (log[(nA * P(read | A) + nB * P(read | B))] -log(ploidy)) + final double[] log10ReadLks1 = log10LikelihoodsByAlleleAndRead[gac.alleleIndexAt(0)]; + final int count1 = gac.alleleCountAt(0); + final double log10Count1 = Math.log10(count1); + final double[] log10ReadLks2 = log10LikelihoodsByAlleleAndRead[gac.alleleIndexAt(1)]; + final double log10Count2 = Math.log10(ploidy - count1); + + // note: if you are reading the multiallelic case below and have gotten paranoid about cache efficiency, + // here the log10 likelihood matrix rows for *both* alleles are in the cache at once + result[genotypeIndex] = new IndexRange(0, readCount).sum(r -> MathUtils.approximateLog10SumLog10(log10ReadLks1[r] + log10Count1, log10ReadLks2[r] + log10Count2)) + - readCount * Math.log10(ploidy); + } else { + // the multiallelic case is conceptually the same as the biallelic case but done in non-log space + // We implement in a cache-friendly way by summing nA * P(read|A) over all alleles for each read, but iterating over reads as the inner loop + Arrays.fill(perReadBuffer,0, readCount, 0); + final double[][] rescaledNonLogLikelihoods = rescaledNonLogLikelihoodsAndCorrection.getLeft(); + final double log10Rescaling = rescaledNonLogLikelihoodsAndCorrection.getRight(); + gac.forEachAlleleIndexAndCount((a, f) -> new IndexRange(0, readCount).forEach(r -> perReadBuffer[r] += f * rescaledNonLogLikelihoods[a][r])); + result[genotypeIndex] = new IndexRange(0, readCount).sum(r -> Math.log10(perReadBuffer[r])) - readCount * Math.log10(ploidy) + log10Rescaling; } } - return readLikelihoodsByGenotypeIndex; - } - - private GenotypeAlleleCounts nextGenotypeAlleleCounts(final GenotypeAlleleCounts alleleCounts) { - final int index = 
alleleCounts.index(); - final GenotypeAlleleCounts result; - final int cmp = index - GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY + 1; - if (cmp < 0) { - result = genotypeAlleleCounts[index + 1]; - } else if (cmp == 0) { - result = genotypeAlleleCounts[index].copy(); - result.increase(); - } else { - alleleCounts.increase(); - result = alleleCounts; - } return result; } - /** - * General genotype likelihood component by read calculator. It does not make any assumption in the exact - * number of alleles present in the genotype. - */ - private void manyComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts, - final double[] likelihoodByRead, - final double[]readLikelihoodComponentsByAlleleCount, - final int readCount) { - - // First we collect the allele likelihood component for all reads and place it - // in readGenotypeLikelihoodComponents for the final calculation per read. - genotypeAlleleCounts.copyAlleleCounts(genotypeAllelesAndCounts,0); - final int componentCount = genotypeAlleleCounts.distinctAlleleCount(); - final int alleleDataSize = (ploidy + 1) * readCount; - for (int c = 0,cc = 0; c < componentCount; c++) { - final int alleleIndex = genotypeAllelesAndCounts[cc++]; - final int alleleCount = genotypeAllelesAndCounts[cc++]; - // alleleDataOffset will point to the index of the first read likelihood for that allele and allele count. - int alleleDataOffset = alleleDataSize * alleleIndex + alleleCount * readCount; - for (int r = 0, readDataOffset = c; r < readCount; r++, readDataOffset += maximumDistinctAllelesInGenotype) { - readGenotypeLikelihoodComponents[readDataOffset] = readLikelihoodComponentsByAlleleCount[alleleDataOffset++]; - } - } - - // Calculate the likelihood per read. 
- for (int r = 0, readDataOffset = 0; r < readCount; r++, readDataOffset += maximumDistinctAllelesInGenotype) { - likelihoodByRead[r] = MathUtils.approximateLog10SumLog10(readGenotypeLikelihoodComponents, readDataOffset, readDataOffset + componentCount); - } - } /** - * Calculates the likelihood component by read for a given genotype allele count assuming that there are - * exactly two alleles present in the genotype (with arbitrary non-zero counts each). - */ - void twoComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts, - final double[] likelihoodByRead, - final double[] readLikelihoodComponentsByAlleleCount, - final int readCount) { - final int allele0 = genotypeAlleleCounts.alleleIndexAt(0); - final int freq0 = genotypeAlleleCounts.alleleCountAt(0); - final int allele1 = genotypeAlleleCounts.alleleIndexAt(1); - final int freq1 = ploidy - freq0; // no need to get it from genotypeAlleleCounts. - int allele0LnLkOffset = readCount * ((ploidy + 1) * allele0 + freq0); - int allele1LnLkOffset = readCount * ((ploidy + 1) * allele1 + freq1); - for (int r = 0; r < readCount; r++) { - final double lnLk0 = readLikelihoodComponentsByAlleleCount[allele0LnLkOffset++]; - final double lnLk1 = readLikelihoodComponentsByAlleleCount[allele1LnLkOffset++]; - likelihoodByRead[r] = MathUtils.approximateLog10SumLog10(lnLk0, lnLk1); - } - } - - /** - * Calculates the likelihood component by read for a given genotype allele count assuming that there are - * exactly one allele present in the genotype. - */ - void singleComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts, - final double[] likelihoodByRead, final double[] readLikelihoodComponentsByAlleleCount, final int readCount) { - final int allele = genotypeAlleleCounts.alleleIndexAt(0); - // the count of the only component must be = ploidy. 
- int offset = (allele * (ploidy + 1) + ploidy) * readCount; - for (int r = 0; r < readCount; r++) { - likelihoodByRead[r] = - readLikelihoodComponentsByAlleleCount[offset++]; - } - } - - /** - * Returns a 3rd matrix with the likelihood components. - * - *
-     *     result[y][z][x] :=  z * lnLk ( read_x | allele_y ).
-     * 
- * - * @return never {@code null}. + * Given an input log10 log10Likelihoods matrix, subtract off the maximum of each read column so that each column's maximum is zero for numerical + * stability. (This is akin to dividing each read column by its maximum in non-log space). Then exponentiate to enter non-log space, mutating + * the log10Likelihoods matrix in-place. Finally, record the sum of all log-10 subtractions, which is the total amount in log10 space + * that must later be added to the overall likelihood, which is a sum over all reads (product in non-log space). + * @param log10Likelihoods an input log-10 likelihoods matrix */ - private double[] readLikelihoodComponentsByAlleleCount(final LikelihoodMatrix likelihoods) { - final int readCount = likelihoods.evidenceCount(); - final int alleleDataSize = readCount * (ploidy + 1); + private static Pair rescaledNonLogLikelihoods(final LikelihoodMatrix log10Likelihoods) { + final int alleleCount = log10Likelihoods.numberOfAlleles(); + final double[][] log10LikelihoodsByAlleleAndRead = log10Likelihoods.asRealMatrix().getData(); - // frequency1Offset = readCount to skip the useless frequency == 0. So now we are at the start frequency == 1 - // frequency1Offset += alleleDataSize to skip to the next allele index data location (+ readCount) at each iteration. - for (int a = 0, frequency1Offset = readCount; a < alleleCount; a++, frequency1Offset += alleleDataSize) { - likelihoods.copyAlleleLikelihoods(a, readAlleleLikelihoodByAlleleCount, frequency1Offset); + final int readCount = log10Likelihoods.evidenceCount(); + final double[] perReadMaxima = new double[readCount]; + Arrays.fill(perReadMaxima, 0, readCount, Double.NEGATIVE_INFINITY); - // p = 2 because the frequency == 1 we already have it.
- for (int frequency = 2, destinationOffset = frequency1Offset + readCount; frequency <= ploidy; frequency++) { - final double log10frequency = MathUtils.log10(frequency); - for (int r = 0, sourceOffset = frequency1Offset; r < readCount; r++) { - readAlleleLikelihoodByAlleleCount[destinationOffset++] = - readAlleleLikelihoodByAlleleCount[sourceOffset++] + log10frequency; - } + // find the maximum log-likelihood over all alleles for each read + // note how we traverse by read for cache-friendliness + for (int a = 0; a < alleleCount; a++) { + for (int r = 0; r < readCount; r++) { + perReadMaxima[r] = FastMath.max(perReadMaxima[r], log10LikelihoodsByAlleleAndRead[a][r]); } } - return readAlleleLikelihoodByAlleleCount; - } - - /** - * Returns the ploidy for this genotype likelihood calculator. - * @return 0 or greater. - */ - public int ploidy() { - return ploidy; - } - /** - * Returns the total number of alleles for this genotype calculator. - * @return the number of alleles considered by this calculator. - */ - public int alleleCount() { - return alleleCount; - } - - /** - * Returns the likelihood index given the allele counts. - * - * @param alleleCountArray the query allele counts. This must follow the format returned by - * {@link GenotypeAlleleCounts#copyAlleleCounts} with 0 offset. - * - * @throws IllegalArgumentException if {@code alleleCountArray} is not a valid {@code allele count array}: - *
    - *
  • is {@code null},
  • - *
  • or its length is not even,
  • - *
  • or it contains any negatives, - *
  • or the count sum does not match the calculator ploidy,
  • - *
  • or any of the alleles therein is negative or greater than the maximum allele index.
  • - *
- * - * @return 0 or greater but less than {@link #genotypeCount}. - */ - public int alleleCountsToIndex(final int ... alleleCountArray) { - Utils.nonNull(alleleCountArray, "the allele counts cannot be null"); - Utils.validateArg((alleleCountArray.length & 1) == 0, "the allele counts array cannot have odd length"); - alleleHeap.clear(); - for (int i = 0; i < alleleCountArray.length; i += 2) { - final int index = alleleCountArray[i]; - final int count = alleleCountArray[i+1]; - Utils.validateArg(count >= 0, "no allele count can be less than 0"); - for (int j = 0; j < count; j++) { - alleleHeap.add(index); + // subtract these maxima + for (int a = 0; a < alleleCount; a++) { + for (int r = 0; r < readCount; r++) { + log10LikelihoodsByAlleleAndRead[a][r] -= perReadMaxima[r]; } } - return alleleHeapToIndex(); - } - /** - * Transforms the content of the heap into an index. - * - *

- * The heap contents are flushed as a result, so is left ready for another use. - *

- * - * @return a valid likelihood index. - */ - private int alleleHeapToIndex() { - Utils.validateArg(alleleHeap.size() == ploidy, "the sum of allele counts must be equal to the ploidy of the calculator"); - Utils.validateArg(alleleHeap.peek() < alleleCount, () -> "invalid allele " + alleleHeap.peek() + " more than the maximum " + (alleleCount - 1)); - int result = 0; - for (int p = ploidy; p > 0; p--) { - final int allele = alleleHeap.remove(); - Utils.validateArg(allele >= 0, () -> "invalid allele " + allele + " must be equal or greater than 0 "); - result += alleleFirstGenotypeOffsetByPloidy[p][allele]; - } - return result; - } + final double scaleFactor = MathUtils.sum(perReadMaxima, 0, readCount); - /** - * Composes a genotype index map given a allele index recoding. - * - * @param oldToNewAlleleIndexMap allele recoding. The ith entry indicates the index of the allele in original encoding - * that corresponds to the ith allele index in the final encoding. - * - * @throws IllegalArgumentException if this calculator cannot handle the recoding provided. This is - * the case when either {@code oldToNewAlleleIndexMap}'s length or any of its element (+ 1 as they are 0-based) is larger - * this calculator's {@link #alleleCount()}. Also if any {@code oldToNewAllelesIndexMap} element is negative. - * - * @return never {@code null}. - */ - public int[] genotypeIndexMap(final int[] oldToNewAlleleIndexMap, final GenotypeLikelihoodCalculators calculators) { - Utils.nonNull(oldToNewAlleleIndexMap); - final int resultAlleleCount = oldToNewAlleleIndexMap.length; - Utils.validateArg(resultAlleleCount <= alleleCount, () -> "this calculator does not have enough capacity for handling " - + resultAlleleCount + " alleles "); - final int resultLength = resultAlleleCount == alleleCount - ? 
genotypeCount : calculators.genotypeCount(ploidy,resultAlleleCount); + // switch to non-log now that we have rescaled for numerical stability + new IndexRange(0, alleleCount).forEach(a -> MathUtils.applyToArrayInPlace(log10LikelihoodsByAlleleAndRead[a], x -> Math.pow(10.0, x))); - final int[] result = new int[resultLength]; - final int[] sortedAlleleCounts = new int[Math.max(ploidy, alleleCount) << 1]; - alleleHeap.clear(); - GenotypeAlleleCounts alleleCounts = genotypeAlleleCounts[0]; - for (int i = 0; i < resultLength; i++) { - genotypeIndexMapPerGenotypeIndex(i,alleleCounts, oldToNewAlleleIndexMap, result, sortedAlleleCounts); - if (i < resultLength - 1) { - alleleCounts = nextGenotypeAlleleCounts(alleleCounts); - } - } - return result; + // note that the variable name is now wrong + return ImmutablePair.of(log10LikelihoodsByAlleleAndRead, scaleFactor); } - /** - * Performs the genotype mapping per new genotype index. - * - * @param newGenotypeIndex the target new genotype index. - * @param alleleCounts tha correspond to {@code newGenotypeIndex}. - * @param oldToNewAlleleIndexMap the allele mapping. - * @param destination where to store the new genotype index mapping to old. - * @param sortedAlleleCountsBuffer a buffer to re-use to get the genotype-allele-count's sorted allele counts. 
- */ - private void genotypeIndexMapPerGenotypeIndex(final int newGenotypeIndex, final GenotypeAlleleCounts alleleCounts, final int[] oldToNewAlleleIndexMap, final int[] destination, final int[] sortedAlleleCountsBuffer) { - final int distinctAlleleCount = alleleCounts.distinctAlleleCount(); - alleleCounts.copyAlleleCounts(sortedAlleleCountsBuffer,0); - for (int j = 0, jj = 0; j < distinctAlleleCount; j++) { - final int oldIndex = sortedAlleleCountsBuffer[jj++]; - final int repeats = sortedAlleleCountsBuffer[jj++]; - final int newIndex = oldToNewAlleleIndexMap[oldIndex]; - if (newIndex < 0 || newIndex >= alleleCount) { - throw new IllegalArgumentException("found invalid new allele index (" + newIndex + ") for old index (" + oldIndex + ")"); - } - for (int k = 0; k < repeats; k++) { - alleleHeap.add(newIndex); - } - } - final int genotypeIndex = alleleHeapToIndex(); // this cleans the heap for the next use. - destination[newGenotypeIndex] = genotypeIndex; - } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorDRAGEN.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorDRAGEN.java index 40707f0a72c..5920de1d8de 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorDRAGEN.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorDRAGEN.java @@ -1,7 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper; import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.GenotypeLikelihoods; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerGenotypingDebugger; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -32,23 +31,11 @@ public final class GenotypeLikelihoodCalculatorDRAGEN extends GenotypeLikelihood // PhredScaled adjustment applied to 
the BQD score (this controls the weight of the base quality prior term in the BQD calculation) static final double PHRED_SCALED_ADJUSTMENT_FOR_BQ_SCORE = 2.5; - // Cache for enforcing the strictness of using the filled array with the correct likelihoods object - private LikelihoodMatrix cachedLikelihoods = null; + private static final double CACHED_LOG_10_ERROR_RATE = Math.log10(BQD_FIXED_ERROR_RATE); + private static final double CACHED_LOG_10_NON_ERROR_RATE = Math.log10(1 - BQD_FIXED_ERROR_RATE); - private final double cachedLog10ErrorRate; - private final double cachedLog10NonErrorRate; - - /** - * Creates a new calculator providing its ploidy and number of genotyping alleles. - */ - protected GenotypeLikelihoodCalculatorDRAGEN(final int ploidy, final int alleleCount, - final int[][] alleleFirstGenotypeOffsetByPloidy, - final GenotypeAlleleCounts[][] genotypeTableByPloidy) { - super(ploidy, alleleCount, alleleFirstGenotypeOffsetByPloidy, genotypeTableByPloidy); - Utils.validateArg(ploidy > 0, () -> "ploidy must be at least 1 but was " + ploidy); - // The number of possible components is limited by distinct allele count and ploidy. - cachedLog10ErrorRate = Math.log10(BQD_FIXED_ERROR_RATE); - cachedLog10NonErrorRate = Math.log10(1 - BQD_FIXED_ERROR_RATE); + private GenotypeLikelihoodCalculatorDRAGEN() { + super(); } /** @@ -70,33 +57,24 @@ protected GenotypeLikelihoodCalculatorDRAGEN(final int ploidy, final int alleleC * @param strandReverse list of reads in the reverse orientation overlapping the site * @param paddedReference reference bases (with padding) used for calculating homopolymer adjustemnt * @param offsetForRefIntoEvent offset of the variant into the reference event - * @param calculators likelihoods calculators object pre-filled with scores * @return An array corresponding to the likelihoods array score for BQD, with Double.NEGATIVE_INFINITY filling all mixed allele/indel allelse */ - public
double[] calculateBQDLikelihoods(final LikelihoodMatrix sampleLikelihoods, + public static double[] calculateBQDLikelihoods(final int ploidy, final LikelihoodMatrix sampleLikelihoods, final List strandForward, final List strandReverse, final byte[] paddedReference, - final int offsetForRefIntoEvent, - final GenotypeLikelihoodCalculators calculators) { - // First we invalidate the cache - Utils.validate(sampleLikelihoods == cachedLikelihoods, "There was a mismatch between the sample stored by the genotyper and the one requested for BQD, this will result in invalid genotype calling"); - final double[] outputArray = new double[genotypeCount]; + final int offsetForRefIntoEvent) { + final int alleleCount = sampleLikelihoods.numberOfAlleles(); + final double[] outputArray = new double[GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount)]; Arrays.fill(outputArray, Double.NEGATIVE_INFINITY); final Allele refAllele = sampleLikelihoods.getAllele(0); - //Determine the size of an allele page for the readsLikelihoodsByAlleleFrequency table - final int readCount = sampleLikelihoods.evidenceCount(); - final int alleleDataSize = readCount * (ploidy + 1); - for (int gtAlleleIndex = 0; gtAlleleIndex < sampleLikelihoods.numberOfAlleles(); gtAlleleIndex++) { - //This is crufty, it just so happens that the index of the homozygous genotype corresponds to the maximum genotype count per field. - //This should be pulled off as a calculator in some genotyping class. 
- final int indexForGT = calculators.genotypeCount(ploidy, gtAlleleIndex + 1) - 1; - final double[] readLikelihoodsForGT = readLikelihoodsByGenotypeIndex[indexForGT]; + // find the index of the homozygous gtAllele genotype + final int indexForGT = GenotypeIndexCalculator.alleleCountsToIndex(gtAlleleIndex, ploidy); - for (int errorAlleleIndex = 0, offsetForReadLikelihoodGivenAlleleIndex = readCount; errorAlleleIndex < sampleLikelihoods.numberOfAlleles(); errorAlleleIndex++, offsetForReadLikelihoodGivenAlleleIndex += alleleDataSize) { + for (int errorAlleleIndex = 0; errorAlleleIndex < sampleLikelihoods.numberOfAlleles(); errorAlleleIndex++) { // We only want to make calls on SNPs for now if (sampleLikelihoods.getAllele(gtAlleleIndex) == sampleLikelihoods.getAllele(errorAlleleIndex) || sampleLikelihoods.getAllele(gtAlleleIndex).length() != refAllele.length() || @@ -110,8 +88,8 @@ public double[] calculateBQDLikelihoods(final LikelihoodMatri final double reverseHomopolymerAdjustment = FRDBQDUtils.computeReverseHomopolymerAdjustment(paddedReference, offsetForRefIntoEvent, baseOfErrorAllele); // BQD scores by strand - final double minScoreFoundForwardsStrand = computeBQDModelForStrandData(strandForward, forwardHomopolymerAdjustment, readLikelihoodsForGT, offsetForReadLikelihoodGivenAlleleIndex, true, errorAlleleIndex); - final double minScoreFoundReverseStrand = computeBQDModelForStrandData(strandReverse, reverseHomopolymerAdjustment, readLikelihoodsForGT, offsetForReadLikelihoodGivenAlleleIndex, false, errorAlleleIndex); + final double minScoreFoundForwardsStrand = computeBQDModelForStrandData(sampleLikelihoods, strandForward, forwardHomopolymerAdjustment, true, gtAlleleIndex, errorAlleleIndex); + final double minScoreFoundReverseStrand = computeBQDModelForStrandData(sampleLikelihoods, strandReverse, reverseHomopolymerAdjustment, false, gtAlleleIndex, errorAlleleIndex); final double modelScoreInLog10 = (minScoreFoundForwardsStrand + minScoreFoundReverseStrand) * -0.1; 
////// @@ -128,16 +106,16 @@ public double[] calculateBQDLikelihoods(final LikelihoodMatri * * This method works by combining the computed genotype scores for reads with the raw allele likelihoods scores for each evidence * + * @param sampleLikelihoods allele likelihoods containing data for reads * @param positionSortedReads Reads pairs objects (Pair, sampleReadIndex>) objects sorted in the correct order for partitioning. * This means that the "error" reads in the partition are sorted by read cycle first in the provided list * @param homopolymerAdjustment Penalty to be applied to reads based on the homopolymer run (this should be precomputed for the ref site in quesiton) - * @param readLikelihoodsForGT The array corresponding to the log_10 genotype scores for the genotype in question - * @param offsetForReadLikelihoodGivenAlleleIndex * @return phred scale likelihood for a BQD error mode for reads in the given direction according to the offsets requested */ - private double computeBQDModelForStrandData(final List positionSortedReads, - final double homopolymerAdjustment, final double[] readLikelihoodsForGT, - final int offsetForReadLikelihoodGivenAlleleIndex, final boolean forwards, final int errorAlleleIndex) { + private static double computeBQDModelForStrandData(final LikelihoodMatrix sampleLikelihoods, + final List positionSortedReads, + final double homopolymerAdjustment, + final boolean forwards, final int homozygousAlleleIndex, final int errorAlleleIndex) { // If no reads are found for a particular strand direction return no adjusted likelihoods score for those (non-existent) reads if (positionSortedReads.isEmpty()) { return 0.0; @@ -163,8 +141,8 @@ private double computeBQDModelForStrandData(final List double[] calculateFRDLikelihoods(final LikelihoodMatrix sampleLikelihoods, final double[] ploidyModelLikelihoods, + public static double[] calculateFRDLikelihoods(final int ploidy, final LikelihoodMatrix sampleLikelihoods, final double[] ploidyModelLikelihoods, 
final List readContainers, - final double snipAprioriHet, final double indelAprioriHet, final int maxEffectiveDepthForHetAdjustment, - final GenotypeLikelihoodCalculators calculators) { - Utils.validate(sampleLikelihoods == cachedLikelihoods, "There was a mismatch between the sample stored by the genotyper and the one requested for BQD, this will result in invalid genotyping"); - final double[] outputArray = new double[genotypeCount]; + final double snipAprioriHet, final double indelAprioriHet, final int maxEffectiveDepthForHetAdjustment) { + final int alleleCount = sampleLikelihoods.numberOfAlleles(); + final double[] outputArray = new double[GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount)]; Arrays.fill(outputArray, Double.NEGATIVE_INFINITY); final Allele refAllele = sampleLikelihoods.getAllele(0); - //Determine the size of an allele page for the readsLikelihoodsByAlleleFrequency table - final int readCount = sampleLikelihoods.evidenceCount(); - final int alleleDataSize = readCount * (ploidy + 1); - - for (int fAlleleIndex = 0, offsetForReadLikelihoodGivenAlleleIndex = readCount; fAlleleIndex < sampleLikelihoods.numberOfAlleles(); fAlleleIndex++, offsetForReadLikelihoodGivenAlleleIndex += alleleDataSize) { + for (int fAlleleIndex = 0; fAlleleIndex < sampleLikelihoods.numberOfAlleles(); fAlleleIndex++) { // ignore symbolic alleles final boolean isIndel = sampleLikelihoods.getAllele(fAlleleIndex).length() != refAllele.length(); @@ -278,19 +250,21 @@ public double[] calculateFRDLikelihoods(final LikelihoodMatri //This is crufty, it just so happens that the index of the homozygous genotype corresponds to the maximum genotype count per field. //This should be pulled off as a calculator in some genotyping class. 
- final int indexForGT = calculators.genotypeCount(ploidy, gtAlleleIndex + 1) - 1; - final double[] readLikelihoodsForGT = readLikelihoodsByGenotypeIndex[indexForGT]; + final int indexForGT = GenotypeIndexCalculator.alleleCountsToIndex(gtAlleleIndex, ploidy); // TODO restore the critical thresholds if (HaplotypeCallerGenotypingDebugger.isEnabled()) { - HaplotypeCallerGenotypingDebugger.println("indexForGT "+indexForGT+ " ooffsetForReadLikelihoodGivenAlleleIndex ="+offsetForReadLikelihoodGivenAlleleIndex); + HaplotypeCallerGenotypingDebugger.println("indexForGT "+indexForGT); HaplotypeCallerGenotypingDebugger.println("\nForwards Strands: "); } - final double[] maxLog10FForwardsStrand = computeFRDModelForStrandData(readContainers, c -> !c.isReverseStrand() , offsetForReadLikelihoodGivenAlleleIndex, readLikelihoodsForGT, thresholds.getCriticalThresholdsTotal()); + final double[] maxLog10FForwardsStrand = computeFRDModelForStrandData(sampleLikelihoods, gtAlleleIndex, fAlleleIndex, readContainers, + c -> !c.isReverseStrand() , thresholds.getCriticalThresholdsTotal()); if (HaplotypeCallerGenotypingDebugger.isEnabled()) { HaplotypeCallerGenotypingDebugger.println("\nReverse Strands: ");} - final double[] maxLog10FReverseStrand = computeFRDModelForStrandData(readContainers, c -> c.isReverseStrand(), offsetForReadLikelihoodGivenAlleleIndex, readLikelihoodsForGT, thresholds.getCriticalThresholdsTotal()); + final double[] maxLog10FReverseStrand = computeFRDModelForStrandData(sampleLikelihoods, gtAlleleIndex, fAlleleIndex, readContainers, + c -> c.isReverseStrand(), thresholds.getCriticalThresholdsTotal()); if (HaplotypeCallerGenotypingDebugger.isEnabled()) { HaplotypeCallerGenotypingDebugger.println("\nBoth Strands: ");} - final double[] maxLog10FBothStrands = computeFRDModelForStrandData(readContainers, c -> true, offsetForReadLikelihoodGivenAlleleIndex, readLikelihoodsForGT, thresholds.getCriticalThresholdsTotal()); + final double[] maxLog10FBothStrands = 
computeFRDModelForStrandData(sampleLikelihoods, gtAlleleIndex, fAlleleIndex, readContainers, + c -> true, thresholds.getCriticalThresholdsTotal()); if (HaplotypeCallerGenotypingDebugger.isEnabled()) { HaplotypeCallerGenotypingDebugger.println("gtAlleleIndex : "+gtAlleleIndex+ " fAlleleIndex: "+fAlleleIndex +" forwards: "+maxLog10FForwardsStrand+" reverse: "+maxLog10FReverseStrand+" both: "+maxLog10FBothStrands); @@ -307,7 +281,7 @@ public double[] calculateFRDLikelihoods(final LikelihoodMatri if (maxEffectiveDepthForHetAdjustment > 0) { // Use the index corresponding the mixture of F and final double localBestModelScore = localBestModel[0] - localBestModel[1]; - final int closestGTAlleleIndex = allelesToIndex(gtAlleleIndex, fAlleleIndex); + final int closestGTAlleleIndex = GenotypeIndexCalculator.allelesToIndex(gtAlleleIndex, fAlleleIndex); final double log10LikelihoodsForPloyidyModel = ploidyModelLikelihoods[closestGTAlleleIndex] - -MathUtils.LOG10_ONE_HALF; final int depthForGenotyping = sampleLikelihoods.evidenceCount(); final double adjustedBestModel = log10LikelihoodsForPloyidyModel + ((localBestModelScore - log10LikelihoodsForPloyidyModel) @@ -333,15 +307,19 @@ public double[] calculateFRDLikelihoods(final LikelihoodMatri /** + * @param sampleLikelihoods the likelihoods object with allele likelihoods for the reads to be genotyped + * @param homozygousAlleleIndex index of allele in homzygous genotype whose likelihood is to be adjusted + * @param fAlleleIndex index of foreign allele within likelihoods matrix * @param positionSortedReads read containers to use for genotyping * @param predicate predicate used to select the correct orientation combination for reads when genotyping - * @param offsetForReadLikelihoodGivenAlleleIndex offset corresponding to the Error Allele in the reads likelihoods object array - * @param readLikelihoodsForGT reads likelihoods for Genotype array table * @param criticalThresholdsSorted critical thresholds to use for this orientation 
combination * @return two doubles, index 0 is the frd score and the second is log p(F()) score used to adjust the score */ - private double[] computeFRDModelForStrandData(final List positionSortedReads, final Predicate predicate, - final int offsetForReadLikelihoodGivenAlleleIndex, final double[] readLikelihoodsForGT, final List criticalThresholdsSorted) { + private static double[] computeFRDModelForStrandData(final LikelihoodMatrix sampleLikelihoods, + final int homozygousAlleleIndex, final int fAlleleIndex, + final List positionSortedReads, + final Predicate predicate, + final List criticalThresholdsSorted) { if (positionSortedReads.isEmpty()) { return new double[]{Double.NEGATIVE_INFINITY, 0}; } @@ -369,8 +347,8 @@ private double[] computeFRDModelForStrandData(final List than the threshold) final double LPd_r_F = container.getPhredPFValue() + 0.0000001 <= logProbFAllele ? Double.NEGATIVE_INFINITY : - readAlleleLikelihoodByAlleleCount[offsetForReadLikelihoodGivenAlleleIndex + readIndex]; - final double lp_r_GT = readLikelihoodsForGT[readIndex] - -MathUtils.LOG10_ONE_HALF; + sampleLikelihoods.get(fAlleleIndex, readIndex); + final double lp_r_GT = sampleLikelihoods.get(homozygousAlleleIndex, readIndex); fAlleleProbRatio += Math.pow(10, LPd_r_F - MathUtils.approximateLog10SumLog10(LPd_r_F, lp_r_GT)); fAlleleProbDenom++; @@ -391,13 +369,13 @@ private double[] computeFRDModelForStrandData(final List container, final double log10MapqPriorAdjustment) { + private static FRDCriticalThresholds computeCriticalValues(final List container, final double log10MapqPriorAdjustment) { final Set criticalThresholdsForwards = new HashSet<>(); final Set criticalThresholdsReverse = new HashSet<>(); final Set criticalThresholdsTotal = new HashSet<>(); @@ -452,34 +430,10 @@ private FRDCriticalThresholds computeCriticalValues(final List GenotypeLikelihoods genotypeLikelihoods(final LikelihoodMatrix likelihoods) { - cachedLikelihoods = null; - GenotypeLikelihoods output = 
super.genotypeLikelihoods(likelihoods); - cachedLikelihoods = likelihoods; - return output; - } - - /** - * See {@link GenotypeLikelihoodCalculator#getReadRawReadLikelihoodsByGenotypeIndex}. This wrapper just enforces that the likelihoods object is recorded in the cache. - * - * @return never {@code null}. - */ - public double[] rawGenotypeLikelihoods(final LikelihoodMatrix likelihoods) { - cachedLikelihoods = null; - double[] output = super.getReadRawReadLikelihoodsByGenotypeIndex(likelihoods); - cachedLikelihoods = likelihoods; - return output; - } - /** * Helper class for storing FRD sorted and de-duplicated critical thresholds generated from reads to be accessed by subsequent calls. */ - private class FRDCriticalThresholds { + private static class FRDCriticalThresholds { private final List criticalThresholdsForwards; private final List criticalThresholdsReverse; private final List criticalThresholdsTotal; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java deleted file mode 100644 index fbf84a14fe3..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculators.java +++ /dev/null @@ -1,418 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.Arrays; - -/** - * Genotype likelihood calculator utility. This class is thread-safe since access to shared mutable state is - * synchronized. - * - *

- * This class provide genotype likelihood calculators with any number of alleles able given an arbitrary ploidy and allele - * count (number of distinct alleles). - *

- */ -public final class GenotypeLikelihoodCalculators { - - private static final Logger logger = LogManager.getLogger(GenotypeLikelihoodCalculators.class); - - /** - * The current maximum ploidy supported by the tables. - *

- * Its initial value indicates the initial capacity of the shared {@link #genotypeTableByPloidy}. Feel free - * to change it to anything reasonable that is non-negative. - *

- */ - private int maximumPloidy = 2; // its initial value is the initial capacity of the shared tables. - - /** - * Maximum possible number of genotypes that this calculator can handle. - */ - public static final int MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY = 1000; - - /** - * Mark to indicate genotype-count overflow due to a large number of allele and ploidy; - */ - static final int GENOTYPE_COUNT_OVERFLOW = -1; - - /** - * The current maximum allele index supported by the tables. - *

- * Its initial value indicates the initial capacity of the shared {@link #alleleFirstGenotypeOffsetByPloidy} table. - * Feel free to change it to anything reasonable that is non-negative. - *

- */ - private int maximumAllele = 1; // its initial value is the initial capacity of the shared tables. - - /** - * Shared copy of the offset table as described in {@link #buildGenotypeAlleleCountsTable(int, int, int[][])}. - * - * This reference holds the largest requested so far in terms of maximum-allele and maximum-ploidy. - */ - private int[][] alleleFirstGenotypeOffsetByPloidy = - buildAlleleFirstGenotypeOffsetTable(maximumPloidy, maximumAllele); - - - /** - * Shared table of genotypes given the ploidy sorted by their index in the likelihood array. - * - *

- * Its format is described in {@link #buildGenotypeAlleleCountsTable(int, int, int[][])}. - *

- */ - private GenotypeAlleleCounts[][] genotypeTableByPloidy = - buildGenotypeAlleleCountsTable(maximumPloidy,maximumAllele,alleleFirstGenotypeOffsetByPloidy); - - public GenotypeLikelihoodCalculators(){ - - } - - /** - * Build the table with the genotype offsets based on ploidy and the maximum allele index with representation - * in the genotype. - *

- * The result is a matrix containing the offset of the first genotype that contain a particular allele - * stratified by ploidy. - *

- * Row (first dimension) represent the ploidy, whereas - * the second dimension represents the allele. - *

- * - *

- * Thus the value a position [p][a] indicates how many genotypes of ploidy p there are before the first - * one that contains allele a.
- * - * For example, considering ploidy 3 and alleles A, B, C, D, etc ... (indexed 0, 1, 2, ... respectively): - *
- * [3][A] == [3][0] == 0 as the first genotype AAA contains A. - *
- * [3][C] == [3][2] == 4 as the first genotype that contains C, AAC follows: AAA AAB ABB BBB - *
- * [4][D] == [4][3] == 14 as the first genotype that contains D, AAAD follows: AAAA AAAB AABB ABBB BBBB AAAC - * AABC ABBC BBBC AACC ABCC BBCC ACCC BCCC CCCC. - * - *

- * - *

- * This value are calculated recursively as follows: - *

- *
-     *
-     *     Offset[p][a] := Offset[p-1][a] + Offset[p][a-1] when a > 0, p > 0
-     *                     0                               when a == 0
-     *                     1                               otherwise
-     *
-     *
-     *         0 1 1  1  1  1   1 ...
-     *         0 1 2  3  4  5   6 ...
-     *         0 1 3  6 10 15  21 ...
-     *         0 1 4 10 20 35  56 ...
-     *         0 1 5 15 35 70 126 ...
-     *         0 ..................
-     * 
- * - *

- * Note: if someone can come with a close form computable 0(1) (respect to ploidy and allele count) - * please let the author know. - *

- * - *

- * The matrix is guaranteed to have as many rows as indicated by {@code maximumPloidy} + 1; the first - * row refers to the special case of ploidy == 0, the second row to ploidy 1 and so forth. Thus the ploidy - * matches the index. - *

- *

- * The matrix is guaranteed to have as many columns as indicate by {@code maximumAllele} + 1. In this case however - * the first allele index 0 is a sense allele (typically the reference allele). The reason to have at least the total - * genotype count up to allele count {@link @alleleCapacity} that is equal to the offset of the first genotype - * of the following allele; thus we need an extra one. - *

- * - *

- * Although it might seem non-sense to have genotypes of ploidy 0. The values in the first row are used when - * filling up values in row 1 and so forth so it is present for programmatic convenience. - * Offsets in this row are 0 for the first column and 1 for any others. - *

- * - * @param maximumPloidy maximum supported ploidy. - * @param maximumAllele maximum supported allele index. - * - * @throws IllegalArgumentException if {@code maximumPloidy} or {@code maximumAllele} is negative. - * - * @return never {@code null}, the matrix described with enough information to address - * problems concerning up to the requested maximum allele index and ploidy. - */ - private static int[][] buildAlleleFirstGenotypeOffsetTable(final int maximumPloidy, final int maximumAllele) { - checkPloidyAndMaximumAllele(maximumPloidy, maximumAllele); - final int rowCount = maximumPloidy + 1; - final int colCount = maximumAllele + 1; - final int[][] result = new int[rowCount][colCount]; - - // Ploidy 0 array must be { 0, 1, 1, ...., 1} - Arrays.fill(result[0], 1, colCount, 1); - // Now we take care of the rest of ploidies. - // We leave the first allele offset to it correct value 0 by starting with allele := 1. - for (int ploidy = 1; ploidy < rowCount; ploidy++) { - for (int allele = 1; allele < colCount; allele++) { - result[ploidy][allele] = result[ploidy][allele - 1] + result[ploidy - 1][allele]; - if (result[ploidy][allele] < result[ploidy][allele - 1]) { - result[ploidy][allele] = GENOTYPE_COUNT_OVERFLOW; - } - } - } - return result; - } - - /** - * Composes a table with the lists of all possible genotype allele counts given the the ploidy and maximum allele index. - *

- * The resulting matrix has at least as many rows as {@code maximumPloidy } + 1 as the first row with index 0 correspond - * to ploidy == 0. Each row array has as many positions as necessary to contain all possible genotype-allele-counts in increasing order. - * This quantity varies with the ploidy. - *

- * - *

- * Therefore result[3][4] would contain the 5th genotype with ploidy 3, and result[4].length - * would be equal to the count of possible genotypes for ploidy 4. - *

- * - * @param maximumPloidy maximum ploidy to use in queries to the resulting table. - * @param maximumAllele maximum allele index to use in queries to the resulting table. - * @param offsetTable an allele first genotype offset table as constructed using {@link #buildAlleleFirstGenotypeOffsetTable(int, int)} - * that supports at least up to {@code maximumAllele} and {@code maximumPloidy}. - * - * @throws IllegalArgumentException if {@code maximumPloidy} or {@code maximumAllele} is negative, or {@code offsetTable} is {@code null}, - * or it does not have the capacity to handle the requested maximum ploidy or allele index. - * - * @return never {@code null}. - */ - private static GenotypeAlleleCounts[][] buildGenotypeAlleleCountsTable(final int maximumPloidy, final int maximumAllele, final int[][] offsetTable) { - checkPloidyAndMaximumAllele(maximumPloidy, maximumAllele); - checkOffsetTableCapacity(offsetTable,maximumPloidy,maximumAllele); - final int rowCount = maximumPloidy + 1; - final GenotypeAlleleCounts[][] result = new GenotypeAlleleCounts[rowCount][]; // each row has a different number of columns. - - for (int ploidy = 0; ploidy <= maximumPloidy; ploidy++) { - result[ploidy] = buildGenotypeAlleleCountsArray(ploidy, maximumAllele, offsetTable); - } - - return result; - } - - /** - * Builds a genotype-allele-counts array given the genotype ploidy and how many genotype you need. - *

- * The result is guarantee to have exactly {@code length} positions and the elements are sorted - * in agreement with the standard way to display genotypes following the VCF standard. - *

- * - *

Notice that is possible to request ploidy ==0. In that case the resulting array will have repetitions - * of the empty genotype allele count. - *

- * - *

- * For example, - * - *

-     *         ploidy = 1, length = 5 : [ {A}, {B}, {C}, {D}, {E} ]
-     *         ploidy = 2, length = 7 : [ {AA}, {AB}, {BB}, {AC}, {BC}, {CC}, {AD}
-     *         ploidy = 3, length = 10 : [ {AAA}, {AAB}, {ABB}, {BBB}, {AAC}, {ABC}, {BBC}, {BCC}, {CCC}, {AAD} ]
-     *     
- *

- * - * @param ploidy requested ploidy. - * @param alleleCount number of different alleles that the genotype table must support. - * @param genotypeOffsetTable table with the offset of the first genotype that contain an allele given - * the ploidy and its index. - * - * @throws IllegalArgumentException if {@code ploidy} or {@code length} is negative. - * - * @return never {@code null}, follows the specification above. - */ - private static GenotypeAlleleCounts[] buildGenotypeAlleleCountsArray(final int ploidy, final int alleleCount, final int[][] genotypeOffsetTable) { - Utils.validateArg(ploidy >= 0, () -> "the requested ploidy cannot be negative: " + ploidy); - Utils.validateArg(alleleCount >= 0, () -> "the requested maximum allele cannot be negative: " + alleleCount); - final int length = genotypeOffsetTable[ploidy][alleleCount]; - final int strongRefLength = length == GENOTYPE_COUNT_OVERFLOW ? MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY : Math.min(length, MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY); - final GenotypeAlleleCounts[] result = new GenotypeAlleleCounts[strongRefLength]; - result[0] = GenotypeAlleleCounts.first(ploidy); - for (int genotypeIndex = 1; genotypeIndex < strongRefLength; genotypeIndex++) { - result[genotypeIndex] = result[genotypeIndex - 1].next(); - } - return result; - } - - - /** - * Returns an instance given its ploidy and the number of alleles. - * - * @param alleleCount the required allele-count. - * @param ploidy the required ploidy-count. - * - * @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is negative, or the resulting number of genotypes is too large. - * - * @return never {@code null}. - */ - public synchronized GenotypeLikelihoodCalculator getInstance(final int ploidy, final int alleleCount) { - calculateGenotypeCountsUsingTablesAndValidate(ploidy, alleleCount); - - // At this point the tables must have at least the requested capacity, likely to be much more. 
- return new GenotypeLikelihoodCalculator(ploidy, alleleCount, alleleFirstGenotypeOffsetByPloidy, genotypeTableByPloidy); - } - - /** - * Calculate genotype counts using the tables and validate that there is no overflow - */ - private synchronized void calculateGenotypeCountsUsingTablesAndValidate(final int ploidy, final int alleleCount) { - checkPloidyAndMaximumAllele(ploidy, alleleCount); - - if (calculateGenotypeCountUsingTables(ploidy, alleleCount) == GENOTYPE_COUNT_OVERFLOW) { - final double largeGenotypeCount = Math.pow(10, MathUtils.log10BinomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1)); - throw new UserException.WarnableAnnotationFailure(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount)); - } - } - - /** - * Returns an instance of the DRAGEN genotypeLikelihoodCalculator given its ploidy and the number of alleles. - * - * @param alleleCount the required allele-count. - * @param ploidy the required ploidy-count. - * - * @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is negative, or the resulting number of genotypes is too large. - * - * @return never {@code null}. - */ - public synchronized GenotypeLikelihoodCalculatorDRAGEN getInstanceDRAGEN(final int ploidy, final int alleleCount) { - Utils.validate(ploidy == 2, "DRAGEN genotyping mode currently only supports diploid samples"); - calculateGenotypeCountsUsingTablesAndValidate(ploidy, alleleCount); - - // At this point the tables must have at least the requested capacity, likely to be much more. - return new GenotypeLikelihoodCalculatorDRAGEN(ploidy, alleleCount, alleleFirstGenotypeOffsetByPloidy, genotypeTableByPloidy); - } - - - /** - * Update of shared tables. - * - * @param requestedMaximumAllele the new requested maximum allele maximum. - * @param requestedMaximumPloidy the new requested ploidy maximum. 
- */ - private synchronized void ensureCapacity(final int requestedMaximumAllele, final int requestedMaximumPloidy) { - - final boolean needsToExpandAlleleCapacity = requestedMaximumAllele > maximumAllele; - final boolean needsToExpandPloidyCapacity = requestedMaximumPloidy > maximumPloidy; - - if (!needsToExpandAlleleCapacity && !needsToExpandPloidyCapacity) { - return; - } - - final int newMaximumPloidy = Math.max(maximumPloidy, requestedMaximumPloidy); - final int newMaximumAllele = Math.max(maximumAllele, requestedMaximumAllele); - - logger.debug("Expanding capacity ploidy:" + maximumPloidy + "->" + newMaximumPloidy + " allele:" + maximumAllele +"->" + newMaximumAllele ); - - // Update tables first. - alleleFirstGenotypeOffsetByPloidy = buildAlleleFirstGenotypeOffsetTable(newMaximumPloidy,newMaximumAllele); - genotypeTableByPloidy = buildGenotypeAlleleCountsTable(newMaximumPloidy,newMaximumAllele,alleleFirstGenotypeOffsetByPloidy); - - if (needsToExpandAlleleCapacity) { - maximumAllele = requestedMaximumAllele; - } - if (needsToExpandPloidyCapacity) { - maximumPloidy = requestedMaximumPloidy; - } - } - - /** - * Perform value checks on maximumPloidy and allele passed to diverse methods in this class. - *

- * Throws an exception if there is any issues. - *

- * - * @param ploidy the maximum ploidy value. - * @param maximumAllele the maximum allele value. - * - * @throws IllegalArgumentException if either value is negative. - */ - private static void checkPloidyAndMaximumAllele(final int ploidy, final int maximumAllele) { - Utils.validateArg(ploidy >= 0, () -> "the ploidy provided cannot be negative: " + ploidy); - Utils.validateArg(maximumAllele >= 0, () -> "the maximum allele index provided cannot be negative: " + maximumAllele); - } - - private static void checkOffsetTableCapacity(final int[][] offsetTable, final int maximumPloidy, final int maximumAllele) { - Utils.nonNull(offsetTable, "the allele first genotype offset table provided cannot be null"); - Utils.validateArg(offsetTable.length > maximumPloidy, () -> "the allele first genotype offset table provided does not have enough " + - "capacity for requested maximum ploidy: " + maximumPloidy); - Utils.validateArg(offsetTable[0].length >= maximumAllele, () -> "the allele first genotype offset table provided does not have enough " + - "capacity for requested maximum allele index: " + maximumAllele); - } - - - /** - * Returns the number of possible genotypes given the ploidy and number of different alleles. - * @param ploidy the requested ploidy. - * @param alleleCount the requested number of alleles. - * - * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative or - * the number of genotypes is too large (more than {@link Integer#MAX_VALUE}). - * - * @return the number of genotypes given ploidy and allele count (0 or greater). 
- */ - public int genotypeCount(final int ploidy, final int alleleCount) { - - final int result = calculateGenotypeCountUsingTables(ploidy, alleleCount); - if (result == GENOTYPE_COUNT_OVERFLOW) { - final double largeGenotypeCount = Math.pow(10, MathUtils.log10BinomialCoefficient(ploidy + alleleCount - 1, alleleCount - 1)); - throw new UserException.WarnableAnnotationFailure(String.format("the number of genotypes is too large for ploidy %d and allele %d: approx. %.0f", ploidy, alleleCount, largeGenotypeCount)); - } - return result; - } - - /** - * Compute the maximally acceptable allele count (ref allele included) given the maximally acceptable genotype count. - * @param ploidy sample ploidy - * @param maxGenotypeCount maximum number of genotype count used to calculate upper bound on number of alleles given ploidy - * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative. - * @return the maximally acceptable allele count given ploidy and maximum number of genotypes acceptable - */ - public static int computeMaxAcceptableAlleleCount(final int ploidy, final int maxGenotypeCount){ - - checkPloidyAndMaximumAllele(ploidy, ploidy); // a hack to check ploidy makes sense (could duplicate code but choice must be made) - - if (ploidy == 1) { - return maxGenotypeCount; - } - final double log10MaxGenotypeCount = Math.log10(maxGenotypeCount); - - // Math explanation: genotype count is determined by ${P+A-1 \choose A-1}$, this leads to constraint - // $\log(\frac{(P+A-1)!}{(A-1)!}) \le \log(P!G)$, - // where $P$ is ploidy, $A$ is allele count, and $G$ is maxGenotypeCount - // The upper and lower bounds of the left hand side of the constraint are $P \log(A-1+P)$ and $P \log(A)$ - // which require $A$ to be searched in interval $[10^{\log(P!G)/P} - (P-1), 10^{\log(P!G)/P}]$ - // Denote $[10^{\log(P!G)/P}$ as $x$ in the code. 
- - final double x = Math.pow(10, (MathUtils.log10Factorial(ploidy) + log10MaxGenotypeCount)/ploidy ); - final int lower = (int)Math.floor(x) - ploidy - 1; - final int upper = (int)Math.ceil(x); - for(int a=upper; a>=lower; --a){// check one by one - - final double log10GTCnt = MathUtils.log10BinomialCoefficient(ploidy+a-1, a-1); - if(log10MaxGenotypeCount >= log10GTCnt) { - return a; - } - } - throw new GATKException("Code should never reach here."); - } - - private synchronized int calculateGenotypeCountUsingTables(int ploidy, int alleleCount) { - checkPloidyAndMaximumAllele(ploidy, alleleCount); - ensureCapacity(alleleCount, ploidy); - return alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount]; - } -} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCache.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCache.java new file mode 100644 index 00000000000..0d80c1843af --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCache.java @@ -0,0 +1,89 @@ +package org.broadinstitute.hellbender.tools.walkers.genotyper; + +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class holds caches of {@link GenotypeAlleleCounts} for multiple fixed ploidy, allele count pairs, + * allowing for fast random access of genotypes. Note that the increment method of GenotypeAlleleCounts is always fast, + * so the caches here are only necessary when incremental traversal over genotypes in the canonical order is not possible. + * + * + * This class is thread-safe since modifying the caches is synchronized. + */ +public final class GenotypesCache { + + /** + * Maximum possible number of cached {@link GenotypeAlleleCounts} for each fixed ploidy + */ + public static final int MAX_CACHE_SIZE = 5000; + + /** + * Cache of GenotypeAlleleCounts objects by ploidy. 
Format is caches[p][n] = nth genotype of ploidy p in canonical order, + * with p up to the current maximum ploidy and n up to the maximum number of cached genotypes per table. + */ + private static List> caches = new ArrayList<>(); + + private GenotypesCache(){ } + + /** + * Returns the GenotypeAlleleCounts associated to a particular ploidy and genotype index. + * + * If the requested index is larger than {@link GenotypesCache#MAX_CACHE_SIZE}, + * this method will construct the result iteratively from the largest cached object. Thus if you are iterating + * through all genotype-allele-counts you should do sequentially using the iterator method to avoid a large efficiency drop. + * + * @param ploidy the ploidy + * @param genotypeIndex the genotype index in the canonical order + * @return never {@code null}. + */ + public static GenotypeAlleleCounts get(final int ploidy, final int genotypeIndex) { + ensureCapacity(genotypeIndex, ploidy); + Utils.validateArg(ploidy >= 0, "ploidy may not be negative"); + Utils.validateArg(genotypeIndex >= 0, "genotype index may not be negative"); + final List cache = caches.get(ploidy); + if (genotypeIndex < cache.size()) { + return cache.get(genotypeIndex); + } else { + final GenotypeAlleleCounts result = cache.get(cache.size() - 1).copy(); + result.increase(genotypeIndex + 1 - cache.size()); + return result; + } + } + + /** + * Extends the genotype allele counts cache for a certain ploidy up to a given size + * + * This method is synchronized since it modifies the shared cache. 
+ */ + private static synchronized void extendCache(final int ploidy, final int newSize) { + final List cache = caches.get(ploidy); + + if (cache.isEmpty()) { + cache.add(GenotypeAlleleCounts.first(ploidy)); + } + + while (cache.size() < newSize) { + cache.add(cache.get(cache.size() - 1).next()); + } + } + + /** + * Update cache if necessary + */ + private static void ensureCapacity(final int genotypeIndex, final int ploidy) { + // add empty lists of genotypes until we have initialized all ploidies up to and including this one + while (ploidy >= caches.size()) { + caches.add(new ArrayList<>()); + } + + final List cache = caches.get(ploidy); + + if (cache.size() <= genotypeIndex && cache.size() < MAX_CACHE_SIZE) { + final int newSize = Math.min(Math.max(cache.size() * 2 + 1, genotypeIndex), MAX_CACHE_SIZE); + extendCache(ploidy, newSize); + } + } +} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index 58724aa345d..350fd4f102b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -217,19 +217,16 @@ private double extractPNoAlt(final List alleles, final Genotype gt) { return extractPNoAlt(alleles, gt, gpArray); } - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - private double extractPNoAlt(final List alleles, final Genotype gt, final double[] posteriors) { if (!alleles.contains(Allele.SPAN_DEL)) { return posteriors[0] - Math.max(0, QualityUtils.phredSum(posteriors)); } else { // here we need to get indices of genotypes composed of REF and * alleles final int ploidy = gt.getPloidy(); - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, alleles.size()); final int spanDelIndex = 
alleles.indexOf(Allele.SPAN_DEL); // allele counts are in the GenotypeLikelihoodCalculator format of {ref index, ref count, span del index, span del count} final double[] nonVariantLog10Posteriors = IntStream.rangeClosed(0, ploidy) - .map(n -> glCalc.alleleCountsToIndex(0, ploidy - n, spanDelIndex, n)) + .map(n -> GenotypeIndexCalculator.alleleCountsToIndex(0, ploidy - n, spanDelIndex, n)) .mapToDouble(n -> posteriors[n]) .toArray(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModel.java index f16999f5a43..6b0c56cc6cb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModel.java @@ -17,25 +17,7 @@ * under the assumption that sample genotypes are independent conditional on their population frequencies. 
*/ public final class IndependentSampleGenotypesModel implements GenotypingModel { - private static final int DEFAULT_CACHE_PLOIDY_CAPACITY = 10; - private static final int DEFAULT_CACHE_ALLELE_CAPACITY = 50; - - private final int cacheAlleleCountCapacity; - private final int cachePloidyCapacity; - private GenotypeLikelihoodCalculator[][] likelihoodCalculators; - private final GenotypeLikelihoodCalculators calculators; - - public IndependentSampleGenotypesModel() { this(DEFAULT_CACHE_PLOIDY_CAPACITY, DEFAULT_CACHE_ALLELE_CAPACITY); } - - /** - * Initialize model with given maximum allele count and ploidy for caching - */ - public IndependentSampleGenotypesModel(final int calculatorCachePloidyCapacity, final int calculatorCacheAlleleCapacity) { - cachePloidyCapacity = calculatorCachePloidyCapacity; - cacheAlleleCountCapacity = calculatorCacheAlleleCapacity; - likelihoodCalculators = new GenotypeLikelihoodCalculator[calculatorCachePloidyCapacity][calculatorCacheAlleleCapacity]; - calculators = new GenotypeLikelihoodCalculators(); - } + public IndependentSampleGenotypesModel() { } public
GenotypingLikelihoods calculateLikelihoods(final AlleleList genotypingAlleles, final GenotypingData data, @@ -51,34 +33,14 @@ public GenotypingLikelihoods calculateLikelihoods(final Al final int sampleCount = data.numberOfSamples(); final PloidyModel ploidyModel = data.ploidyModel(); final List genotypeLikelihoods = new ArrayList<>(sampleCount); - final int alleleCount = genotypingAlleles.numberOfAlleles(); - GenotypeLikelihoodCalculator likelihoodsCalculator = sampleCount > 0 ? getLikelihoodsCalculator(ploidyModel.samplePloidy(0), alleleCount) : null; for (int i = 0; i < sampleCount; i++) { final int samplePloidy = ploidyModel.samplePloidy(i); - // get a new likelihoodsCalculator if this sample's ploidy differs from the previous sample's - if (samplePloidy != likelihoodsCalculator.ploidy()) { - likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy, alleleCount); - } - final LikelihoodMatrix sampleLikelihoods = alleleLikelihoodMatrixMapper.mapAlleles(data.readLikelihoods().sampleMatrix(i)); - genotypeLikelihoods.add(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods)); + genotypeLikelihoods.add(GenotypeLikelihoodCalculator.log10GenotypeLikelihoods(samplePloidy, sampleLikelihoods)); } return new GenotypingLikelihoods<>(genotypingAlleles, ploidyModel, genotypeLikelihoods); } - private GenotypeLikelihoodCalculator getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) { - if (samplePloidy >= cachePloidyCapacity || alleleCount >= cacheAlleleCountCapacity) { - return calculators.getInstance(samplePloidy, alleleCount); - } - final GenotypeLikelihoodCalculator result = likelihoodCalculators[samplePloidy][alleleCount]; - if (result != null) { - return result; - } else { - final GenotypeLikelihoodCalculator newOne = calculators.getInstance(samplePloidy, alleleCount); - likelihoodCalculators[samplePloidy][alleleCount] = newOne; - return newOne; - } - } } \ No newline at end of file diff --git 
a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java index 05261b90ace..eb1a1547f09 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculator.java @@ -4,19 +4,15 @@ import it.unimi.dsi.fastutil.doubles.DoubleArrayList; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.commons.math3.util.MathArrays; -import org.broadinstitute.hellbender.utils.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.*; -import org.broadinstitute.hellbender.utils.dragstr.DragstrParams; -import org.broadinstitute.hellbender.utils.Dirichlet; -import org.broadinstitute.hellbender.utils.IndexRange; -import org.broadinstitute.hellbender.utils.MathUtils; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingLikelihoods; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext; +import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.utils.dragstr.DragstrParams; import 
java.util.ArrayList; import java.util.Arrays; @@ -29,10 +25,9 @@ * @author David Benjamin <davidben@broadinstitute.org> */ public final class AlleleFrequencyCalculator { + private static final double THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE = 0.1; + private static final int HOM_REF_GENOTYPE_INDEX = 0; - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - public static final double THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE = 0.1; - public static final int HOM_REF_GENOTYPE_INDEX = 0; private final double refPseudocount; private final double snpPseudocount; @@ -71,49 +66,45 @@ public static AlleleFrequencyCalculator makeCalculator(final DragstrParams drags /** * - * @param g must have likelihoods or (if approximateHomRefsFromGQ is true) be hom-ref with GQ - * (see {@link org.broadinstitute.hellbender.utils.GenotypeUtils#genotypeIsUsableForAFCalculation(Genotype) - * genotypeIsUsableForAFCalculation} ) - * @param glCalc + * @param g must have likelihoods or (if approximateHomRefsFromGQ is true) GQ * @param log10AlleleFrequencies * @return */ - private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final GenotypeLikelihoodCalculator glCalc, final double[] log10AlleleFrequencies) { - + private static double[] log10NormalizedGenotypePosteriors(final Genotype g, final double[] log10AlleleFrequencies) { final double[] log10Likelihoods; if (g.hasLikelihoods()) { log10Likelihoods = g.getLikelihoods().getAsVector(); - } else if ( g.isHomRef()) { - if (g.getPloidy() != 2) { - throw new IllegalStateException("Likelihoods are required to calculate posteriors for hom-refs with ploidy != 2, " + - "but were not found for genotype " + g + " with ploidy " + g.getPloidy()); - } - if (g.hasGQ()) { - log10Likelihoods = GenotypeUtils.makeApproximateDiploidLog10LikelihoodsFromGQ(g, log10AlleleFrequencies.length); - } else { - throw new IllegalStateException("Genotype " + g + " does not contain GQ necessary to calculate 
posteriors."); - } + } else if ( g.isHomRef() || g.isNoCall()) { + Utils.validate(g.getPloidy() == 2,() -> "Likelihoods are required to calculate posteriors for hom-refs with ploidy != 2, " + + "but were not found for genotype " + g); + Utils.validate(g.hasGQ(), () -> "Genotype " + g + " does not contain GQ necessary to calculate posteriors."); + log10Likelihoods = GenotypeUtils.makeApproximateDiploidLog10LikelihoodsFromGQ(g, log10AlleleFrequencies.length); } else { //no-call with no PLs are too risky -- don't assume they're reblocked hom-refs throw new IllegalStateException("Genotype " + g + " does not contain likelihoods necessary to calculate posteriors."); } - final double[] log10Posteriors = new IndexRange(0, glCalc.genotypeCount()).mapToDouble(genotypeIndex -> { - final GenotypeAlleleCounts gac = glCalc.genotypeAlleleCountsAt(genotypeIndex); - return gac.log10CombinationCount() + log10Likelihoods[genotypeIndex] + + final int ploidy = g.getPloidy(); + final int alleleCount = log10AlleleFrequencies.length; + final double[] log10Posteriors = new double[GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount)]; + Utils.validate(log10Likelihoods.length == log10Posteriors.length, "Ploidy, allele count, and genotype likelihoods are inconsistent"); + + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(ploidy, alleleCount)) { + log10Posteriors[gac.index()] = gac.log10CombinationCount() + log10Likelihoods[gac.index()] + gac.sumOverAlleleIndicesAndCounts((index, count) -> count * log10AlleleFrequencies[index]); - }); + } + return MathUtils.normalizeLog10(log10Posteriors); } private static int[] genotypeIndicesWithOnlyRefAndSpanDel(final int ploidy, final List alleles) { - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, alleles.size()); final boolean spanningDeletionPresent = alleles.contains(Allele.SPAN_DEL); if (!spanningDeletionPresent) { return new int[] {HOM_REF_GENOTYPE_INDEX}; } else { final int spanDelIndex = 
alleles.indexOf(Allele.SPAN_DEL); // allele counts are in the GenotypeLikelihoodCalculator format of {ref index, ref count, span del index, span del count} - return new IndexRange(0, ploidy + 1).mapToInteger(n -> glCalc.alleleCountsToIndex(new int[]{0, ploidy - n, spanDelIndex, n})); + return new IndexRange(0, ploidy + 1).mapToInteger(n -> GenotypeIndexCalculator.alleleCountsToIndex(0, ploidy - n, spanDelIndex, n)); } } @@ -193,7 +184,7 @@ private AFCalculationResult calculate(final int numAlleles, .mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() == refLength ? snpPseudocount : indelPseudocount)).toArray(); double[] alleleCounts = new double[numAlleles]; - final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles); // log10(1/numAlleles) + final double flatLog10AlleleFrequency = -Math.log10(numAlleles); // log10(1/numAlleles) double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency); for (double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY; alleleCountsMaximumDifference > AlleleFrequencyCalculator.THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE; ) { @@ -223,9 +214,8 @@ private AFCalculationResult calculate(final int numAlleles, continue; } final int ploidy = g.getPloidy() == 0 ? defaultPloidy : g.getPloidy(); - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles); - final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies); + final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g,log10AlleleFrequencies); //the total probability if (!spanningDeletionPresent) { @@ -249,9 +239,9 @@ private AFCalculationResult calculate(final int numAlleles, // to get the log10 probability that the allele is absent in this sample log10AbsentPosteriors.forEach(DoubleArrayList::clear); // clear the buffers. 
Note that this is O(1) due to the primitive backing array - for (int genotype = 0; genotype < glCalc.genotypeCount(); genotype++) { - final double log10GenotypePosterior = log10GenotypePosteriors[genotype]; - glCalc.genotypeAlleleCountsAt(genotype).forEachAbsentAlleleIndex(a -> log10AbsentPosteriors.get(a).add(log10GenotypePosterior), numAlleles); + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(ploidy, numAlleles)) { + final double log10GenotypePosterior = log10GenotypePosteriors[gac.index()]; + gac.forEachAbsentAlleleIndex(a -> log10AbsentPosteriors.get(a).add(log10GenotypePosterior), numAlleles); } final double[] log10PNoAllele = log10AbsentPosteriors.stream() @@ -297,8 +287,8 @@ public double calculateSingleSampleBiallelicNonRefPosterior(final double[] log10 final int ploidy = log10GenotypeLikelihoods.length - 1; final double[] log10UnnormalizedPosteriors = new IndexRange(0, ploidy + 1) - .mapToDouble(n -> log10GenotypeLikelihoods[n] + MathUtils.log10BinomialCoefficient(ploidy, n) - + MathUtils.logToLog10(Gamma.logGamma(n + snpPseudocount ) + Gamma.logGamma(ploidy - n + refPseudocount))); + .mapToDouble(n -> log10GenotypeLikelihoods[n] + MathUtils.logToLog10(CombinatoricsUtils.binomialCoefficientLog(ploidy, n) + + Gamma.logGamma(n + snpPseudocount ) + Gamma.logGamma(ploidy - n + refPseudocount))); return (returnZeroIfRefIsMax && MathUtils.maxElementIndex(log10UnnormalizedPosteriors) == 0) ? 
0.0 : 1 - MathUtils.normalizeFromLog10ToLinearSpace(log10UnnormalizedPosteriors)[0]; @@ -317,13 +307,13 @@ private double[] effectiveAlleleCounts(List genotypes, final double[] if (!GenotypeUtils.genotypeIsUsableForAFCalculation(g)) { continue; } - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(g.getPloidy(), numAlleles); - final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies); + final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, log10AlleleFrequencies); - new IndexRange(0, glCalc.genotypeCount()).forEach(genotypeIndex -> - glCalc.genotypeAlleleCountsAt(genotypeIndex).forEachAlleleIndexAndCount((alleleIndex, count) -> - log10Result[alleleIndex] = MathUtils.log10SumLog10(log10Result[alleleIndex], log10GenotypePosteriors[genotypeIndex] + MathUtils.log10(count)))); + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(g.getPloidy(), numAlleles)) { + gac.forEachAlleleIndexAndCount((alleleIndex, count) -> log10Result[alleleIndex] = + MathUtils.log10SumLog10(log10Result[alleleIndex], log10GenotypePosteriors[gac.index()] + Math.log10(count))); + } } return MathUtils.applyToArrayInPlace(log10Result, x -> Math.pow(10.0, x)); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyperEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyperEngine.java index c8de26eea99..2dd89345cfe 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyperEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyperEngine.java @@ -3,14 +3,17 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Ints; import htsjdk.variant.variantcontext.*; -import htsjdk.variant.vcf.*; +import htsjdk.variant.vcf.VCFConstants; import org.apache.commons.lang.StringUtils; import 
org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.walkers.annotator.*; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.*; -import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypesCache; +import org.broadinstitute.hellbender.utils.GenotypeCounts; +import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; @@ -38,7 +41,6 @@ public final class GnarlyGenotyperEngine { // cache the ploidy 2 PL array sizes for increasing numbers of alts up to the maximum of maxAltAllelesToOutput private int[] likelihoodSizeCache; - private final ArrayList glcCache = new ArrayList<>(); private Set> allASAnnotations; private final int maxAltAllelesToOutput; @@ -56,15 +58,10 @@ public GnarlyGenotyperEngine(final boolean keepAllSites, final int maxAltAlleles this.keepAllSites = keepAllSites; this.stripASAnnotations = stripASAnnotations; - final GenotypeLikelihoodCalculators GLCprovider = new GenotypeLikelihoodCalculators(); - //initialize PL size cache -- HTSJDK cache only goes up to 4 alts, but I need 6 likelihoodSizeCache = new int[maxAltAllelesToOutput + 1 + 1]; //+1 for ref and +1 so index == numAlleles - glcCache.add(null); //add a null at index zero because zero alleles (incl. 
ref) makes no sense for (final int numAlleles : IntStream.rangeClosed(1, maxAltAllelesToOutput + 1).boxed().collect(Collectors.toList())) { likelihoodSizeCache[numAlleles] = GenotypeLikelihoods.numLikelihoods(numAlleles, ASSUMED_PLOIDY); - //GL calculator cache is indexed by the total number of alleles, including ref - glcCache.add(numAlleles, GLCprovider.getInstance(ASSUMED_PLOIDY, numAlleles)); } //TODO: fix weird reflection logging? @@ -408,16 +405,7 @@ protected void makeGenotypeCall(final GenotypeBuilder gb, gb.alleles(GATKVariantContextUtils.noCallAlleles(ASSUMED_PLOIDY)).noGQ(); } else { final int maxLikelihoodIndex = MathUtils.maxElementIndex(genotypeLikelihoods); - - GenotypeLikelihoodCalculator glCalc; - if ( allelesToUse.size() <= maxAllelesToOutput ) { - glCalc = glcCache.get(allelesToUse.size()); - } else { - final GenotypeLikelihoodCalculators GLCprovider = new GenotypeLikelihoodCalculators(); - glCalc = GLCprovider.getInstance(ASSUMED_PLOIDY, allelesToUse.size()); - } - - final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(maxLikelihoodIndex); + final GenotypeAlleleCounts alleleCounts = GenotypesCache.get(ASSUMED_PLOIDY, maxLikelihoodIndex); gb.alleles(alleleCounts.asAlleleList(allelesToUse)); final int numAltAlleles = allelesToUse.size() - 1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 15dbcf86c19..0d05f78fb27 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -411,7 +411,7 @@ static VariantContext replaceWithSpanDelVC(final VariantContext variantContext, private VariantContext removeAltAllelesIfTooManyGenotypes(final int ploidy, final Map> 
alleleMapper, final VariantContext mergedVC) { final int originalAlleleCount = alleleMapper.size(); - practicalAlleleCountForPloidy.putIfAbsent(ploidy, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(ploidy, maxGenotypeCountToEnumerate)); + practicalAlleleCountForPloidy.putIfAbsent(ploidy, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(ploidy, maxGenotypeCountToEnumerate)); final int practicalAlleleCount = practicalAlleleCountForPloidy.get(ploidy); if (originalAlleleCount > practicalAlleleCount) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java index 887d0f2b922..4d519f19721 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReferenceConfidenceModel.java @@ -367,7 +367,7 @@ private GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) { return indelPLCache[ploidy]; } - final double denominator = - MathUtils.log10(ploidy); + final double denominator = - Math.log10(ploidy); final GenotypeLikelihoods[] result = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1]; //Note: an array of zeros is the right answer for result[0]. 
@@ -376,8 +376,8 @@ private GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) { final double[] PLs = new double[ploidy + 1]; PLs[0] = nInformativeReads * NO_INDEL_LIKELIHOOD; for (int altCount = 1; altCount <= ploidy; altCount++) { - final double refLikelihoodAccum = NO_INDEL_LIKELIHOOD + MathUtils.log10(ploidy - altCount); - final double altLikelihoodAccum = INDEL_LIKELIHOOD + MathUtils.log10(altCount); + final double refLikelihoodAccum = NO_INDEL_LIKELIHOOD + Math.log10(ploidy - altCount); + final double altLikelihoodAccum = INDEL_LIKELIHOOD + Math.log10(altCount); PLs[altCount] = nInformativeReads * (MathUtils.approximateLog10SumLog10(refLikelihoodAccum ,altLikelihoodAccum) + denominator); } result[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(PLs); @@ -404,7 +404,7 @@ public ReferenceConfidenceResult calcGenotypeLikelihoodsOfRefVsAny(final int plo final boolean readsWereRealigned) { final int likelihoodCount = ploidy + 1; - final double log10Ploidy = MathUtils.log10(ploidy); + final double log10Ploidy = Math.log10(ploidy); final RefVsAnyResult result = new RefVsAnyResult(likelihoodCount); int readCount = 0; @@ -491,8 +491,8 @@ private void applyPileupElementRefVsNonRefLikelihoodAndCount(final byte refBase, for (int i = 1, j = likelihoodCount - 2; i < likelihoodCount - 1; i++, j--) { result.genotypeLikelihoods[i] += MathUtils.approximateLog10SumLog10( - referenceLikelihood + MathUtils.log10(j), - nonRefLikelihood + MathUtils.log10(i)); + referenceLikelihood + Math.log10(j), + nonRefLikelihood + Math.log10(i)); } if (isAlt && hqSoftClips != null && element.isNextToSoftClip()) { hqSoftClips.add(AlignmentUtils.countHighQualitySoftClips(element.getRead(), HQ_BASE_QUALITY_SOFTCLIP_THRESHOLD)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/graphs/KBestHaplotype.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/graphs/KBestHaplotype.java index 0e16ec5382b..86caa0e2172 
100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/graphs/KBestHaplotype.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/graphs/KBestHaplotype.java @@ -29,7 +29,7 @@ public KBestHaplotype(final KBestHaplotype p, final E edge, final int tota } public static double computeLogPenaltyScore(int edgeMultiplicity, int totalOutgoingMultiplicity) { - return MathUtils.log10(edgeMultiplicity) - MathUtils.log10(totalOutgoingMultiplicity); + return Math.log10(edgeMultiplicity) - Math.log10(totalOutgoingMultiplicity); } public KBestHaplotype(final KBestHaplotype p, final List edgesToExtend, final double edgePenalty) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 77ff1f67d0b..ce13d6a8950 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.special.Gamma; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.commons.math3.util.FastMath; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -644,7 +645,7 @@ public static double logLikelihoodRatio(final int nRef, final List altQual betaEntropy = Gamma.logGamma(alpha + beta) - Gamma.logGamma(alpha) - Gamma.logGamma(beta) - Gamma.logGamma(alpha + beta + n) + Gamma.logGamma(alpha + nAlt) + Gamma.logGamma(beta + nRef); } else { - betaEntropy = MathUtils.log10ToLog(-MathUtils.log10Factorial(n + 1) + MathUtils.log10Factorial(nAlt) + MathUtils.log10Factorial(nRef)); + betaEntropy = -Math.log(n + 1) - CombinatoricsUtils.binomialCoefficientLog(n, nAlt); } return betaEntropy + readSum * repeatFactor; 
} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 143ab9af11b..847f0facd6e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -6,9 +6,12 @@ import htsjdk.variant.vcf.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Advanced; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; -import org.broadinstitute.hellbender.cmdline.*; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection; import org.broadinstitute.hellbender.engine.*; import org.broadinstitute.hellbender.exceptions.GATKException; @@ -30,10 +33,10 @@ import org.broadinstitute.hellbender.utils.genotyper.SampleList; import org.broadinstitute.hellbender.utils.logging.OneShotLogger; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; -import org.broadinstitute.hellbender.utils.variant.*; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.hellbender.utils.variant.VariantContextGetters; import org.broadinstitute.hellbender.utils.variant.writers.GVCFWriter; import org.broadinstitute.hellbender.utils.variant.writers.ReblockingGVCFBlockCombiner; import 
org.broadinstitute.hellbender.utils.variant.writers.ReblockingGVCFWriter; @@ -112,8 +115,6 @@ public final class ReblockGVCF extends MultiVariantWalker { public static final String KEEP_SITE_FILTERS_LONG_NAME = "keep-site-filters"; public static final String KEEP_SITE_FILTERS_SHORT_NAME = "keep-filters"; - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc="File to which variants should be written") protected GATKPath outputFile; @@ -137,7 +138,7 @@ public final class ReblockGVCF extends MultiVariantWalker { } @Advanced - @DeprecatedFeature(detail="This argument introduces 'holes', resulting in an invalid GVCF") + //@DeprecatedFeature(detail="This argument introduces 'holes', resulting in an invalid GVCF") @Argument(fullName=DROP_LOW_QUALS_ARG_NAME, shortName=DROP_LOW_QUALS_ARG_NAME, doc="Exclude variants and homRef blocks that are GQ0 from the reblocked GVCF to save space; drop low quality/uncalled alleles", optional = true) protected boolean dropLowQuals = false; @@ -476,8 +477,7 @@ boolean shouldBeReblocked(final VariantContext vc) { return true; } final int minLikelihoodIndex = MathUtils.minElementIndex(pls); - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(genotype.getPloidy(), vc.getAlleles().size()); - final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(minLikelihoodIndex); + final GenotypeAlleleCounts alleleCounts = GenotypesCache.get(genotype.getPloidy(), minLikelihoodIndex); final List finalAlleles = alleleCounts.asAlleleList(vc.getAlleles()); return (pls != null && pls[0] < rgqThreshold) @@ -766,8 +766,7 @@ private Genotype getCalledGenotype(final VariantContext variant) { + variant.getContig() + ":" + variant.getStart()); } final int minLikelihoodIndex = MathUtils.minElementIndex(pls); - final GenotypeLikelihoodCalculator glCalc = 
GL_CALCS.getInstance(origG.getPloidy(), variant.getAlleles().size()); - final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(minLikelihoodIndex); + final GenotypeAlleleCounts alleleCounts = GenotypesCache.get(origG.getPloidy(), minLikelihoodIndex); final List finalAlleles = alleleCounts.asAlleleList(variant.getAlleles()); hasPLAndPosteriorMismatch = !finalAlleles.containsAll(origG.getAlleles()); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/GenotypeUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/GenotypeUtils.java index 77e920a918c..3829c13435b 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/GenotypeUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/GenotypeUtils.java @@ -1,13 +1,11 @@ package org.broadinstitute.hellbender.utils; import htsjdk.variant.variantcontext.*; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator; import picard.util.MathUtil; -import java.util.Arrays; - public final class GenotypeUtils { - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); final static int TYPICAL_BASE_QUALITY = 30; //from the genotype likelihoods equations assuming the SNP ref conf model with no mismatches //PL[2] = GQ; scaleFactor = PL[3]/GQ ~ -10 * DP * log10(P_error) / (-10 * DP * log10(1/ploidy)) where BASE_QUALITY = -10 * log10(P_error) @@ -153,9 +151,25 @@ public static boolean genotypeIsUsableForAFCalculation(Genotype g) { } /** - * Make approximate likelihoods for a diploid genotype without PLs. 
- For a hom-ref, as long as we have GQ we can make a very accurate QUAL calculation - * since the hom-var likelihood should make a minuscule contribution + * Make approximate likelihoods for a diploid genotype (with arbitrary allele count) without PLs given genotype quality GQ. + * + * The method is as follows: + * 1) For the biallelic diploid case with alleles A,B, the genotype likelihoods would be + * AA: 0, AB: GQ, BB: PLOIDY_2_HOM_VAR_SCALE_FACTOR * GQ + * + * 2) For arbitrary allele count, set the genotype likelihoods as + * AA: same as AA in the biallelic case + * AB, AC, AD etc: same as AB in the biallelic case + * BB, BC, CC, BD etc: same as BB in the biallelic case + * + * WARNING: this calculation is completely bogus! Legacy javadoc said: "For a hom-ref, as long as we have GQ we can + * make a very accurate QUAL calculation since the hom-var likelihood should make a minuscule contribution." Basically, + * the bogusness of this method doesn't matter because the voodoo only involves the very small hom-var contribution. + * That's true, but for multiallelics it incorrectly assigns the same GQ to every het genotype, essentially deflating + * the quality of the hom-ref call. + * + * In summary, the effect of this calculation is to depress the QUAL of multiallelic hom refs by a small amount for + * no reason whatsoever. 
* @param g a diploid genotype with GQ * @param nAlleles number of alleles (including reference) * @return log10 likelihoods @@ -163,20 +177,17 @@ public static boolean genotypeIsUsableForAFCalculation(Genotype g) { public static double[] makeApproximateDiploidLog10LikelihoodsFromGQ(Genotype g, int nAlleles) { Utils.validate(g.getPloidy() == 2, "This method can only be used to approximate likelihoods for diploid genotypes"); Utils.validate(g.hasGQ(), "Genotype must have GQ in order to approximate PLs"); - final int[] perSampleIndexesOfRelevantAlleles = new int[nAlleles]; - Arrays.fill(perSampleIndexesOfRelevantAlleles, 1); - perSampleIndexesOfRelevantAlleles[0] = 0; //ref still maps to ref - //use these values for diploid ref/ref, ref/alt, alt/alt likelihoods - final int gq = g.getGQ(); - final int ploidy = g.getPloidy(); - //here we supply likelihoods for ref/ref, ref/alt, and alt/alt and then generalize to multiallic PLs if necessary - final int[] approxLikelihoods = {0, gq, PLOIDY_2_HOM_VAR_SCALE_FACTOR*gq}; - //map likelihoods for any other alts to biallelic ref/alt likelihoods above - final int[] genotypeIndexMapByPloidy = GL_CALCS.getInstance(ploidy, nAlleles).genotypeIndexMap(perSampleIndexesOfRelevantAlleles, GL_CALCS); //probably horribly slow - final int[] PLs = new int[genotypeIndexMapByPloidy.length]; - for (int i = 0; i < PLs.length; i++) { - PLs[i] = approxLikelihoods[genotypeIndexMapByPloidy[i]]; + + final int homRefLikelihood = 0; + final int hetLikelihood = g.getGQ(); + final int homVarLikelihood = PLOIDY_2_HOM_VAR_SCALE_FACTOR * g.getGQ(); + + final int[] PLs = new int[GenotypeIndexCalculator.genotypeCount(2, nAlleles)]; + //TODO: replace with GenotypesCache::iterator + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(2, nAlleles)) { + PLs[gac.index()] = gac.index() == 0 ? homRefLikelihood : (gac.containsAllele(0) ? 
hetLikelihood : homVarLikelihood); } + return GenotypeLikelihoods.fromPLs(PLs).getAsVector(); //fromPLs converts from Phred-space back to log10-space } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/IndexRange.java b/src/main/java/org/broadinstitute/hellbender/utils/IndexRange.java index 59a527c06eb..1c774c7b392 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/IndexRange.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/IndexRange.java @@ -187,6 +187,20 @@ public double sum(final IntToDoubleFunction lambda) { return result; } + /** + * Sums the values of an int -> int function applied to this range + * + * @param lambda the int -> int function + */ + public int sumInt(final IntUnaryOperator lambda) { + Utils.nonNull(lambda, "the lambda function cannot be null"); + int result = 0; + for (int i = from; i < to; i++) { + result += lambda.applyAsInt(i); + } + return result; + } + /** * Apply an int -> int function to this range, producing an int[] * diff --git a/src/main/java/org/broadinstitute/hellbender/utils/Log10Cache.java b/src/main/java/org/broadinstitute/hellbender/utils/Log10Cache.java deleted file mode 100644 index 345aae44f12..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/utils/Log10Cache.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.broadinstitute.hellbender.utils; - -public final class Log10Cache extends IntToDoubleFunctionCache { - @Override - protected int maxSize() { - return Integer.MAX_VALUE; - } - - @Override - protected double compute(final int n) { - return Math.log10(n); - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/Log10FactorialCache.java b/src/main/java/org/broadinstitute/hellbender/utils/Log10FactorialCache.java deleted file mode 100644 index 34bb596cd87..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/utils/Log10FactorialCache.java +++ /dev/null @@ -1,20 +0,0 @@ -package org.broadinstitute.hellbender.utils; - -import 
org.apache.commons.math3.special.Gamma; - -/** - * Wrapper class so that the log10Factorial array is only calculated if it's used - */ -public final class Log10FactorialCache extends IntToDoubleFunctionCache { - private static final int CACHE_SIZE = 10_000; - - @Override - protected int maxSize() { - return CACHE_SIZE; - } - - @Override - protected double compute(final int n) { - return MathUtils.log10Gamma(n + 1); - } -} diff --git a/src/main/java/org/broadinstitute/hellbender/utils/MannWhitneyU.java b/src/main/java/org/broadinstitute/hellbender/utils/MannWhitneyU.java index 41439e82aff..6a936f1e981 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/MannWhitneyU.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/MannWhitneyU.java @@ -2,6 +2,7 @@ import htsjdk.samtools.util.Histogram; import org.apache.commons.math3.distribution.NormalDistribution; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.commons.math3.util.FastMath; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -176,21 +177,14 @@ public int hashCode() { /** * A map of an Integer[] of the labels to the set of all possible permutations of those labels. */ - private static Map>> PERMUTATIONS = new ConcurrentHashMap>>(); + private static final Map>> PERMUTATIONS = new ConcurrentHashMap>>(); /** * The minimum length for both data series in order to use a normal distribution * to calculate Z and p. If both series are shorter than this value then a permutation test * will be used. */ - private int minimumNormalN = 10; - - /** - * Sets the minimum number of values in each data series to use the normal distribution approximation. 
- */ - public void setMinimumSeriesLengthForNormalApproximation(final int n) { - this.minimumNormalN = n; - } + private static final int MINIMUM_NORMAL_N = 10; /** * A variable that indicates if the test is one sided or two sided and if it's one sided @@ -397,7 +391,7 @@ public Result test(final double[] series1, final double[] series2, final TestTyp double z; double p; - if (n1 >= this.minimumNormalN || n2 >= this.minimumNormalN) { + if (n1 >= MINIMUM_NORMAL_N || n2 >= MINIMUM_NORMAL_N) { z = calculateZ(u, n1, n2, nties, whichSide); p = 2 * NORMAL.cumulativeProbability(NORMAL_MEAN + z * NORMAL_SD); if (whichSide != TestType.TWO_SIDED) { @@ -485,7 +479,7 @@ Set> getPermutations(final Integer[] listToPermute, int numOfPermu * @param testStatU Test statistic U from observed data * @return P-value based on histogram with u calculated for every possible permutation of group tag. */ - public double permutationTest(final double[] series1, final double[] series2, final double testStatU) { + private double permutationTest(final double[] series1, final double[] series2, final double testStatU) { // note that Mann-Whitney U stats are always integer or half-integer (this is true even in the case of ties) // thus for safety we store a histogram of twice the Mann-Whitney values @@ -506,7 +500,7 @@ public double permutationTest(final double[] series1, final double[] series2, fi } } - final int numOfPerms = (int) MathUtils.binomialCoefficient(n1 + n2, n2); + final int numOfPerms = (int) CombinatoricsUtils.binomialCoefficient(n1 + n2, n2); Set> allPermutations = getPermutations(firstPermutation, numOfPerms); double[] newSeries1 = new double[n1]; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/MathUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/MathUtils.java index 3a2ccba5f6a..2a258033452 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/MathUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/MathUtils.java @@ -1,5 +1,6 @@ 
package org.broadinstitute.hellbender.utils; +import org.apache.commons.math3.distribution.BinomialDistribution; import org.apache.commons.math3.distribution.EnumeratedDistribution; import org.apache.commons.math3.exception.DimensionMismatchException; import org.apache.commons.math3.exception.NotStrictlyPositiveException; @@ -25,12 +26,6 @@ */ public final class MathUtils { - /** - * The smallest log10 value we'll emit from normalizeFromLog10 and other functions - * where the real-space value is 0.0. - */ - public static final double LOG10_P_OF_ZERO = -1000000.0; - public static final double LOG10_ONE_HALF = Math.log10(0.5); public static final double LOG10_ONE_THIRD = -Math.log10(3.0); public static final double LOG_ONE_THIRD = -Math.log(3.0); @@ -42,10 +37,11 @@ public final class MathUtils { private static final double ROOT_TWO_PI = Math.sqrt(2.0 * Math.PI); - private static final Log10Cache LOG_10_CACHE = new Log10Cache(); - private static final Log10FactorialCache LOG_10_FACTORIAL_CACHE = new Log10FactorialCache(); private static final DigammaCache DIGAMMA_CACHE = new DigammaCache(); + // represent overflow for computations returning a positive long + public static final int LONG_OVERFLOW = -1; + /** * Private constructor. No instantiating this class! */ @@ -376,25 +372,6 @@ public static int[] vectorDiff(final int[] x, final int[] y) { return new IndexRange(0, x.length).mapToInteger(k -> x[k] - y[k]); } - /** - * Calculates the log10 of the multinomial coefficient. Designed to prevent - * overflows even with very large numbers. - * - * @param n total number of trials - * @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km) - * @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value. 
- */ - public static double log10MultinomialCoefficient(final int n, final int[] k) { - Utils.validateArg(n >= 0, "n: Must have non-negative number of trials"); - Utils.validateArg(allMatch(k, x -> x >= 0), "Elements of k must be non-negative"); - Utils.validateArg(sum(k) == n, "Sum of observations k must sum to total number of trials n"); - return log10Factorial(n) - new IndexRange(0, k.length).sum(j -> log10Factorial(k[j])); - } - - public static double log10(int i) { - return LOG_10_CACHE.get(i); - } - public static double digamma(int i) { return DIGAMMA_CACHE.get(i); } @@ -611,27 +588,6 @@ public static byte compareDoubles(final double a, final double b, final double e return 1; } - /** - * Calculates the binomial coefficient. Designed to prevent - * overflows even with very large numbers. - * - * @param n total number of trials - * @param k number of successes - * @return the binomial coefficient - */ - public static double binomialCoefficient(final int n, final int k) { - return Math.pow(10, log10BinomialCoefficient(n, k)); - } - - /** - * @see #binomialCoefficient(int, int) with log10 applied to result - */ - public static double log10BinomialCoefficient(final int n, final int k) { - Utils.validateArg(n >= 0, "Must have non-negative number of trials"); - Utils.validateArg( k <= n && k >= 0, "k: Must have non-negative number of successes, and no more successes than number of trials"); - return log10Factorial(n) - log10Factorial(k) - log10Factorial(n - k); - } - /** * Computes a binomial probability. This is computed using the formula *

@@ -642,31 +598,17 @@ public static double log10BinomialCoefficient(final int n, final int k) { * @param n number of Bernoulli trials * @param k number of successes * @param p probability of success - * @return the binomial probability of the specified configuration. Computes values down to about 1e-237. + * @return the binomial probability of the specified configuration. */ public static double binomialProbability(final int n, final int k, final double p) { - return Math.pow(10.0, log10BinomialProbability(n, k, Math.log10(p))); + return new BinomialDistribution(null, n, p).probability(k); } /** * binomial Probability(int, int, double) with log applied to result */ - public static double log10BinomialProbability(final int n, final int k, final double log10p) { - Utils.validateArg(log10p < 1.0e-18, "log10p: Log10-probability must be 0 or less"); - if (log10p == Double.NEGATIVE_INFINITY){ - return k == 0 ? 0 : Double.NEGATIVE_INFINITY; - } else if (log10p == 0) { - return k == n ? 0 : Double.NEGATIVE_INFINITY; - } - double log10OneMinusP = Math.log10(1 - Math.pow(10.0, log10p)); - return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k); - } - - /** - * @see #binomialProbability(int, int, double) with p=0.5 and log10 applied to result - */ - public static double log10BinomialProbability(final int n, final int k) { - return log10BinomialCoefficient(n, k) + (n * LOG10_ONE_HALF); + public static double logBinomialProbability(final int n, final int k, final double p) { + return new BinomialDistribution(null, n, p).logProbability(k); } public static double log10SumLog10(final double[] log10Values, final int start) { @@ -939,21 +881,6 @@ public static double log10Gamma(final double x) { return logToLog10(Gamma.logGamma(x)); } - public static double log10Factorial(final int n) { - return LOG_10_FACTORIAL_CACHE.get(n); - } - - /** - * Converts a real space array of numbers (typically probabilities) into a log10 array - * - * @param prRealSpace - * 
@return - */ - public static double[] toLog10(final double[] prRealSpace) { - Utils.nonNull(prRealSpace); - return applyToArray(prRealSpace, Math::log10); - } - /** * Compute in a numerically correct way the quantity log10(1-x) * @@ -1035,8 +962,8 @@ public static double dirichletMultinomial(double[] params, int[] counts) { Utils.validateArg(params.length == counts.length, "The number of dirichlet parameters must match the number of categories"); final double dirichletSum = sum(params); final int countSum = (int) sum(counts); - double prefactor = log10MultinomialCoefficient(countSum,counts) + log10Gamma(dirichletSum) - log10Gamma(dirichletSum+countSum); - return prefactor + new IndexRange(0, counts.length).sum(n -> log10Gamma(counts[n] + params[n]) - log10Gamma(params[n])); + return logToLog10(Gamma.logGamma(countSum+1) + Gamma.logGamma(dirichletSum) - Gamma.logGamma(dirichletSum+countSum) + + new IndexRange(0, counts.length).sum(n -> Gamma.logGamma(counts[n] + params[n]) - Gamma.logGamma(params[n]) - Gamma.logGamma(counts[n]+1) )); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/utils/NaturalLogUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/NaturalLogUtils.java index 55f7b9d8909..e5bcca1fbc4 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/NaturalLogUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/NaturalLogUtils.java @@ -7,8 +7,7 @@ import java.util.Collections; public class NaturalLogUtils { - public static final double LOG_ONE_HALF = FastMath.log(0.5); - public static final double LOG_ONE_THIRD = FastMath.log(1.0/3); + public static final double LOG_ONE_HALF = Math.log(0.5); private static final double LOG1MEXP_THRESHOLD = Math.log(0.5); private static final double PHRED_TO_LOG_ERROR_PROB_FACTOR = -Math.log(10)/10; @@ -146,6 +145,6 @@ public static double qualToLogProb(final byte qual) { } public static double logSumLog(final double a, final double b) { - return a > b ? 
a + FastMath.log(1 + FastMath.exp(b - a)) : b + FastMath.log(1 + FastMath.exp(a - b)); + return a > b ? a + Math.log(1 + FastMath.exp(b - a)) : b + Math.log(1 + FastMath.exp(a - b)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/genotyper/GenotypePriorCalculator.java b/src/main/java/org/broadinstitute/hellbender/utils/genotyper/GenotypePriorCalculator.java index 887a1ea52a9..d5a92d54834 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/genotyper/GenotypePriorCalculator.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/genotyper/GenotypePriorCalculator.java @@ -2,6 +2,7 @@ import htsjdk.variant.variantcontext.Allele; import org.apache.commons.math3.util.MathArrays; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator; import org.broadinstitute.hellbender.utils.dragstr.DragstrParams; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection; @@ -23,7 +24,7 @@ *

* *

- * Such priors are obtained by invoking {@link #getLog10Priors(GenotypeLikelihoodCalculator, List). + * Such priors are obtained by invoking {@link #getLog10Priors} * This method takes on the list of alleles for that variant, an a reference to the genotype likelihood calculator witch determines the ploidy. *

* assumptions @@ -39,7 +40,7 @@ private enum AlleleType { // A snp can go to 3 different bases (standard-nucs - 1), so we normalize SNP lks accordingly. Here is the // log10 constant used for that: private static final double LOG10_SNP_NORMALIZATION_CONSTANT = - MathUtils.log10(Nucleotide.STANDARD_BASES.size() - 1); + Math.log10(Nucleotide.STANDARD_BASES.size() - 1); private final double[] hetValues; private final double[] homValues; @@ -134,30 +135,26 @@ public static GenotypePriorCalculator assumingHW(final GenotypeCalculationArgume } /** - * Calculates the priors given the alleles to genetype and a likelihood calculator that determines the ploidy - * of the sample at that site. - * @param lkCalculator the input calculator + * Calculates the priors given the alleles to genotype + * * @param alleles the input alleles. - * @throws IllegalArgumentException if either input is {@code null} or the calculator maximum number of supported alleles is less that the input allele size. * @return never {@code null}, the array will have as many positions as necessary to hold the priors of all possible * unphased genotypes as per the number of input alleles and the input calculator's ploidy. 
*/ - public double[] getLog10Priors(final GenotypeLikelihoodCalculator lkCalculator, final List alleles) { - Utils.nonNull(lkCalculator); + public double[] getLog10Priors(final int ploidy, final List alleles) { Utils.nonNull(alleles); - if (lkCalculator.alleleCount() < alleles.size()) { - throw new IllegalArgumentException("the number of alleles in the input calculator must be at least as large as the number of alleles in the input list"); - } final int[] alleleTypes = calculateAlleleTypes(alleles); - final int numberOfGenotypes = lkCalculator.genotypeCount(); - final double[] result = new double[numberOfGenotypes]; - // implied = result[0] = 0.0; - for (int g = 1; g < numberOfGenotypes; g++) { - final GenotypeAlleleCounts gac = lkCalculator.genotypeAlleleCountsAt(g); - result[g] = gac.sumOverAlleleIndicesAndCounts((idx, cnt) -> cnt == 2 - ? homValues[alleleTypes[idx]] - : hetValues[alleleTypes[idx]] + diffValues[alleleTypes[idx]] * (cnt - 1)); + + final double[] result = new double[GenotypeIndexCalculator.genotypeCount(ploidy, alleles.size())]; + + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(ploidy, alleles.size())) { + // implied = result[0] = 0.0; + if (gac.index() > 0) { + result[gac.index()] = gac.sumOverAlleleIndicesAndCounts((allele, count) -> count == 2 ? 
homValues[alleleTypes[allele]] + : hetValues[alleleTypes[allele]] + diffValues[alleleTypes[allele]] * (count - 1)); + } } + return result; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatum.java b/src/main/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatum.java index 974062cafa8..37e78079104 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatum.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatum.java @@ -1,7 +1,8 @@ package org.broadinstitute.hellbender.utils.recalibration; +import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.SAMUtils; -import org.apache.commons.math3.analysis.function.Gaussian; +import org.apache.commons.math3.distribution.NormalDistribution; import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.QualityUtils; @@ -257,11 +258,11 @@ public static double bayesianEstimateOfEmpiricalQuality(final long nObservations final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL; - final double[] log10Posteriors = new IndexRange(0, numBins).mapToDouble(bin -> { + final double[] logPosteriors = new IndexRange(0, numBins).mapToDouble(bin -> { final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL; - return log10QempPrior(QEmpOfBin, QReported) + log10QempLikelihood(QEmpOfBin, nObservations, nErrors); + return logQempPrior(QEmpOfBin, QReported) + logQempLikelihood(QEmpOfBin, nObservations, nErrors); }); - final int MLEbin = MathUtils.maxElementIndex(log10Posteriors); + final int MLEbin = MathUtils.maxElementIndex(logPosteriors); return MLEbin / RESOLUTION_BINS_PER_QUAL; } @@ -270,31 +271,29 @@ public static double bayesianEstimateOfEmpiricalQuality(final long nObservations * in the base quality score recalibrator */ public static final byte MAX_GATK_USABLE_Q_SCORE = 40; - private 
static final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1]; + private static final double[] logQempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1]; static { - // f(x) = a*exp(-((x - b)^2 / (2*c^2))) - // Note that a is the height of the curve's peak, b is the position of the center of the peak, and c controls the width of the "bell". - final double GF_a = 0.9; - final double GF_b = 0.0; - final double GF_c = 0.5; // with these parameters, deltas can shift at most ~20 Q points + // normal distribution describing P(Q empirical - Q reported). Its mean is zero because a priori we expect + // no systematic bias in the reported quality score + final double mean = 0.0; + final double sigma = 0.5; // with these parameters, deltas can shift at most ~20 Q points + final NormalDistribution gaussian = new NormalDistribution(null, mean, sigma); - final Gaussian gaussian = new Gaussian(GF_a, GF_b, GF_c); for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) { - double log10Prior = Math.log10(gaussian.value((double) i)); - if ( Double.isInfinite(log10Prior) ) - log10Prior = -Double.MAX_VALUE; - log10QempPriorCache[i] = log10Prior; + logQempPriorCache[i] = gaussian.logDensity(i); } } - protected static double log10QempPrior(final double Qempirical, final double Qreported) { + @VisibleForTesting + protected static double logQempPrior(final double Qempirical, final double Qreported) { final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE); - return log10QempPriorCache[difference]; + return logQempPriorCache[difference]; } private static final long MAX_NUMBER_OF_OBSERVATIONS = Integer.MAX_VALUE - 1; - protected static double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) { + @VisibleForTesting + protected static double logQempLikelihood(final double Qempirical, long nObservations, long nErrors) { if ( nObservations == 0 ) return 0.0; @@ -309,10 +308,7 @@ protected static double 
log10QempLikelihood(final double Qempirical, long nObser } // this is just a straight binomial PDF - double log10Prob = MathUtils.log10BinomialProbability((int) nObservations, (int) nErrors, QualityUtils.qualToErrorProbLog10(Qempirical)); - if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) ) - log10Prob = -Double.MAX_VALUE; - - return log10Prob; + final double logLikelihood = MathUtils.logBinomialProbability((int) nObservations, (int) nErrors, QualityUtils.qualToErrorProb(Qempirical)); + return ( Double.isInfinite(logLikelihood) || Double.isNaN(logLikelihood) ) ? -Double.MAX_VALUE : logLikelihood; } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index e541d6b3858..c02d90a0958 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -19,10 +19,12 @@ import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.StrandBiasUtils; -import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.utils.genotyper.GenotypePriorCalculator; -import org.broadinstitute.hellbender.tools.walkers.genotyper.*; +import org.broadinstitute.hellbender.tools.walkers.genotyper.AlleleSubsettingUtils; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypesCache; import org.broadinstitute.hellbender.utils.*; +import org.broadinstitute.hellbender.utils.genotyper.GenotypePriorCalculator; import 
org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.pileup.PileupElement; import org.broadinstitute.hellbender.utils.read.AlignmentUtils; @@ -45,8 +47,6 @@ public final class GATKVariantContextUtils { public static final int DEFAULT_PLOIDY = HomoSapiensConstants.DEFAULT_PLOIDY; - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); - public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. public static boolean isInformative(final double[] gls) { @@ -326,9 +326,7 @@ public static void makeGenotypeCall(final int ploidy, } } else { final int maxLikelihoodIndex = MathUtils.maxElementIndex(genotypeLikelihoods); - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, allelesToUse.size()); - final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(maxLikelihoodIndex); - + final GenotypeAlleleCounts alleleCounts = GenotypesCache.get(ploidy, maxLikelihoodIndex); final List finalAlleles = alleleCounts.asAlleleList(allelesToUse); if (finalAlleles.contains(Allele.NON_REF_ALLELE)) { final Allele ref = allelesToUse.stream().filter(Allele::isReference).collect(Collectors.toList()).get(0); @@ -351,8 +349,7 @@ public static void makeGenotypeCall(final int ploidy, throw new GATKException("cannot uses posteriors without an genotype prior calculator present"); } else { // Calculate posteriors. 
- final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, allelesToUse.size()); - final double[] log10Priors = gpc.getLog10Priors(glCalc, allelesToUse); + final double[] log10Priors = gpc.getLog10Priors(ploidy, allelesToUse); final double[] log10Posteriors = MathUtils.ebeAdd(log10Priors, genotypeLikelihoods); final double[] normalizedLog10Posteriors = MathUtils.scaleLogSpaceArrayForNumericalStability(log10Posteriors); // Update GP and PG annotations: @@ -368,7 +365,7 @@ public static void makeGenotypeCall(final int ploidy, gb.log10PError(getGQLog10FromPosteriors(maxPosteriorIndex, normalizedLog10Posteriors)); } // Finally we update the genotype alleles. - gb.alleles(glCalc.genotypeAlleleCountsAt(maxPosteriorIndex).asAlleleList(allelesToUse)); + gb.alleles(GenotypesCache.get(ploidy, maxPosteriorIndex).asAlleleList(allelesToUse)); } } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java index 8b3e1977b51..0f8d15a2874 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java @@ -77,12 +77,7 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() { private static final SampleLocatableMetadata TUMOR_2_EXPECTED_METADATA = new CopyRatioCollection(TUMOR_2_DENOISED_COPY_RATIOS_FILE).getMetadata(); private static final SampleLocatableMetadata NORMAL_EXPECTED_METADATA = new AllelicCountCollection(NORMAL_ALLELIC_COUNTS_FILE).getMetadata(); - /** - * Note that {@link org.broadinstitute.hellbender.tools.copynumber.formats.CopyNumberFormatsUtils#DOUBLE_FORMAT} - * is set so that doubles in somatic CNV outputs will have 6 decimal places. We thus set the allowed delta - * to detect differences at that level. 
- */ - private static final double ALLOWED_DELTA_FOR_DOUBLE_VALUES = 1E-6; + private static final double ALLOWED_DELTA_FOR_DOUBLE_VALUES = 1E-5; @Test public void testMetadata() { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCountsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCountsUnitTest.java index 5e8277b3dee..a21772d5549 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCountsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeAlleleCountsUnitTest.java @@ -2,6 +2,7 @@ import com.google.common.base.Strings; import htsjdk.variant.variantcontext.Allele; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.broadinstitute.hellbender.utils.MathUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -24,6 +25,9 @@ */ public final class GenotypeAlleleCountsUnitTest { + // tolerate a tiny error in case of different logarithmic math implementations + private static final double EPSILON = 1.0e-15; + @Test(expectedExceptions = IllegalArgumentException.class) public void testFirstError() { GenotypeAlleleCounts.first(-1); @@ -47,24 +51,6 @@ public void testAlleleCountAtError() { first.alleleCountAt(-1); } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testAlleleCountsByIndexError() { - final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(2); - first.alleleCountsByIndex(-1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCopyAlleleCountsIndexError() { - final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(2); - first.copyAlleleCounts(new int[3], -1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCopyAlleleCountsOffsetError() { - final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(2); - first.copyAlleleCounts(new 
int[3], 4); - } - @Test(dataProvider = "ploidyData") public void testFirst(final int ploidy) { final GenotypeAlleleCounts subject = GenotypeAlleleCounts.first(ploidy); @@ -88,7 +74,6 @@ public void testFirst(final int ploidy) { for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { final int[] expected = new int[maximumAlleleIndex + 1]; expected[0] = ploidy; - assertEquals(subject.alleleCountsByIndex(maximumAlleleIndex), expected); } Assert.assertNotNull(subject.toString()); @@ -127,10 +112,6 @@ private void testNextZeroPloidy() { assertEquals(next.ploidy(), 0); assertEquals(next.index(), 0); assertEquals(next.asAlleleList(testAlleles), Collections.EMPTY_LIST); - for (int maximumAlleleIndex = 0; maximumAlleleIndex <= 10; maximumAlleleIndex++) { - final int[] expected = new int[maximumAlleleIndex + 1]; - assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected); - } first.increase(); assertEquals(first, next); @@ -153,10 +134,6 @@ private void testNextOnePloidy() { assertEquals(next.alleleCountFor(next.minimumAlleleIndex() + 1), 0); assertEquals(next.ploidy(), 1); - final int[] dest = new int[next.distinctAlleleCount() * 2]; - next.copyAlleleCounts(dest, 0); - assertEquals(dest, new int[]{next.index(), 1}); - Assert.assertTrue(next.compareTo(current) > 0); Assert.assertTrue(current.compareTo(next) < 0); assertEquals(next.compareTo(next), 0); @@ -174,13 +151,6 @@ private void testNextOnePloidy() { assertEquals(next.asAlleleList(testAlleles), Collections.singletonList(testAlleles.get(next.maximumAlleleIndex()))); - for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { - final int[] expected = new int[maximumAlleleIndex + 1]; - if (maximumAlleleIndex >= current.minimumAlleleIndex() + 1) { - expected[current.minimumAlleleIndex() + 1] = 1; - } - assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected); - } current = next; } } @@ -200,15 +170,15 @@ private 
void testPloidyTwoOrMore(final int ploidy) { // test log10CombinationCount if (ploidy == 2) { - assertEquals(next.log10CombinationCount(), next.distinctAlleleCount() == 2 ? Math.log10(2) : 0.0); + assertEquals(next.log10CombinationCount(), next.distinctAlleleCount() == 2 ? Math.log10(2) : 0.0, EPSILON); } else if (ploidy == 3) { assertEquals(next.log10CombinationCount(), - next.distinctAlleleCount() == 3 ? Math.log10(6) : (next.distinctAlleleCount() == 2 ? Math.log10(6) - Math.log10(2) : 0.0)); + next.distinctAlleleCount() == 3 ? Math.log10(6) : (next.distinctAlleleCount() == 2 ? Math.log10(6) - Math.log10(2) : 0.0), EPSILON); } else { if (next.distinctAlleleCount() == 1) { - assertEquals(next.log10CombinationCount(), 0.0); + assertEquals(next.log10CombinationCount(), 0.0, EPSILON); } else if (next.distinctAlleleCount() == ploidy) { - assertEquals(next.log10CombinationCount(), MathUtils.log10Factorial(ploidy)); + assertEquals(next.log10CombinationCount(), MathUtils.logToLog10(CombinatoricsUtils.factorialLog(ploidy)), EPSILON); } } @@ -220,10 +190,6 @@ private void testPloidyTwoOrMore(final int ploidy) { alleleCountsAsList.add(alleleCount); }); next.forEachAbsentAlleleIndex(absentAlleles::add, MAXIMUM_ALLELE_INDEX + 1); - final int[] actualAlleleCounts = new int[next.distinctAlleleCount() * 2]; - next.copyAlleleCounts(actualAlleleCounts, 0); - - assertEquals(alleleCountsAsList.stream().mapToInt(n -> n).toArray(), actualAlleleCounts); assertEquals(absentAlleles.size(), MAXIMUM_ALLELE_INDEX + 1 - next.distinctAlleleCount()); next.forEachAlleleIndexAndCount((index, count) -> Assert.assertFalse(absentAlleles.contains(index))); @@ -261,7 +227,6 @@ private void testPloidyTwoOrMore(final int ploidy) { assertEquals(next.alleleCountFor(index), current.alleleCountFor(index)); } } - Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(Math.max(MAXIMUM_ALLELE_INDEX, next.maximumAlleleIndex())), expectedAlleleCountsByIndex)); assertEquals(totalCountSum, ploidy); 
Assert.assertTrue(next.compareTo(current) > 0); @@ -296,10 +261,6 @@ private void testNextZeroPloidyIncrease() { assertEquals(next.distinctAlleleCount(), 0); assertEquals(next.ploidy(), 0); assertEquals(next.index(), 0); - for (int maximumAlleleIndex = 0; maximumAlleleIndex <= 10; maximumAlleleIndex++) { - final int[] expected = new int[maximumAlleleIndex + 1]; - assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected); - } } private void testNextOnePloidyIncrease() { @@ -307,7 +268,7 @@ private void testNextOnePloidyIncrease() { while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) { final GenotypeAlleleCounts current = next.copy(); - next.increase(1); + next.increase(); assertEquals(next.minimumAlleleIndex(), next.maximumAlleleIndex()); assertEquals(next.minimumAlleleIndex(), current.minimumAlleleIndex() + 1); assertEquals(next.alleleCountAt(0), 1); @@ -327,14 +288,6 @@ private void testNextOnePloidyIncrease() { assertEquals(next.index(), current.index() + 1); assertEquals(next.ploidy(), current.ploidy()); - - for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { - final int[] expected = new int[maximumAlleleIndex + 1]; - if (maximumAlleleIndex >= current.minimumAlleleIndex() + 1) { - expected[current.minimumAlleleIndex() + 1] = 1; - } - assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected); - } } } @@ -381,7 +334,6 @@ private void testPloidyTwoOrMoreIncrease(final int ploidy) { assertEquals(next.alleleCountFor(index), current.alleleCountFor(index)); } } - Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(Math.max(MAXIMUM_ALLELE_INDEX, next.maximumAlleleIndex())), expectedAlleleCountsByIndex)); assertEquals(totalCountSum, ploidy); Assert.assertTrue(next.compareTo(current) > 0); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculatorUnitTest.java 
b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculatorUnitTest.java new file mode 100644 index 00000000000..fb64280f2c2 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeIndexCalculatorUnitTest.java @@ -0,0 +1,148 @@ +package org.broadinstitute.hellbender.tools.walkers.genotyper; + +import org.apache.commons.math3.exception.MathArithmeticException; +import org.broadinstitute.hellbender.utils.IndexRange; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.*; + +public class GenotypeIndexCalculatorUnitTest { + + // ploidy, allele, index of first genotype with that allele + @DataProvider(name = "firstGenotypeWithAlleleData") + public Object[][] firstGenotypeWithAlleleData() { + return new Object[][] { + {1, 0, 0 }, + {1, 5, 5}, + {2, 0, 0 }, + {2, 1, 1 }, + {2, 2, 3}, + {4, 0, 0}, + {4, 1, 1}, + {4, 2, 5}, + {4, 3, 15} + }; + } + + @Test(dataProvider = "firstGenotypeWithAlleleData") + public void testIndexOfFirstGenotypeWithAllele(final int ploidy, final int allele, final int expected) { + Assert.assertEquals(GenotypeIndexCalculator.indexOfFirstGenotypeWithAllele(ploidy, allele), expected); + } + + // ploidy, allele count + @DataProvider(name = "genotypeCountData") + public Object[][] genotypeCountData() { + return new Object[][] { + {1, 1}, + {1, 5}, + {2, 1}, + {2, 5}, + {3, 5} + }; + } + + // a ploidy-P genotype with A alleles can be decomposed as 0 <= N <= P copies of the Ath allele and a ploidy P-N genotype + // using only A-1 alleles. If this recursion is satisfied, then the genotype method is correct. 
+ @Test(dataProvider = "genotypeCountData") + public void testGenotypeCount(final int ploidy, final int alleleCount) { + final int direct = GenotypeIndexCalculator.genotypeCount(ploidy, alleleCount); + if (ploidy == 1) { + Assert.assertEquals(direct, alleleCount); + } else { + // the '1' below is the N = P term + final int recursive = 1 + new IndexRange(0, ploidy).sumInt(n -> GenotypeIndexCalculator.genotypeCount(ploidy - n, alleleCount - 1)); + Assert.assertEquals(direct, recursive); + } + } + + @Test(expectedExceptions = MathArithmeticException.class) + public void testGenotypeCountOverflow() throws Exception { + final int genotypeCount = GenotypeIndexCalculator.genotypeCount(10_000, 10_000); + } + + // alleles list, expected index + @DataProvider(name = "allelesToIndexData") + public Object[][] allelesToIndexData() { + return new Object[][] { + {new int[] {0}, 0}, + {new int[] {0,0}, 0}, + {new int[] {0,0,0}, 0}, + {new int[] {0,1}, 1}, + {new int[] {1,0}, 1}, + {new int[] {1,1}, 2}, + {new int[] {100}, 100}, + {new int[] {0,100}, 5050}, + {new int[] {100,0}, 5050}, + {new int[] {0,1,2}, 5}, + {new int[] {2,0,0}, 4}, + {new int[] {1,2,1}, 6}, + {new int[] {2,1,2}, 8}, + {new int[] {2,2,2}, 9}, + }; + } + + @Test(dataProvider = "allelesToIndexData") + public void testAllelesToIndex(final int[] alleles, final int index) { + Assert.assertEquals(GenotypeIndexCalculator.allelesToIndex(alleles), index); + } + + // allele counts array, expected index + @DataProvider(name = "alleleCountsToIndexData") + public Object[][] alleleCountsToIndexData() { + return new Object[][] { + {new int[] {0,1}, 0}, + {new int[] {0,2}, 0}, + {new int[] {0,3}, 0}, + {new int[] {0,1,1,1}, 1}, + {new int[] {1,1,0,1}, 1}, + {new int[] {1,2}, 2}, + {new int[] {100,1}, 100}, + {new int[] {0,1,100,1}, 5050}, + {new int[] {100,1,0,1}, 5050}, + {new int[] {0,1,1,1,2,1}, 5}, + {new int[] {2,1,0,2}, 4}, + {new int[] {1,2,2,1}, 6}, + {new int[] {2,2,1,1}, 8}, + {new int[] {2,3}, 9}, + }; + } + + 
@Test(dataProvider = "alleleCountsToIndexData") + public void testAlleleCountsToIndex(final int[] counts, final int index) { + Assert.assertEquals(GenotypeIndexCalculator.alleleCountsToIndex(counts), index); + } + + @Test + public void testComputeMaxAcceptableAlleleCount(){ + Assert.assertEquals(1024, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(1, 1024)); + Assert.assertEquals(44, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(2, 1024)); + Assert.assertEquals(17, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(3, 1024)); + Assert.assertEquals(5, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(10, 1024)); + Assert.assertEquals(3, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(20, 1024)); + Assert.assertEquals(2, GenotypeIndexCalculator.computeMaxAcceptableAlleleCount(100, 1024)); + } + + // ploidy, new to old allele reordering, expected result + @DataProvider(name = "newToOldMapData") + public Object[][] newToOldMapData() { + return new Object[][] { + {1, new int[] {0}, new int[] {0}}, + {1, new int[] {1}, new int[] {1}}, + {2, new int[] {0}, new int[] {0}}, + {2, new int[] {1}, new int[] {2}}, + {2, new int[] {2}, new int[] {5}}, + {2, new int[] {0,1}, new int[] {0,1,2}}, + {2, new int[] {1,0}, new int[] {2,1,0}}, + {2, new int[] {0,2}, new int[] {0,3,5}} + }; + } + + @Test(dataProvider = "newToOldMapData") + public void testNewToOldIndexMap(final int ploidy, final int[] newToOldAlleleMap, final int[] expected) { + final int[] result = GenotypeIndexCalculator.newToOldGenotypeMap(ploidy, newToOldAlleleMap); + Assert.assertEquals(result, expected); + } + +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java index 2df30719033..a78d9bda580 100644 --- 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorUnitTest.java @@ -15,131 +15,41 @@ import java.util.*; /** - * Tests {@link GenotypeLikelihoodCalculators} and {@link GenotypeLikelihoodCalculatorDRAGEN}. + * Tests {@link GenotypesCache}. * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ public final class GenotypeLikelihoodCalculatorUnitTest { - - @Test(dataProvider = "ploidyAndMaximumAlleleData") - public void testPloidyAndMaximumAllele(final int ploidy, final int alleleCount) { - final GenotypeLikelihoodCalculator calculator = new GenotypeLikelihoodCalculators().getInstance(ploidy, alleleCount); - Assert.assertNotNull(calculator); - Assert.assertEquals(calculator.ploidy(), ploidy); - Assert.assertEquals(calculator.alleleCount(), alleleCount); - Assert.assertEquals(calculator.genotypeCount(), calculateGenotypeCount(ploidy, alleleCount), " ploidy = " + ploidy + " alleleCount = " + alleleCount); - final int genotypeCount = calculator.genotypeCount(); - final int testGenotypeCount = Math.min(30000, genotypeCount); - for (int i = 0; i < testGenotypeCount; i++) { - final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(i); - Assert.assertNotNull(alleleCounts); - if (i > 0) - Assert.assertTrue(calculator.genotypeAlleleCountsAt(i - 1).compareTo(alleleCounts) < 0); - final int[] alleleArray = new int[ploidy]; - int index = 0; - for (int j = 0; j < alleleCounts.distinctAlleleCount(); j++) - Arrays.fill(alleleArray, index, index += alleleCounts.alleleCountAt(j), alleleCounts.alleleIndexAt(j)); - final int[] alleleCountArray = new int[alleleCounts.distinctAlleleCount() << 1]; - alleleCounts.copyAlleleCounts(alleleCountArray,0); - Assert.assertEquals(index, ploidy); - Assert.assertEquals(calculator.allelesToIndex(alleleArray), i); - 
Assert.assertEquals(calculator.alleleCountsToIndex(alleleCountArray), i); - } - } - - @Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData", dependsOnMethods = "testPloidyAndMaximumAllele") + + @Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData") public void testLikelihoodCalculation(final int ploidy, final int alleleCount, final int[] readCount) { final AlleleLikelihoods readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount, readCount); - final GenotypeLikelihoodCalculator calculator = new GenotypeLikelihoodCalculators().getInstance(ploidy, alleleCount); - final int genotypeCount = calculator.genotypeCount(); - final int testGenotypeCount = Math.min(30000, genotypeCount); final int sampleCount = readCount.length; for (int s = 0; s < sampleCount ; s++) { final LikelihoodMatrix sampleLikelihoods = readLikelihoods.sampleMatrix(s); - final GenotypeLikelihoods genotypeLikelihoods = calculator.genotypeLikelihoods(sampleLikelihoods); + final GenotypeLikelihoods genotypeLikelihoods = GenotypeLikelihoodCalculator.log10GenotypeLikelihoods(ploidy, sampleLikelihoods); final double[] genotypeLikelihoodsDoubles = genotypeLikelihoods.getAsVector(); - Assert.assertEquals(genotypeLikelihoodsDoubles.length, genotypeCount); - for (int i = 0; i < testGenotypeCount; i++) { - final GenotypeAlleleCounts genotypeAlleleCounts = calculator.genotypeAlleleCountsAt(i); + for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(ploidy, alleleCount)) { Assert.assertNotNull(genotypeLikelihoods); final double[] readGenotypeLikelihoods = new double[sampleLikelihoods.evidenceCount()]; for (int r = 0; r < sampleLikelihoods.evidenceCount(); r++) { - final double[] compoments = new double[genotypeAlleleCounts.distinctAlleleCount()]; - for (int ar = 0; ar < genotypeAlleleCounts.distinctAlleleCount(); ar++) { - final int a = genotypeAlleleCounts.alleleIndexAt(ar); - final int aCount = genotypeAlleleCounts.alleleCountAt(ar); + final double[] compoments = 
new double[gac.distinctAlleleCount()]; + for (int ar = 0; ar < gac.distinctAlleleCount(); ar++) { + final int a = gac.alleleIndexAt(ar); + final int aCount = gac.alleleCountAt(ar); final double readLk = sampleLikelihoods.get(a, r); compoments[ar] = readLk + Math.log10(aCount); } readGenotypeLikelihoods[r] = MathUtils.approximateLog10SumLog10(compoments) - Math.log10(ploidy); } final double genotypeLikelihood = MathUtils.sum(readGenotypeLikelihoods); - Assert.assertEquals(genotypeLikelihoodsDoubles[i], genotypeLikelihood, 0.0001); + Assert.assertEquals(genotypeLikelihoodsDoubles[gac.index()], genotypeLikelihood, 0.0001 * Math.abs(genotypeLikelihood)); } } } - @Test(dataProvider = "ploidyAndMaximumAlleleAndNewMaximumAlleleData") - public void testGenotypeIndexMap(final int ploidy, final int oldAlleleCount, final int newAlleleCount) { - final Random rnd = Utils.getRandomGenerator(); - final int maxAlleleCount = Math.max(oldAlleleCount, newAlleleCount); - final int[] alleleMap = new int[newAlleleCount]; - final Map> reverseMap = new LinkedHashMap<>(oldAlleleCount); - for (int i = 0; i < alleleMap.length; i++) { - alleleMap[i] = rnd.nextInt(oldAlleleCount); - if (reverseMap.get(alleleMap[i]) == null) reverseMap.put(alleleMap[i],new LinkedHashSet<>(6)); - reverseMap.get(alleleMap[i]).add(i); - } - final GenotypeLikelihoodCalculators calculators = new GenotypeLikelihoodCalculators(); - final GenotypeLikelihoodCalculator calculator = calculators.getInstance(ploidy, maxAlleleCount); - - final int[] genotypeIndexMap = calculator.genotypeIndexMap(alleleMap, calculators); - Assert.assertNotNull(genotypeIndexMap); - Assert.assertEquals(genotypeIndexMap.length, calculators.genotypeCount(ploidy, newAlleleCount)); - final GenotypeLikelihoodCalculator oldCalculator = calculators.getInstance(ploidy, oldAlleleCount); - final GenotypeLikelihoodCalculator newCalculator = calculators.getInstance(ploidy, newAlleleCount); - - for (int i = 0; i < genotypeIndexMap.length; i++) { - final 
GenotypeAlleleCounts oldCounts = oldCalculator.genotypeAlleleCountsAt(genotypeIndexMap[i]); - final GenotypeAlleleCounts newCounts = newCalculator.genotypeAlleleCountsAt(i); - final int[] reverseCounts = new int[oldAlleleCount]; - for (int j = 0; j < newCounts.distinctAlleleCount(); j++) { - final int newIndex = newCounts.alleleIndexAt(j); - final int newRepeats = newCounts.alleleCountAt(j); - final int expectedOldIndex = alleleMap[newIndex]; - final int oldIndexRank = oldCounts.alleleRankFor(expectedOldIndex); - Assert.assertNotEquals(oldIndexRank, -1); - final int oldIndex = oldCounts.alleleIndexAt(oldIndexRank); - final int oldRepeats = oldCounts.alleleCountAt(oldIndexRank); - Assert.assertEquals(oldIndex, expectedOldIndex); - // not necessarily the same count if two or more new alleles map the same old allele. - Assert.assertTrue(oldRepeats >= newRepeats); - reverseCounts[oldIndex] += newRepeats; - } - for (int j = 0; j < oldAlleleCount; j++) - Assert.assertEquals(oldCounts.alleleCountFor(j), reverseCounts[j]); - } - } - - - // Simple inefficient calculation of the genotype count given the ploidy. 
- private int calculateGenotypeCount(final int ploidy, final int alleleCount) { - if (ploidy == 0) - return 0; - else if (ploidy == 1) - return alleleCount; - else if (ploidy == 2) - return ((alleleCount) * (alleleCount + 1)) >> 1; - else if (alleleCount == 0) - return 0; - else { - return calculateGenotypeCount(ploidy - 1, alleleCount) + - calculateGenotypeCount(ploidy, alleleCount - 1); - } - } - - private static final int[] MAXIMUM_ALLELE = { 1, 2, 5, 6 }; + private static final int[] MAXIMUM_ALLELE = { 1, 2, 5, 6}; private static final int[] PLOIDY = { 1, 2, 3, 20 }; @@ -160,24 +70,4 @@ public Object[][] ploidyAndMaximumAlleleAndReadCountsData() { result[index++] = new Object[] { i, j, k }; return result; } - - @DataProvider(name="ploidyAndMaximumAlleleData") - public Object[][] ploidyAndMaximumAlleleData() { - final Object[][] result = new Object[PLOIDY.length * MAXIMUM_ALLELE.length][]; - int index = 0; - for (final int i : PLOIDY) - for (final int j : MAXIMUM_ALLELE) - result[index++] = new Object[] { i, j }; - return result; - } - - @DataProvider(name="ploidyAndMaximumAlleleAndNewMaximumAlleleData") - public Object[][] ploidyAndMaximumAlleleAndNewMaximumAlleleData() { - final List result = new ArrayList<>(PLOIDY.length * MAXIMUM_ALLELE.length * 20); - for (final int i : PLOIDY) - for (final int j : MAXIMUM_ALLELE) - for (int k = 0; k < (i < 10? 
j * 2 : j + 1); k++) - result.add(new Object[] { i, j, k }); - return result.toArray(new Object[result.size()][]); - } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorsUnitTest.java deleted file mode 100644 index 56e8f2f6a83..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeLikelihoodCalculatorsUnitTest.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -import org.broadinstitute.hellbender.GATKBaseTest; -import org.broadinstitute.hellbender.exceptions.UserException; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -public final class GenotypeLikelihoodCalculatorsUnitTest extends GATKBaseTest { - - GenotypeLikelihoodCalculators calcs; - - @BeforeClass - public void init(){ - calcs = new GenotypeLikelihoodCalculators(); - } - - - @DataProvider(name="genotypeCount") - public Iterator ploidyAndMaximumAlleleAndReadCountsData() { - final int[][] expected = - {{0, 1, 1, 1, 1, 1, 1 }, - {0, 1, 2, 3, 4, 5, 6 }, - {0, 1, 3, 6, 10, 15, 21 }, - {0, 1, 4, 10, 20, 35, 56 }, - {0, 1, 5, 15, 35, 70, 126 }}; - - final List result = new ArrayList<>(); - for (int i = 0; i < expected.length; i++){ - for (int j = 0; j < expected[i].length; j++){ - result.add(new Object[]{i, j, expected[i][j]}); - } - } - - //now add it backwards too just to check ordering issues - for (int i = expected.length-1; i >= 0; i--){ - for (int j = expected[i].length-1; j >= 0; j--){ - result.add(new Object[]{i, j, expected[i][j]}); - } - } - - return result.iterator(); - } - @Test(dataProvider = "genotypeCount") - public void 
testGenotypeCountSharedInstance(int ploidy, int alleleCount, int expected) throws Exception { - Assert.assertEquals(calcs.genotypeCount(ploidy, alleleCount), expected); - } - - @Test(dataProvider = "genotypeCount") - public void testInstanceSharedInstance(int ploidy, int alleleCount, int expected) throws Exception { - if (ploidy > 0) { - final GenotypeLikelihoodCalculator inst = calcs.getInstance(ploidy, alleleCount); - Assert.assertEquals(inst.genotypeCount(), expected); - Assert.assertEquals(inst.ploidy(), ploidy); - Assert.assertEquals(inst.alleleCount(), alleleCount); - } - } - - @Test(dataProvider = "genotypeCount") - public void testGenotypeCountNewInstance(int ploidy, int alleleCount, int expected) throws Exception { - Assert.assertEquals(new GenotypeLikelihoodCalculators().genotypeCount(ploidy, alleleCount), expected); - } - - @Test(dataProvider = "genotypeCount") - public void testInstanceNewInstance(int ploidy, int alleleCount, int expected) throws Exception { - if (ploidy > 0) { - final GenotypeLikelihoodCalculator inst = new GenotypeLikelihoodCalculators().getInstance(ploidy, alleleCount); - Assert.assertEquals(inst.genotypeCount(), expected); - Assert.assertEquals(inst.ploidy(), ploidy); - Assert.assertEquals(inst.alleleCount(), alleleCount); - } - } - - @Test(expectedExceptions = UserException.WarnableAnnotationFailure.class) - public void testGenotypeCountOverflow() throws Exception { - final int genotypeCount = new GenotypeLikelihoodCalculators().genotypeCount(10_000, 10_000); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testGenotypeCountNegativePloidy() throws Exception { - new GenotypeLikelihoodCalculators().genotypeCount(-1, 1); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testGenotypeCountNegativeAlleleCount() throws Exception { - new GenotypeLikelihoodCalculators().genotypeCount(1, -1); - } - - @Test - public void testComputeMaxAcceptableAlleleCount(){ - 
Assert.assertEquals(1024, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(1, 1024)); - Assert.assertEquals(44, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(2, 1024)); - Assert.assertEquals(17, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(3, 1024)); - Assert.assertEquals(5, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(10, 1024)); - Assert.assertEquals(3, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(20, 1024)); - Assert.assertEquals(2, GenotypeLikelihoodCalculators.computeMaxAcceptableAlleleCount(100, 1024)); - } -} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCacheUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCacheUnitTest.java new file mode 100644 index 00000000000..ceed9ac6975 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypesCacheUnitTest.java @@ -0,0 +1,55 @@ +package org.broadinstitute.hellbender.tools.walkers.genotyper; + +import org.broadinstitute.hellbender.GATKBaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public final class GenotypesCacheUnitTest extends GATKBaseTest { + + + // ploidy, genotypeIndex, expected allele counts array + @DataProvider(name = "randomAccessData") + public Object[][] randomAccessData() { + return new Object[][] { + {1, 0, new int[] {1}}, + {1, 7, new int[] {0,0,0,0,0,0,0,1}}, + {2, 4, new int[] {0,1,1}}, + {2, 4, new int[] {0,1,1}}, + {3, 13, new int[] {1,0,1,1}}, + {4, (int) GenotypeIndexCalculator.indexOfFirstGenotypeWithAllele(4,5), new int[] {3,0,0,0,0,1}}, + {4, (int) GenotypeIndexCalculator.indexOfFirstGenotypeWithAllele(4,4), new int[] {3,0,0,0,1}}, + {4, (int) GenotypeIndexCalculator.indexOfFirstGenotypeWithAllele(4,3), new int[] {3,0,0,1}} + + }; + } + + @Test(dataProvider = "randomAccessData") + public 
void testCache(final int ploidy, final int genotypeIndex, final int[] expectedAlleleCounts) { + final GenotypeAlleleCounts gac = GenotypesCache.get(ploidy,genotypeIndex); + final int distinctCount = (int) Arrays.stream(expectedAlleleCounts).filter(n -> n > 0).count(); + Assert.assertEquals(gac.distinctAlleleCount(), distinctCount); + + for (int n = 0; n < expectedAlleleCounts.length; n++) { + Assert.assertEquals(gac.alleleCountFor(n), expectedAlleleCounts[n]); + } + + for (int n = expectedAlleleCounts.length; n < expectedAlleleCounts.length + 3; n++) { + Assert.assertEquals(gac.alleleCountFor(n), 0); + } + + final GenotypeAlleleCounts next = gac.next(); + Assert.assertTrue(next.equals(GenotypesCache.get(ploidy, genotypeIndex+1))); + final GenotypeAlleleCounts nextNext = next.next(); + + GenotypeAlleleCounts plusEleven = next.copy(); + for (int i = 0; i < 10; i++) { + plusEleven = plusEleven.next(); + } + Assert.assertTrue(plusEleven.equals(GenotypesCache.get(ploidy, genotypeIndex+11))); + Assert.assertTrue(nextNext.equals(GenotypesCache.get(ploidy, genotypeIndex+2))); + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModelUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModelUnitTest.java index a086dca4156..d63e46c96c8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModelUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/IndependentSampleGenotypesModelUnitTest.java @@ -36,7 +36,7 @@ public void testCalculateLikelihoods(final int[] ploidies, final int alleleCount Assert.assertNotNull(sampleLikelihoods); final double[] values = sampleLikelihoods.getAsVector(); Assert.assertNotNull(values); - Assert.assertEquals(values.length, new GenotypeLikelihoodCalculators().getInstance(ploidies[i], genotypingAlleleList.numberOfAlleles()).genotypeCount()); + 
Assert.assertEquals(values.length, GenotypeIndexCalculator.genotypeCount(ploidies[i], genotypingAlleleList.numberOfAlleles())); for (int j = 0; j < values.length; j++) Assert.assertTrue(values[j] <= 0); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java index a8e3164a7ca..27b71988fe2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/afcalc/AlleleFrequencyCalculatorUnitTest.java @@ -4,9 +4,8 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.util.MathArrays; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator; import org.broadinstitute.hellbender.utils.MathUtils; import org.testng.Assert; import org.testng.annotations.Test; @@ -20,7 +19,6 @@ */ public class AlleleFrequencyCalculatorUnitTest extends GATKBaseTest { private static final double EPS = 1.0e-3; - private static final GenotypeLikelihoodCalculators GL_CALCS = new GenotypeLikelihoodCalculators(); private static final Allele A = Allele.create("A", true); private static final Allele B = Allele.create("C"); @@ -159,7 +157,7 @@ public void testManyVeryConfidentSamples() { // to first-order in x, which is an extremely good approximation, this is 1 - x/2 // thus the probability that N identical samples don't have the C allele is (x/2)^N, and the log-10 probability of this is // N * [log_10(1/2) - PL/10] - final double 
expectedLog10ProbabilityOfNoCAllele = numSamples * (MathUtils.LOG10_ONE_HALF - EXTREMELY_CONFIDENT_PL / 10); + final double expectedLog10ProbabilityOfNoCAllele = numSamples * (MathUtils.LOG10_ONE_HALF - EXTREMELY_CONFIDENT_PL / 10.); Assert.assertEquals(result.getLog10PosteriorOfAlleleAbsent(C), expectedLog10ProbabilityOfNoCAllele, numSamples * 0.01); } } @@ -202,7 +200,7 @@ public void testManyRefSamplesDontKillGoodVariant() { genotypeList.add(AB); final VariantContext vc = makeVC(alleles, genotypeList); final double log10PRef = afCalc.calculate(vc).log10ProbOnlyRefAlleleExists(); - Assert.assertTrue(log10PRef < (-EXTREMELY_CONFIDENT_PL/10) + Math.log10(numRef) + 1); + Assert.assertTrue(log10PRef < (-EXTREMELY_CONFIDENT_PL / 10.) + Math.log10(numRef) + 1); } } @@ -326,9 +324,8 @@ public void testSingleSampleBiallelicShortcut() { // make PLs that correspond to an obvious call i.e. one PL is relatively big and the rest are zero // alleleCounts is the GenotypeAlleleCounts format for the obvious genotype, with repeats but in no particular order private static int[] PLsForObviousCall(final int ploidy, final int numAlleles, final int[] alleleCounts, final int PL) { - final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, numAlleles); - final int[] result = Collections.nCopies(glCalc.genotypeCount(), PL).stream().mapToInt(n->n).toArray(); - result[glCalc.alleleCountsToIndex(alleleCounts)] = 0; + final int[] result = Collections.nCopies(GenotypeIndexCalculator.genotypeCount(ploidy, numAlleles), PL).stream().mapToInt(n->n).toArray(); + result[GenotypeIndexCalculator.alleleCountsToIndex(alleleCounts)] = 0; return result; } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/IndexRangeUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/IndexRangeUnitTest.java index 851b2a500b5..dec8fe89239 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/IndexRangeUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/utils/IndexRangeUnitTest.java @@ -76,6 +76,16 @@ public void testSum(final int from, final int to) { Assert.assertEquals(range.sum(func), IntStream.range(from, to).mapToDouble(func).sum(), 1.0e-8); } + @Test(dataProvider = "correctFromToData", dependsOnMethods = "testCorrectConstruction") + public void testSumInt(final int from, final int to) { + final IndexRange range = new IndexRange(from,to); + final IntUnaryOperator linearFunc = n -> 3*n + 11; + Assert.assertEquals(range.sumInt(linearFunc), IntStream.range(from, to).map(linearFunc).sum(), 1.0e-8); + + final IntUnaryOperator cubicFunc = n -> 3*n*n*n - 4*n*n + 17; + Assert.assertEquals(range.sumInt(cubicFunc), IntStream.range(from, to).map(cubicFunc).sum(), 1.0e-8); + } + @Test(dataProvider = "correctFromToData", dependsOnMethods = "testCorrectConstruction") public void testFilter(final int from, final int to) { final IndexRange range = new IndexRange(from,to); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/MathUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/MathUtilsUnitTest.java index 13f53234da0..7e31cb8c9b8 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/MathUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/MathUtilsUnitTest.java @@ -3,6 +3,7 @@ import org.apache.commons.math3.distribution.NormalDistribution; import org.apache.commons.math3.random.RandomGenerator; import org.apache.commons.math3.random.RandomGeneratorFactory; +import org.apache.commons.math3.util.CombinatoricsUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.GATKBaseTest; @@ -48,13 +49,6 @@ public void testRunningAverage() { Assert.assertTrue(r.stddev() - 9072.6515881128 < 2e-10); } - @Test - public void log10BinomialProbability() throws Exception { - Assert.assertEquals(MathUtils.log10BinomialProbability(2, 1), log10(0.5),1E-9); - 
Assert.assertEquals(MathUtils.log10BinomialProbability(4, 1), log10(0.25),1E-9); - Assert.assertEquals(MathUtils.log10BinomialProbability(4, 2), log10(0.375),1E-9); - } - @Test(dataProvider = "log10OneMinusPow10Data") public void testLog10OneMinusPow10(final double x, final double expected) { final double actual = MathUtils.log10OneMinusPow10(x); @@ -151,14 +145,6 @@ public void testLog10OneMinusX(){ Assert.assertEquals(Double.NEGATIVE_INFINITY, MathUtils.log10OneMinusX(1.0), 1e-6); } - @Test - public void testLog10Gamma() { - //The expected values were checked against Wolphram Alpha - Assert.assertEquals(MathUtils.log10Gamma(4.0), 0.7781513, 1e-6); - Assert.assertEquals(MathUtils.log10Gamma(10), 5.559763, 1e-6); - Assert.assertEquals(MathUtils.log10Gamma(10654), 38280.532152137, 1e-6); - } - @Test public void testBinomialProbability() { // results from Wolfram Alpha @@ -171,60 +157,6 @@ public void testBinomialProbability() { Assert.assertEquals(MathUtils.binomialProbability(300, 112, 0.98), 2.34763e-236, 1e-237); } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testBinomialProbabilityError() { - Assert.assertEquals(MathUtils.binomialProbability(3, 2, 1.5), 0.375, 0.0001); - } - - @Test - public void testBinomialCoefficient() { - // results from Wolfram Alpha - Assert.assertEquals(MathUtils.binomialCoefficient(4, 2), 6.0, 1e-6); - Assert.assertEquals(MathUtils.binomialCoefficient(10, 3), 120.0, 1e-6); - Assert.assertEquals(MathUtils.binomialCoefficient(20, 3), 1140.0, 1e-6); - Assert.assertEquals(MathUtils.binomialCoefficient(100, 4), 3921225.0, 1e-6); - } - - @Test - public void testLog10BinomialCoefficient() { - // note that we can test the binomial coefficient calculation indirectly via Newton's identity - // (1+z)^m = sum (m choose k)z^k - double[] z_vals = new double[]{0.999, 0.9, 0.8, 0.5, 0.2, 0.01, 0.0001}; - int[] exponent = new int[]{5, 15, 25, 50, 100}; - for (double z : z_vals) { - double logz = log10(z); - for (int exp : 
exponent) { - double expected_log = exp * log10(1 + z); - double[] newtonArray_log = new double[1 + exp]; - for (int k = 0; k <= exp; k++) { - newtonArray_log[k] = MathUtils.log10BinomialCoefficient(exp, k) + k * logz; - } - Assert.assertEquals(MathUtils.log10SumLog10(newtonArray_log), expected_log, 1e-6); - } - } - - // results from Wolfram Alpha - Assert.assertEquals(MathUtils.log10BinomialCoefficient(4, 2), 0.7781513, 1e-6); - Assert.assertEquals(MathUtils.log10BinomialCoefficient(10, 3), 2.079181, 1e-6); - Assert.assertEquals(MathUtils.log10BinomialCoefficient(103928, 119), 400.2156, 1e-4); - } - - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testLogBinomialCoefficientErrorN() { - Assert.assertEquals(MathUtils.log10BinomialCoefficient(-1, 1), 0.0, 1e-6); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testLogBinomialCoefficientErrorK() { - Assert.assertEquals(MathUtils.log10BinomialCoefficient(1, -1), 0.0, 1e-6); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testLogBinomialCoefficientErrorKmoreThanN() { - Assert.assertEquals(MathUtils.log10BinomialCoefficient(1, 2), 0.0, 1e-6); - } - @Test public void testApproximateLogSumLog() { final double requiredPrecision = 1E-4; @@ -332,32 +264,6 @@ public void testNormalize(){ assertEqualsDoubleArray(normalizedLogInLogExpected, normalizedLogInLog10, error); } - @Test - public void testLog10Factorial() { - // results from Wolfram Alpha - Assert.assertEquals(log10Factorial(4), 1.3802112, 1e-6); - Assert.assertEquals(log10Factorial(10), 6.559763, 1e-6); - Assert.assertEquals(log10Factorial(200), 374.896888, 1e-3); - Assert.assertEquals(log10Factorial(12342), 45138.2626503, 1e-1); - - - int small_start = 1; - int med_start = 200; - int large_start = 12342; - double log10Factorial_small = 0; - double log10Factorial_middle = log10Factorial(med_start); - double log10Factorial_large = log10Factorial(large_start); - for ( int i = 1; 
i < 1000; i++ ) { - log10Factorial_small += log10(i + small_start); - log10Factorial_middle += log10(i + med_start); - log10Factorial_large += log10(i + large_start); - - Assert.assertEquals(log10Factorial(small_start + i),log10Factorial_small,1e-6); - Assert.assertEquals(log10Factorial(med_start + i),log10Factorial_middle,1e-3); - Assert.assertEquals(log10Factorial(large_start + i),log10Factorial_large,1e-1); - } - } - @Test public void testSum() { double[] doubleTest = {-1,0,1,2,3}; @@ -921,8 +827,6 @@ public void testDirichletMultinomial() { new double[]{90,20000,400,20,4,1280,720,1} ); - Assert.assertTrue(! Double.isInfinite(MathUtils.log10Gamma(1e-3)) && ! Double.isNaN(MathUtils.log10Gamma(1e-3))); - int[] numAlleleSampled = new int[]{2,5,10,20,25}; for ( double[] alleles : testAlleles ) { for ( int count : numAlleleSampled ) { diff --git a/src/test/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoodsUnitTester.java b/src/test/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoodsUnitTester.java index f6a34bf416e..9afdd90263a 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoodsUnitTester.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/genotyper/ReadLikelihoodsUnitTester.java @@ -37,7 +37,7 @@ public static AlleleLikelihoods readLikelihoods(final int alle * produces a test likelihood depending on the sample, read and allele index. 
*/ private static double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) { - return - Math.abs(3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1)); + return - Math.abs((3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1))%13); } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/help/DocumentationGenerationIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/utils/help/DocumentationGenerationIntegrationTest.java index e1800e1ed29..a0f51b0f6bd 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/help/DocumentationGenerationIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/help/DocumentationGenerationIntegrationTest.java @@ -2,8 +2,10 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.testng.Assert; +import org.testng.SkipException; import org.testng.annotations.Test; +import java.util.spi.ToolProvider; import java.io.File; import java.io.IOException; import java.io.PrintWriter; @@ -11,7 +13,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.spi.ToolProvider; /** * Smoke test to run doc gen on a subset of classes to make sure it doesn't regress. 
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatumUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatumUnitTest.java index 6d360f40c23..0d131f61201 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatumUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/recalibration/RecalDatumUnitTest.java @@ -153,10 +153,10 @@ public void testNoObs() { public void testlog10QempPrior() { for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) { for ( int Qrep = 0; Qrep <= QualityUtils.MAX_SAM_QUAL_SCORE; Qrep++ ) { - final double log10Prior = RecalDatum.log10QempPrior(Qemp, Qrep); - Assert.assertTrue(log10Prior < 0.0); - Assert.assertFalse(Double.isInfinite(log10Prior)); - Assert.assertFalse(Double.isNaN(log10Prior)); + final double logPrior = RecalDatum.logQempPrior(Qemp, Qrep); + Assert.assertTrue(logPrior < 0.0); + Assert.assertFalse(Double.isInfinite(logPrior)); + Assert.assertFalse(Double.isNaN(logPrior)); } } @@ -164,10 +164,10 @@ public void testlog10QempPrior() { int maxQemp = -1; double maxQempValue = -Double.MAX_VALUE; for ( int Qemp = 0; Qemp <= QualityUtils.MAX_SAM_QUAL_SCORE; Qemp++ ) { - final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep); - if ( log10prior > maxQempValue ) { + final double logprior = RecalDatum.logQempPrior(Qemp, Qrep); + if ( logprior > maxQempValue ) { maxQemp = Qemp; - maxQempValue = log10prior; + maxQempValue = logprior; } } Assert.assertEquals(maxQemp, Qrep); @@ -209,19 +209,19 @@ public void testlog10QempLikelihood() { if ( error > observation ) continue; - final double log10likelihood = RecalDatum.log10QempLikelihood(Qemp, observation, error); - Assert.assertTrue(observation == 0 ? 
MathUtils.compareDoubles(log10likelihood, 0.0) == 0 : log10likelihood < 0.0 || Qemp == 0.0); - Assert.assertFalse(Double.isInfinite(log10likelihood)); - Assert.assertFalse(Double.isNaN(log10likelihood)); + final double loglikelihood = RecalDatum.logQempLikelihood(Qemp, observation, error); + Assert.assertTrue(observation == 0 ? MathUtils.compareDoubles(loglikelihood, 0.0) == 0 : loglikelihood < 0.0 || Qemp == 0.0); + Assert.assertFalse(Double.isInfinite(loglikelihood)); + Assert.assertFalse(Double.isNaN(loglikelihood)); } } } long bigNum = Integer.MAX_VALUE; bigNum *= 2L; - final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000); - Assert.assertTrue(log10likelihood < 0.0); - Assert.assertFalse(Double.isInfinite(log10likelihood)); - Assert.assertFalse(Double.isNaN(log10likelihood)); + final double loglikelihood = RecalDatum.logQempLikelihood(30, bigNum, 100000); + Assert.assertTrue(loglikelihood < 0.0); + Assert.assertFalse(Double.isInfinite(loglikelihood)); + Assert.assertFalse(Double.isNaN(loglikelihood)); } } \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.af.igv.seg index 585a0fa23a7..c3104235f1b 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.af.igv.seg @@ -1,2 +1,3 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 138125 62871232 827 0.372342 +SM-74P4M-1 20 138125 41076751 487 0.286650 +SM-74P4M-1 20 41962342 62871232 340 0.493819 diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.igv.seg index 0c67842a77d..861f41d5748 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.igv.seg @@ -1,2 +1,3 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 138125 62871232 0 NaN +SM-74P4M-1 20 138125 41076751 0 NaN +SM-74P4M-1 20 41962342 62871232 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.seg index be3acf2547a..89ed84f4185 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.cr.seg @@ -2,4 +2,5 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO MEAN_LOG2_COPY_RATIO -20 138125 62871232 0 NaN +20 138125 41076751 0 NaN +20 41962342 62871232 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.af.param 
index bb6129c4004..502ee607543 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.092482 1.106161 1.113179 1.124902 1.131327 1.144333 1.154568 1.164827 1.180697 -BIAS_VARIANCE 0.434015 0.446432 0.459002 0.467580 0.475156 0.484281 0.487932 0.492417 0.495571 -OUTLIER_PROBABILITY 0.057314 0.069014 0.076118 0.079128 0.082268 0.089037 0.093737 0.100190 0.110147 +MEAN_BIAS 1.080392 1.088670 1.100639 1.108221 1.116697 1.126881 1.132606 1.149726 1.164609 +BIAS_VARIANCE 0.414955 0.427961 0.437249 0.460279 0.469526 0.480451 0.487824 0.492685 0.498206 +OUTLIER_PROBABILITY 0.068313 0.076651 0.084386 0.089322 0.094193 0.102905 0.107338 0.113612 0.120933 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.seg index f878e6f373d..02a415210e1 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelBegin.seg @@ -2,194 +2,194 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO 
NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 138148 0 2 NaN NaN NaN 0.464407 0.486070 0.497579 -20 139409 139576 0 3 NaN NaN NaN 0.461122 0.485931 0.499176 -20 168466 259156 0 12 NaN NaN NaN 0.457895 0.491265 0.498872 -20 259818 259818 0 1 NaN NaN NaN 0.466382 0.491250 0.498654 -20 259969 278806 0 2 NaN NaN NaN 0.463206 0.490312 0.498144 -20 368905 377226 0 3 NaN NaN NaN 0.458044 0.487224 0.498757 -20 389456 402921 0 4 NaN NaN NaN 0.466234 0.489776 0.497812 -20 425606 744415 0 4 NaN NaN NaN 0.464461 0.486551 0.497698 -20 744570 744570 0 1 NaN NaN NaN 0.453157 0.481366 0.497870 -20 746098 1285933 0 5 NaN NaN NaN 0.462423 0.490947 0.498324 -20 1424303 1424303 0 1 NaN NaN NaN 0.468911 0.490827 0.497889 -20 1426393 1458504 0 3 NaN NaN NaN 0.466803 0.482278 0.495897 -20 1517979 1552430 0 8 NaN NaN NaN 0.470641 0.492234 0.499442 -20 1559330 1592312 0 6 NaN NaN NaN 0.430067 0.486737 0.498420 -20 1600524 1600524 0 1 NaN NaN NaN 0.465881 0.488372 0.498560 -20 1610790 1610894 0 3 NaN NaN NaN 0.441414 0.484672 0.499017 -20 1615883 1616892 0 2 NaN NaN NaN 0.454391 0.481182 0.497772 -20 1895889 1896060 0 3 NaN NaN NaN 0.461932 0.489455 0.498334 -20 2056358 2056358 0 1 NaN NaN NaN 0.459308 0.484849 0.498173 -20 2315929 2517377 0 3 NaN NaN NaN 0.446734 0.491616 0.498323 -20 2517432 2517825 0 2 NaN NaN NaN 0.450113 0.492662 0.498695 -20 2552805 2552805 0 1 NaN NaN NaN 0.446937 0.485765 0.498437 -20 2593006 2621998 0 9 NaN NaN NaN 0.460278 0.482027 0.496574 -20 2633380 2779257 0 3 NaN NaN NaN 0.438601 0.484985 0.498236 -20 2796471 2796471 0 1 NaN NaN NaN 0.451218 0.484158 0.497717 -20 2818801 2996589 0 5 NaN NaN NaN 0.456774 0.480657 0.496687 -20 3002889 3199446 0 4 NaN NaN NaN 0.456957 0.481845 0.497566 -20 3624830 3640823 0 2 NaN NaN NaN 0.441022 0.483394 0.496273 -20 3641881 3653149 0 5 NaN NaN 
NaN 0.441273 0.481576 0.496279 -20 3654433 3686436 0 12 NaN NaN NaN 0.458082 0.483582 0.495629 -20 3721456 3731622 0 2 NaN NaN NaN 0.467440 0.490354 0.498490 -20 3732633 3838441 0 2 NaN NaN NaN 0.445706 0.486568 0.498390 -20 3870124 4055656 0 2 NaN NaN NaN 0.456928 0.486150 0.499034 -20 4155948 4162411 0 2 NaN NaN NaN 0.475001 0.491001 0.497316 -20 4202802 4705756 0 4 NaN NaN NaN 0.459922 0.483416 0.492498 -20 4843609 4843609 0 1 NaN NaN NaN 0.465557 0.483322 0.497828 -20 4880308 5261206 0 4 NaN NaN NaN 0.465757 0.489166 0.498847 -20 5273253 5273382 0 3 NaN NaN NaN 0.454133 0.486813 0.498199 -20 5283256 5482307 0 8 NaN NaN NaN 0.446504 0.481487 0.495978 -20 5528518 6064839 0 8 NaN NaN NaN 0.459175 0.482503 0.496616 -20 6065731 6100230 0 5 NaN NaN NaN 0.459071 0.486316 0.498339 -20 6194421 6195664 0 2 NaN NaN NaN 0.425071 0.477271 0.492088 -20 7866261 7999537 0 4 NaN NaN NaN 0.436744 0.482666 0.498196 -20 8626903 8626903 0 1 NaN NaN NaN 0.440978 0.486508 0.497051 -20 8665751 8703145 0 4 NaN NaN NaN 0.459312 0.482962 0.495206 -20 8707900 8707927 0 2 NaN NaN NaN 0.438393 0.486206 0.496464 -20 8737734 8769423 0 4 NaN NaN NaN 0.454830 0.476714 0.497234 -20 8770318 8770932 0 3 NaN NaN NaN 0.449636 0.484128 0.496957 -20 8773096 8773096 0 1 NaN NaN NaN 0.445031 0.482924 0.498446 -20 8773155 9417870 0 2 NaN NaN NaN 0.463520 0.486597 0.497565 -20 9424810 9510263 0 3 NaN NaN NaN 0.460224 0.482838 0.497542 -20 10024951 10024951 0 1 NaN NaN NaN 0.452279 0.486484 0.498485 -20 10026357 10032413 0 4 NaN NaN NaN 0.434263 0.475963 0.493759 -20 10329888 10393145 0 3 NaN NaN NaN 0.434909 0.486369 0.498872 -20 10438780 10438780 0 1 NaN NaN NaN 0.475837 0.491947 0.499234 -20 10629129 10629525 0 2 NaN NaN NaN 0.456437 0.489532 0.499008 -20 13054633 13060331 0 2 NaN NaN NaN 0.454256 0.477287 0.496440 -20 13071871 13071871 0 1 NaN NaN NaN 0.459138 0.479761 0.498746 -20 13074243 13074243 0 1 NaN NaN NaN 0.467449 0.483747 0.495100 -20 13090745 13260252 0 2 NaN NaN NaN 0.451010 0.483723 
0.498521 -20 13769127 13769127 0 1 NaN NaN NaN 0.460953 0.483593 0.497393 -20 13798676 13830137 0 3 NaN NaN NaN 0.470761 0.491039 0.499020 -20 13845726 13845726 0 1 NaN NaN NaN 0.458991 0.489243 0.498246 -20 14306896 14306896 0 1 NaN NaN NaN 0.462697 0.482747 0.497749 -20 14306953 15874325 0 2 NaN NaN NaN 0.471777 0.489701 0.498759 -20 15967327 15967327 0 1 NaN NaN NaN 0.460896 0.482684 0.498108 -20 16730479 16730522 0 2 NaN NaN NaN 0.472701 0.490168 0.499277 -20 17459905 17460005 0 3 NaN NaN NaN 0.438471 0.494247 0.499313 -20 17474690 17594729 0 6 NaN NaN NaN 0.405755 0.481667 0.494895 -20 17595329 17596731 0 3 NaN NaN NaN 0.455723 0.485909 0.498079 -20 17597531 17597531 0 1 NaN NaN NaN 0.456243 0.484270 0.499608 -20 17600357 17600357 0 1 NaN NaN NaN 0.460066 0.489594 0.496607 -20 17602028 17602028 0 1 NaN NaN NaN 0.457020 0.483003 0.496373 -20 17992979 18022171 0 3 NaN NaN NaN 0.445688 0.485691 0.498082 -20 18142924 18142924 0 1 NaN NaN NaN 0.459703 0.486154 0.497652 -20 18286888 18446024 0 6 NaN NaN NaN 0.446208 0.485565 0.497564 -20 18806046 19970705 0 7 NaN NaN NaN 0.418702 0.473775 0.495490 -20 20032998 20033367 0 5 NaN NaN NaN 0.424455 0.488391 0.499058 -20 20033380 20033380 0 1 NaN NaN NaN 0.462944 0.488471 0.498671 -20 20037222 21336825 0 17 NaN NaN NaN 0.304050 0.346376 0.394794 -20 21897170 22714612 0 4 NaN NaN NaN 0.457290 0.489059 0.499217 -20 23345844 23424613 0 5 NaN NaN NaN 0.437295 0.481003 0.497200 -20 23425812 23529388 0 3 NaN NaN NaN 0.452706 0.484566 0.498899 -20 23529418 23807028 0 9 NaN NaN NaN 0.453581 0.483115 0.494498 -20 23842032 23842032 0 1 NaN NaN NaN 0.437562 0.483631 0.497048 -20 23860178 24200652 0 2 NaN NaN NaN 0.456297 0.486667 0.498941 -20 24939590 24939590 0 1 NaN NaN NaN 0.447034 0.481283 0.496603 -20 24993414 25011423 0 3 NaN NaN NaN 0.451753 0.482944 0.497565 -20 25038484 25059442 0 2 NaN NaN NaN 0.440957 0.482353 0.495245 -20 25190598 25255338 0 3 NaN NaN NaN 0.467700 0.487215 0.497722 -20 25255415 25255415 0 1 NaN NaN NaN 
0.447030 0.476191 0.497503 -20 25257260 25261784 0 6 NaN NaN NaN 0.440299 0.481490 0.498109 -20 25262769 25262789 0 2 NaN NaN NaN 0.458143 0.487150 0.498592 -20 25263756 25263756 0 1 NaN NaN NaN 0.452278 0.488451 0.496533 -20 25264664 25282944 0 12 NaN NaN NaN 0.458368 0.487880 0.498741 -20 25288505 25295787 0 2 NaN NaN NaN 0.451414 0.485110 0.498004 -20 25320228 25398876 0 3 NaN NaN NaN 0.447331 0.480219 0.497599 -20 25434351 25470056 0 8 NaN NaN NaN 0.448601 0.489158 0.498747 -20 25597236 25838130 0 7 NaN NaN NaN 0.389397 0.466843 0.492718 -20 25838802 25841650 0 4 NaN NaN NaN 0.473491 0.490042 0.498458 -20 25846283 25900162 0 3 NaN NaN NaN 0.432052 0.475427 0.497368 -20 25900379 25965961 0 2 NaN NaN NaN 0.453943 0.479595 0.498381 -20 29449417 29449417 0 1 NaN NaN NaN 0.447085 0.487201 0.499140 -20 29516670 29572218 0 6 NaN NaN NaN 0.466761 0.489625 0.497234 -20 29632564 29632564 0 1 NaN NaN NaN 0.459153 0.488990 0.497113 -20 29633929 29648701 0 3 NaN NaN NaN 0.450700 0.486758 0.498239 -20 29873577 30037783 0 3 NaN NaN NaN 0.461542 0.488383 0.497918 -20 30053255 31025231 0 4 NaN NaN NaN 0.417046 0.477463 0.498282 -20 31647126 31677476 0 8 NaN NaN NaN 0.436732 0.479856 0.496731 -20 31811551 32710710 0 12 NaN NaN NaN 0.289504 0.321465 0.364668 -20 32990050 33063830 0 5 NaN NaN NaN 0.426077 0.479029 0.498662 -20 33068563 33879478 0 3 NaN NaN NaN 0.459169 0.491222 0.498511 -20 34218673 34312713 0 8 NaN NaN NaN 0.455638 0.486018 0.497565 -20 34319765 34324484 0 4 NaN NaN NaN 0.476236 0.492410 0.499058 -20 34328848 34328848 0 1 NaN NaN NaN 0.465145 0.489464 0.498062 -20 34443173 34782171 0 11 NaN NaN NaN 0.454545 0.486928 0.497617 -20 35491033 35559352 0 2 NaN NaN NaN 0.437221 0.475433 0.494420 -20 35710453 35710453 0 1 NaN NaN NaN 0.440980 0.481082 0.496704 -20 35740794 35740794 0 1 NaN NaN NaN 0.461184 0.488450 0.497873 -20 35748894 35869619 0 15 NaN NaN NaN 0.345601 0.399604 0.492575 -20 36022539 36718059 0 5 NaN NaN NaN 0.466266 0.487012 0.498169 -20 36790166 
36793529 0 3 NaN NaN NaN 0.464594 0.484363 0.497311 -20 36841756 36919758 0 4 NaN NaN NaN 0.475975 0.492286 0.498968 -20 36932551 36937246 0 4 NaN NaN NaN 0.470434 0.493272 0.497778 -20 36946848 36959353 0 3 NaN NaN NaN 0.465418 0.487228 0.498530 -20 36965617 36965617 0 1 NaN NaN NaN 0.416402 0.488857 0.498547 -20 36989269 37279458 0 11 NaN NaN NaN 0.470659 0.487779 0.494168 -20 37377139 37396262 0 5 NaN NaN NaN 0.458424 0.487488 0.495674 -20 38354742 38354742 0 1 NaN NaN NaN 0.454108 0.483353 0.499402 -20 38898213 40076509 0 7 NaN NaN NaN 0.447008 0.483202 0.497320 -20 40179909 40626474 0 3 NaN NaN NaN 0.471387 0.487519 0.498714 -20 40714479 40743829 0 3 NaN NaN NaN 0.476941 0.494495 0.498685 -20 40981002 41076751 0 2 NaN NaN NaN 0.458540 0.487028 0.496517 -20 41962342 43944958 0 40 NaN NaN NaN 0.473302 0.491109 0.498286 -20 44144118 44638971 0 3 NaN NaN NaN 0.464721 0.487006 0.497128 -20 44639511 44645010 0 8 NaN NaN NaN 0.470040 0.488928 0.497723 -20 44650318 44650318 0 1 NaN NaN NaN 0.463754 0.489499 0.497115 -20 44806875 44806875 0 1 NaN NaN NaN 0.462006 0.486080 0.498316 -20 44983517 45092517 0 3 NaN NaN NaN 0.467053 0.491367 0.495998 -20 45092859 45092921 0 2 NaN NaN NaN 0.453263 0.479677 0.496586 -20 45093125 45629941 0 4 NaN NaN NaN 0.458446 0.485933 0.497562 -20 45789953 45789953 0 1 NaN NaN NaN 0.453517 0.487099 0.498494 -20 45797639 45797883 0 2 NaN NaN NaN 0.441544 0.487608 0.498836 -20 45797954 45808688 0 3 NaN NaN NaN 0.452646 0.486699 0.498181 -20 45809652 45816639 0 4 NaN NaN NaN 0.471200 0.492912 0.498757 -20 45816649 45891189 0 2 NaN NaN NaN 0.461405 0.488350 0.495849 -20 45923383 47253043 0 4 NaN NaN NaN 0.460551 0.489527 0.499063 -20 47256300 47258763 0 4 NaN NaN NaN 0.451655 0.481536 0.497646 -20 47261017 47361725 0 12 NaN NaN NaN 0.471686 0.488044 0.498559 -20 47569112 47615692 0 7 NaN NaN NaN 0.467308 0.487975 0.496869 -20 47621554 47713063 0 9 NaN NaN NaN 0.462921 0.487406 0.497686 -20 47739814 47850182 0 5 NaN NaN NaN 0.465915 0.486673 
0.497344 -20 47852822 47859217 0 3 NaN NaN NaN 0.441528 0.485213 0.498505 -20 47865372 48131036 0 9 NaN NaN NaN 0.458035 0.488366 0.497891 -20 48257149 48259034 0 2 NaN NaN NaN 0.456168 0.482560 0.497847 -20 48300990 48300990 0 1 NaN NaN NaN 0.439052 0.487982 0.497607 -20 48301146 49191228 0 4 NaN NaN NaN 0.462891 0.486785 0.497767 -20 49195248 49195248 0 1 NaN NaN NaN 0.460212 0.486067 0.498486 -20 49196167 49236478 0 5 NaN NaN NaN 0.461609 0.488748 0.496163 -20 49237419 49575334 0 2 NaN NaN NaN 0.426356 0.479625 0.497564 -20 50287736 53691106 0 21 NaN NaN NaN 0.459925 0.487000 0.496720 -20 54935242 55108617 0 9 NaN NaN NaN 0.456686 0.486608 0.498358 -20 55111371 56886062 0 15 NaN NaN NaN 0.475776 0.493004 0.499227 -20 57009796 57024541 0 4 NaN NaN NaN 0.452654 0.482674 0.497665 -20 57024589 57266134 0 4 NaN NaN NaN 0.473275 0.488409 0.499132 -20 57266592 57470517 0 8 NaN NaN NaN 0.469937 0.489358 0.499090 -20 57478448 57569860 0 7 NaN NaN NaN 0.452146 0.488930 0.497382 -20 57570854 57572839 0 2 NaN NaN NaN 0.452491 0.483795 0.498573 -20 57599402 58571125 0 8 NaN NaN NaN 0.460808 0.480546 0.492648 -20 58581863 60582540 0 8 NaN NaN NaN 0.459929 0.487883 0.498276 -20 60712347 60740362 0 8 NaN NaN NaN 0.455112 0.484063 0.498388 -20 60740447 60886611 0 13 NaN NaN NaN 0.478498 0.492252 0.499334 -20 60897721 60992224 0 8 NaN NaN NaN 0.461806 0.488967 0.498954 -20 60992402 61039958 0 2 NaN NaN NaN 0.461005 0.488151 0.498545 -20 61041653 61273578 0 8 NaN NaN NaN 0.457569 0.482513 0.497947 -20 61289958 61441061 0 4 NaN NaN NaN 0.465706 0.485892 0.497771 -20 61444697 61444697 0 1 NaN NaN NaN 0.471853 0.492488 0.498701 -20 61444785 61453348 0 2 NaN NaN NaN 0.472790 0.490997 0.498827 -20 61453549 61834695 0 18 NaN NaN NaN 0.461142 0.494401 0.498638 -20 61869607 61869607 0 1 NaN NaN NaN 0.459515 0.483016 0.496899 -20 61870727 61873039 0 3 NaN NaN NaN 0.460018 0.487014 0.496166 -20 61875497 61881296 0 5 NaN NaN NaN 0.463407 0.489213 0.496283 -20 61987572 61987572 0 1 NaN NaN 
NaN 0.464694 0.490781 0.498078 -20 62191558 62193019 0 2 NaN NaN NaN 0.453222 0.486798 0.499500 -20 62193445 62229244 0 11 NaN NaN NaN 0.458416 0.489111 0.496635 -20 62245686 62303794 0 5 NaN NaN NaN 0.449289 0.484567 0.498853 -20 62305274 62698484 0 10 NaN NaN NaN 0.464029 0.491714 0.497155 -20 62701092 62737318 0 5 NaN NaN NaN 0.455620 0.489770 0.497906 -20 62836271 62836271 0 1 NaN NaN NaN 0.450621 0.481579 0.496660 -20 62836520 62854417 0 2 NaN NaN NaN 0.456501 0.488762 0.498790 -20 62868043 62871232 0 2 NaN NaN NaN 0.431664 0.490538 0.497807 +20 138125 138148 0 2 NaN NaN NaN 0.441132 0.479059 0.497782 +20 139409 139576 0 3 NaN NaN NaN 0.466404 0.487820 0.499058 +20 168466 259156 0 12 NaN NaN NaN 0.460654 0.486318 0.496113 +20 259818 259818 0 1 NaN NaN NaN 0.461463 0.489247 0.497961 +20 259969 278806 0 2 NaN NaN NaN 0.452200 0.487257 0.498626 +20 368905 377226 0 3 NaN NaN NaN 0.467994 0.487685 0.498338 +20 389456 402921 0 4 NaN NaN NaN 0.436361 0.483566 0.496845 +20 425606 744415 0 4 NaN NaN NaN 0.448597 0.486628 0.498398 +20 744570 744570 0 1 NaN NaN NaN 0.466136 0.485562 0.496486 +20 746098 1285933 0 5 NaN NaN NaN 0.442481 0.485271 0.498731 +20 1424303 1424303 0 1 NaN NaN NaN 0.451468 0.485119 0.496044 +20 1426393 1458504 0 3 NaN NaN NaN 0.440678 0.484609 0.498401 +20 1517979 1552430 0 8 NaN NaN NaN 0.451229 0.479337 0.497541 +20 1559330 1592312 0 6 NaN NaN NaN 0.454420 0.488920 0.498461 +20 1600524 1600524 0 1 NaN NaN NaN 0.458197 0.482191 0.496532 +20 1610790 1610894 0 3 NaN NaN NaN 0.427993 0.471600 0.497604 +20 1615883 1616892 0 2 NaN NaN NaN 0.468083 0.484382 0.496569 +20 1895889 1896060 0 3 NaN NaN NaN 0.440623 0.486064 0.499081 +20 2056358 2056358 0 1 NaN NaN NaN 0.461552 0.486241 0.499346 +20 2315929 2517377 0 3 NaN NaN NaN 0.461290 0.484651 0.497899 +20 2517432 2517825 0 2 NaN NaN NaN 0.455944 0.481403 0.497915 +20 2552805 2552805 0 1 NaN NaN NaN 0.457481 0.486817 0.497110 +20 2593006 2621998 0 9 NaN NaN NaN 0.443539 0.475004 0.493699 +20 2633380 
2779257 0 3 NaN NaN NaN 0.454120 0.484516 0.496548 +20 2796471 2796471 0 1 NaN NaN NaN 0.466998 0.489857 0.498063 +20 2818801 2996589 0 5 NaN NaN NaN 0.466766 0.487847 0.496698 +20 3002889 3199446 0 4 NaN NaN NaN 0.433548 0.485568 0.497066 +20 3624830 3640823 0 2 NaN NaN NaN 0.451408 0.483866 0.495964 +20 3641881 3653149 0 5 NaN NaN NaN 0.446455 0.490793 0.498193 +20 3654433 3686436 0 12 NaN NaN NaN 0.439299 0.487341 0.498825 +20 3721456 3731622 0 2 NaN NaN NaN 0.439913 0.491107 0.499001 +20 3732633 3838441 0 2 NaN NaN NaN 0.444543 0.489225 0.495898 +20 3870124 4055656 0 2 NaN NaN NaN 0.455436 0.489361 0.497367 +20 4155948 4162411 0 2 NaN NaN NaN 0.460104 0.492870 0.498375 +20 4202802 4705756 0 4 NaN NaN NaN 0.432247 0.477877 0.499614 +20 4843609 4843609 0 1 NaN NaN NaN 0.467601 0.486031 0.496935 +20 4880308 5261206 0 4 NaN NaN NaN 0.421490 0.488482 0.498184 +20 5273253 5273382 0 3 NaN NaN NaN 0.464778 0.494609 0.498514 +20 5283256 5482307 0 8 NaN NaN NaN 0.453778 0.488050 0.497590 +20 5528518 6064839 0 8 NaN NaN NaN 0.460154 0.487282 0.498092 +20 6065731 6100230 0 5 NaN NaN NaN 0.466286 0.487533 0.497104 +20 6194421 6195664 0 2 NaN NaN NaN 0.446307 0.480555 0.496129 +20 7866261 7999537 0 4 NaN NaN NaN 0.450505 0.486295 0.498360 +20 8626903 8626903 0 1 NaN NaN NaN 0.466530 0.489637 0.498539 +20 8665751 8703145 0 4 NaN NaN NaN 0.445025 0.481368 0.496482 +20 8707900 8707927 0 2 NaN NaN NaN 0.453318 0.483265 0.495526 +20 8737734 8769423 0 4 NaN NaN NaN 0.457602 0.487739 0.498030 +20 8770318 8770932 0 3 NaN NaN NaN 0.450819 0.489405 0.498892 +20 8773096 8773096 0 1 NaN NaN NaN 0.464225 0.489022 0.497686 +20 8773155 9417870 0 2 NaN NaN NaN 0.454454 0.488580 0.498629 +20 9424810 9510263 0 3 NaN NaN NaN 0.456964 0.489442 0.498843 +20 10024951 10024951 0 1 NaN NaN NaN 0.469407 0.485610 0.496697 +20 10026357 10032413 0 4 NaN NaN NaN 0.420056 0.482159 0.498074 +20 10329888 10393145 0 3 NaN NaN NaN 0.443791 0.487602 0.498178 +20 10438780 10438780 0 1 NaN NaN NaN 0.444673 
0.486714 0.496707 +20 10629129 10629525 0 2 NaN NaN NaN 0.471881 0.487079 0.499060 +20 13054633 13060331 0 2 NaN NaN NaN 0.457412 0.490371 0.498636 +20 13071871 13071871 0 1 NaN NaN NaN 0.469204 0.485859 0.499661 +20 13074243 13074243 0 1 NaN NaN NaN 0.438464 0.481052 0.497984 +20 13090745 13260252 0 2 NaN NaN NaN 0.449581 0.488775 0.497863 +20 13769127 13769127 0 1 NaN NaN NaN 0.465302 0.490290 0.498633 +20 13798676 13830137 0 3 NaN NaN NaN 0.440436 0.481056 0.495478 +20 13845726 13845726 0 1 NaN NaN NaN 0.431540 0.483300 0.497749 +20 14306896 14306896 0 1 NaN NaN NaN 0.430295 0.486908 0.498783 +20 14306953 15874325 0 2 NaN NaN NaN 0.446831 0.480471 0.498990 +20 15967327 15967327 0 1 NaN NaN NaN 0.457186 0.481393 0.497307 +20 16730479 16730522 0 2 NaN NaN NaN 0.459780 0.484833 0.498474 +20 17459905 17460005 0 3 NaN NaN NaN 0.469150 0.492020 0.498595 +20 17474690 17594729 0 6 NaN NaN NaN 0.445233 0.484764 0.498676 +20 17595329 17596731 0 3 NaN NaN NaN 0.468029 0.485992 0.495951 +20 17597531 17597531 0 1 NaN NaN NaN 0.454723 0.491252 0.498646 +20 17600357 17600357 0 1 NaN NaN NaN 0.470685 0.487724 0.498238 +20 17602028 17602028 0 1 NaN NaN NaN 0.449260 0.485042 0.498414 +20 17992979 18022171 0 3 NaN NaN NaN 0.449760 0.481254 0.496587 +20 18142924 18142924 0 1 NaN NaN NaN 0.455104 0.490799 0.498216 +20 18286888 18446024 0 6 NaN NaN NaN 0.444579 0.479390 0.497635 +20 18806046 19970705 0 7 NaN NaN NaN 0.395426 0.475728 0.496292 +20 20032998 20033367 0 5 NaN NaN NaN 0.434629 0.480317 0.495163 +20 20033380 20033380 0 1 NaN NaN NaN 0.452454 0.485454 0.498694 +20 20037222 21336825 0 17 NaN NaN NaN 0.280375 0.336923 0.391464 +20 21897170 22714612 0 4 NaN NaN NaN 0.453066 0.489031 0.497690 +20 23345844 23424613 0 5 NaN NaN NaN 0.432223 0.483339 0.498789 +20 23425812 23529388 0 3 NaN NaN NaN 0.461333 0.489749 0.495653 +20 23529418 23807028 0 9 NaN NaN NaN 0.445003 0.474315 0.497276 +20 23842032 23842032 0 1 NaN NaN NaN 0.460581 0.488166 0.497952 +20 23860178 24200652 0 2 NaN 
NaN NaN 0.460088 0.488071 0.497810 +20 24939590 24939590 0 1 NaN NaN NaN 0.452191 0.478721 0.495592 +20 24993414 25011423 0 3 NaN NaN NaN 0.462414 0.490062 0.498383 +20 25038484 25059442 0 2 NaN NaN NaN 0.432923 0.479906 0.498400 +20 25190598 25255338 0 3 NaN NaN NaN 0.451174 0.491714 0.498972 +20 25255415 25255415 0 1 NaN NaN NaN 0.452999 0.480917 0.498478 +20 25257260 25261784 0 6 NaN NaN NaN 0.417580 0.476872 0.496563 +20 25262769 25262789 0 2 NaN NaN NaN 0.445936 0.484426 0.496884 +20 25263756 25263756 0 1 NaN NaN NaN 0.404650 0.480812 0.496397 +20 25264664 25282944 0 12 NaN NaN NaN 0.441366 0.486265 0.497394 +20 25288505 25295787 0 2 NaN NaN NaN 0.443146 0.484280 0.495498 +20 25320228 25398876 0 3 NaN NaN NaN 0.444734 0.491010 0.498788 +20 25434351 25470056 0 8 NaN NaN NaN 0.438105 0.486874 0.497496 +20 25597236 25838130 0 7 NaN NaN NaN 0.420620 0.483851 0.497995 +20 25838802 25841650 0 4 NaN NaN NaN 0.456600 0.488407 0.498967 +20 25846283 25900162 0 3 NaN NaN NaN 0.396351 0.466193 0.496770 +20 25900379 25965961 0 2 NaN NaN NaN 0.453702 0.485682 0.495674 +20 29449417 29449417 0 1 NaN NaN NaN 0.424636 0.479277 0.495494 +20 29516670 29572218 0 6 NaN NaN NaN 0.456266 0.490256 0.498520 +20 29632564 29632564 0 1 NaN NaN NaN 0.442208 0.478893 0.497457 +20 29633929 29648701 0 3 NaN NaN NaN 0.465676 0.492646 0.497657 +20 29873577 30037783 0 3 NaN NaN NaN 0.456004 0.491005 0.499281 +20 30053255 31025231 0 4 NaN NaN NaN 0.438881 0.482444 0.497198 +20 31647126 31677476 0 8 NaN NaN NaN 0.413261 0.474859 0.497380 +20 31811551 32710710 0 12 NaN NaN NaN 0.275794 0.325648 0.410601 +20 32990050 33063830 0 5 NaN NaN NaN 0.428519 0.470481 0.494541 +20 33068563 33879478 0 3 NaN NaN NaN 0.434852 0.468191 0.496151 +20 34218673 34312713 0 8 NaN NaN NaN 0.428136 0.474428 0.495506 +20 34319765 34324484 0 4 NaN NaN NaN 0.455192 0.484937 0.499372 +20 34328848 34328848 0 1 NaN NaN NaN 0.452005 0.474596 0.496241 +20 34443173 34782171 0 11 NaN NaN NaN 0.463264 0.483612 0.495745 +20 
35491033 35559352 0 2 NaN NaN NaN 0.452080 0.489344 0.498861 +20 35710453 35710453 0 1 NaN NaN NaN 0.452739 0.481477 0.496800 +20 35740794 35740794 0 1 NaN NaN NaN 0.455396 0.483011 0.497864 +20 35748894 35869619 0 15 NaN NaN NaN 0.341317 0.428471 0.491153 +20 36022539 36718059 0 5 NaN NaN NaN 0.439423 0.487553 0.497143 +20 36790166 36793529 0 3 NaN NaN NaN 0.421378 0.482481 0.497349 +20 36841756 36919758 0 4 NaN NaN NaN 0.470713 0.491916 0.498942 +20 36932551 36937246 0 4 NaN NaN NaN 0.440575 0.485954 0.497735 +20 36946848 36959353 0 3 NaN NaN NaN 0.455640 0.483879 0.499096 +20 36965617 36965617 0 1 NaN NaN NaN 0.440947 0.480841 0.498227 +20 36989269 37279458 0 11 NaN NaN NaN 0.456184 0.488342 0.498183 +20 37377139 37396262 0 5 NaN NaN NaN 0.452293 0.483242 0.495880 +20 38354742 38354742 0 1 NaN NaN NaN 0.451345 0.480563 0.495807 +20 38898213 40076509 0 7 NaN NaN NaN 0.456795 0.486755 0.498681 +20 40179909 40626474 0 3 NaN NaN NaN 0.469415 0.491865 0.499561 +20 40714479 40743829 0 3 NaN NaN NaN 0.445480 0.487891 0.498656 +20 40981002 41076751 0 2 NaN NaN NaN 0.464240 0.485483 0.497056 +20 41962342 43944958 0 40 NaN NaN NaN 0.484562 0.493125 0.498706 +20 44144118 44638971 0 3 NaN NaN NaN 0.457000 0.482908 0.496543 +20 44639511 44645010 0 8 NaN NaN NaN 0.461972 0.488078 0.498140 +20 44650318 44650318 0 1 NaN NaN NaN 0.450482 0.488544 0.497546 +20 44806875 44806875 0 1 NaN NaN NaN 0.451953 0.490777 0.499325 +20 44983517 45092517 0 3 NaN NaN NaN 0.461905 0.486561 0.497397 +20 45092859 45092921 0 2 NaN NaN NaN 0.428895 0.474200 0.495911 +20 45093125 45629941 0 4 NaN NaN NaN 0.452697 0.480990 0.495184 +20 45789953 45789953 0 1 NaN NaN NaN 0.458728 0.482139 0.497661 +20 45797639 45797883 0 2 NaN NaN NaN 0.454426 0.487202 0.498389 +20 45797954 45808688 0 3 NaN NaN NaN 0.432874 0.486013 0.498678 +20 45809652 45816639 0 4 NaN NaN NaN 0.462669 0.485798 0.498496 +20 45816649 45891189 0 2 NaN NaN NaN 0.445715 0.490303 0.498848 +20 45923383 47253043 0 4 NaN NaN NaN 0.461245 
0.484325 0.496360 +20 47256300 47258763 0 4 NaN NaN NaN 0.468586 0.488475 0.498665 +20 47261017 47361725 0 12 NaN NaN NaN 0.470417 0.483963 0.497872 +20 47569112 47615692 0 7 NaN NaN NaN 0.458015 0.489878 0.499388 +20 47621554 47713063 0 9 NaN NaN NaN 0.457084 0.479067 0.494389 +20 47739814 47850182 0 5 NaN NaN NaN 0.474219 0.491590 0.499355 +20 47852822 47859217 0 3 NaN NaN NaN 0.443306 0.486836 0.498531 +20 47865372 48131036 0 9 NaN NaN NaN 0.463755 0.486312 0.499198 +20 48257149 48259034 0 2 NaN NaN NaN 0.466709 0.485602 0.498039 +20 48300990 48300990 0 1 NaN NaN NaN 0.457198 0.485977 0.498052 +20 48301146 49191228 0 4 NaN NaN NaN 0.440743 0.483039 0.496985 +20 49195248 49195248 0 1 NaN NaN NaN 0.460278 0.492388 0.498668 +20 49196167 49236478 0 5 NaN NaN NaN 0.427622 0.488874 0.498380 +20 49237419 49575334 0 2 NaN NaN NaN 0.459466 0.488446 0.498881 +20 50287736 53691106 0 21 NaN NaN NaN 0.474046 0.489512 0.498200 +20 54935242 55108617 0 9 NaN NaN NaN 0.469564 0.491520 0.498695 +20 55111371 56886062 0 15 NaN NaN NaN 0.469585 0.489166 0.496428 +20 57009796 57024541 0 4 NaN NaN NaN 0.461049 0.489370 0.497497 +20 57024589 57266134 0 4 NaN NaN NaN 0.461907 0.488394 0.497060 +20 57266592 57470517 0 8 NaN NaN NaN 0.470224 0.487630 0.498867 +20 57478448 57569860 0 7 NaN NaN NaN 0.451097 0.481989 0.496385 +20 57570854 57572839 0 2 NaN NaN NaN 0.452535 0.484771 0.497744 +20 57599402 58571125 0 8 NaN NaN NaN 0.452008 0.485350 0.498033 +20 58581863 60582540 0 8 NaN NaN NaN 0.474970 0.488811 0.496892 +20 60712347 60740362 0 8 NaN NaN NaN 0.458387 0.491172 0.498549 +20 60740447 60886611 0 13 NaN NaN NaN 0.456841 0.490671 0.498978 +20 60897721 60992224 0 8 NaN NaN NaN 0.461369 0.490647 0.498666 +20 60992402 61039958 0 2 NaN NaN NaN 0.471889 0.490791 0.499185 +20 61041653 61273578 0 8 NaN NaN NaN 0.481385 0.493300 0.499516 +20 61289958 61441061 0 4 NaN NaN NaN 0.451699 0.479149 0.498382 +20 61444697 61444697 0 1 NaN NaN NaN 0.459046 0.479661 0.496865 +20 61444785 61453348 0 2 
NaN NaN NaN 0.460040 0.487149 0.497305 +20 61453549 61834695 0 18 NaN NaN NaN 0.476015 0.489597 0.498518 +20 61869607 61869607 0 1 NaN NaN NaN 0.462549 0.488123 0.497086 +20 61870727 61873039 0 3 NaN NaN NaN 0.445894 0.474871 0.491184 +20 61875497 61881296 0 5 NaN NaN NaN 0.444293 0.482928 0.498635 +20 61987572 61987572 0 1 NaN NaN NaN 0.451241 0.487377 0.498628 +20 62191558 62193019 0 2 NaN NaN NaN 0.436065 0.485550 0.494510 +20 62193445 62229244 0 11 NaN NaN NaN 0.472891 0.492278 0.498331 +20 62245686 62303794 0 5 NaN NaN NaN 0.459203 0.490452 0.498321 +20 62305274 62698484 0 10 NaN NaN NaN 0.440573 0.487235 0.498182 +20 62701092 62737318 0 5 NaN NaN NaN 0.475459 0.491088 0.499346 +20 62836271 62836271 0 1 NaN NaN NaN 0.444882 0.487701 0.498333 +20 62836520 62854417 0 2 NaN NaN NaN 0.455130 0.487820 0.498895 +20 62868043 62871232 0 2 NaN NaN NaN 0.457744 0.490403 0.496554 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.af.param index 677dfe51fb9..dd0cef5819a 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.084446 1.109278 1.126956 1.141747 1.146308 1.153156 1.165927 1.179890 1.193730 -BIAS_VARIANCE 0.276000 0.288151 0.311355 0.325161 0.336705 0.349116 0.352861 0.360239 0.416764 -OUTLIER_PROBABILITY 0.027608 0.035325 0.040763 0.043144 0.044959 0.049443 0.052779 0.058926 
0.075451 +MEAN_BIAS 1.072565 1.080952 1.083472 1.088221 1.090982 1.093712 1.097881 1.104375 1.109871 +BIAS_VARIANCE 0.045525 0.047460 0.048416 0.049867 0.052743 0.054315 0.056993 0.059563 0.065495 +OUTLIER_PROBABILITY 0.029634 0.034797 0.039080 0.041003 0.044212 0.046528 0.053994 0.060181 0.068049 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.seg index 72d99391432..fa09cc896f8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-nac-tumor-1.modelFinal.seg @@ -2,4 +2,5 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 62871232 0 827 NaN NaN NaN 0.362534 0.372342 0.405177 +20 138125 41076751 0 487 NaN NaN NaN 0.278279 0.286650 0.292823 +20 41962342 62871232 0 340 NaN NaN NaN 0.485835 0.493819 0.498268 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.af.igv.seg index 00ac442ec7c..9b545848a76 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.af.igv.seg @@ -1,2 +1,3 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 138125 62904542 1226 0.432421 +SM-74P4M-1 20 138125 2655019 104 0.424853 +SM-74P4M-1 20 2777828 62904542 1122 0.453979 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.igv.seg index 2e4be85e3ea..de02ddf98e1 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.igv.seg @@ -1,2 +1,3 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 138125 62904542 0 NaN +SM-74P4M-1 20 138125 2655019 0 NaN +SM-74P4M-1 20 2777828 62904542 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.seg index b3f4cd2e57e..b0479412ed1 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.cr.seg @@ -2,4 +2,5 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber 
SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO MEAN_LOG2_COPY_RATIO -20 138125 62904542 0 NaN +20 138125 2655019 0 NaN +20 2777828 62904542 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.af.param index ebe0465040e..0e35114f07e 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.116663 1.123408 1.127238 1.130430 1.140573 1.156446 1.161426 1.170822 1.176871 -BIAS_VARIANCE 0.470922 0.474828 0.481765 0.486072 0.492303 0.493527 0.495814 0.497711 0.498807 -OUTLIER_PROBABILITY 0.014787 0.020051 0.024841 0.028065 0.029756 0.035209 0.036907 0.040733 0.046803 +MEAN_BIAS 1.118668 1.123675 1.132300 1.139653 1.143305 1.147675 1.150803 1.159322 1.173944 +BIAS_VARIANCE 0.474632 0.483568 0.488214 0.489837 0.490476 0.493436 0.494400 0.496112 0.498473 +OUTLIER_PROBABILITY 0.015268 0.019325 0.022309 0.027281 0.030181 0.034175 0.039262 0.047530 0.062008 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.seg index 6d3c1d452e0..55bfbe65b84 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.seg +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelBegin.seg @@ -2,291 +2,291 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 138148 0 2 NaN NaN NaN 0.460434 0.490184 0.499296 -20 139409 139576 0 3 NaN NaN NaN 0.453976 0.486981 0.498205 -20 168466 259156 0 13 NaN NaN NaN 0.454587 0.492084 0.499293 -20 259818 259818 0 1 NaN NaN NaN 0.447380 0.482895 0.498186 -20 259969 278806 0 2 NaN NaN NaN 0.448854 0.483264 0.495394 -20 316911 316911 0 1 NaN NaN NaN 0.472993 0.494986 0.498601 -20 355319 360306 0 4 NaN NaN NaN 0.463678 0.486573 0.497766 -20 368905 377226 0 3 NaN NaN NaN 0.447725 0.479829 0.497487 -20 389456 402921 0 4 NaN NaN NaN 0.457089 0.485645 0.497324 -20 425606 425606 0 1 NaN NaN NaN 0.448019 0.482467 0.498973 -20 428767 744415 0 8 NaN NaN NaN 0.465704 0.488948 0.497850 -20 744570 744570 0 1 NaN NaN NaN 0.462078 0.487091 0.498351 -20 746098 1285933 0 6 NaN NaN NaN 0.464773 0.485290 0.496726 -20 1417397 1417397 0 1 NaN NaN NaN 0.451706 0.484873 0.498250 -20 1424303 1424303 0 1 NaN NaN NaN 0.460276 0.489536 0.499400 -20 1426393 1458504 0 3 NaN NaN NaN 0.467522 0.485276 0.498588 -20 1510769 1510769 0 1 NaN NaN NaN 0.448234 0.477227 0.494773 -20 1517979 1538249 0 4 NaN NaN NaN 0.444083 0.473807 0.496519 -20 1540032 1540032 0 1 NaN NaN NaN 0.469498 0.488371 0.499077 -20 1546911 1552430 0 3 NaN NaN NaN 0.465833 0.482744 0.495503 -20 1592265 1592284 0 2 NaN NaN NaN 0.388623 0.471636 0.498685 -20 1600524 1610894 0 4 NaN NaN NaN 0.470144 0.488336 0.497910 -20 1615883 1880912 
0 6 NaN NaN NaN 0.455607 0.486071 0.497994 -20 1895630 1895658 0 2 NaN NaN NaN 0.463521 0.485156 0.497243 -20 1895889 1896244 0 2 NaN NaN NaN 0.430605 0.483477 0.497440 -20 2036954 2139080 0 4 NaN NaN NaN 0.459054 0.491162 0.498942 -20 2315929 2357863 0 3 NaN NaN NaN 0.445993 0.480480 0.497003 -20 2482325 2517825 0 3 NaN NaN NaN 0.458293 0.486506 0.497311 -20 2542747 2552805 0 2 NaN NaN NaN 0.454241 0.483899 0.498694 -20 2593006 2621998 0 10 NaN NaN NaN 0.449364 0.482314 0.495168 -20 2624956 2655019 0 3 NaN NaN NaN 0.450557 0.489741 0.499140 -20 2777828 2779257 0 2 NaN NaN NaN 0.459450 0.487771 0.498640 -20 2796471 2818801 0 2 NaN NaN NaN 0.468498 0.483997 0.497081 -20 2945759 3005413 0 7 NaN NaN NaN 0.463499 0.487026 0.498142 -20 3007228 3007492 0 2 NaN NaN NaN 0.387339 0.477685 0.498292 -20 3013228 3054033 0 2 NaN NaN NaN 0.453197 0.482918 0.497871 -20 3147024 3624830 0 5 NaN NaN NaN 0.445961 0.486933 0.497118 -20 3640823 3640823 0 1 NaN NaN NaN 0.462666 0.490137 0.499568 -20 3641881 3675333 0 11 NaN NaN NaN 0.447027 0.488941 0.498221 -20 3675498 3686436 0 8 NaN NaN NaN 0.451301 0.481394 0.493853 -20 3701867 3732633 0 4 NaN NaN NaN 0.436802 0.478649 0.499094 -20 3838441 3838441 0 1 NaN NaN NaN 0.439626 0.482882 0.498844 -20 3858985 4055656 0 3 NaN NaN NaN 0.452064 0.483880 0.495925 -20 4138585 4162411 0 4 NaN NaN NaN 0.426056 0.478765 0.497846 -20 4167234 4843609 0 9 NaN NaN NaN 0.464091 0.488069 0.498543 -20 4880308 5035125 0 2 NaN NaN NaN 0.464237 0.487172 0.498629 -20 5063643 5063643 0 1 NaN NaN NaN 0.446118 0.483328 0.499048 -20 5068730 5068730 0 1 NaN NaN NaN 0.454063 0.479350 0.498764 -20 5159344 5482307 0 16 NaN NaN NaN 0.447803 0.478233 0.498210 -20 5528518 5634575 0 5 NaN NaN NaN 0.445101 0.483955 0.496934 -20 5640886 5738376 0 4 NaN NaN NaN 0.457593 0.487804 0.498419 -20 5903517 6065731 0 6 NaN NaN NaN 0.456486 0.479566 0.497433 -20 6090623 6100391 0 6 NaN NaN NaN 0.464884 0.488306 0.498172 -20 6194421 6195664 0 2 NaN NaN NaN 0.466463 0.489674 0.499360 
-20 6302114 6798939 0 3 NaN NaN NaN 0.474227 0.491760 0.498414 -20 7866261 8581713 0 5 NaN NaN NaN 0.452260 0.482317 0.496949 -20 8625108 8625108 0 1 NaN NaN NaN 0.437420 0.481791 0.497503 -20 8625250 8639443 0 4 NaN NaN NaN 0.455576 0.485347 0.494478 -20 8665751 8703145 0 5 NaN NaN NaN 0.452752 0.474301 0.495632 -20 8707900 8707927 0 2 NaN NaN NaN 0.458551 0.490155 0.498624 -20 8708166 8708166 0 1 NaN NaN NaN 0.474001 0.490070 0.498034 -20 8737734 8737734 0 1 NaN NaN NaN 0.450880 0.478930 0.496333 -20 8741188 8741188 0 1 NaN NaN NaN 0.444763 0.482739 0.497864 -20 8742326 8742326 0 1 NaN NaN NaN 0.455033 0.489396 0.498763 -20 8755243 8770932 0 4 NaN NaN NaN 0.443649 0.482461 0.497819 -20 8773096 8773155 0 2 NaN NaN NaN 0.464203 0.492723 0.498360 -20 9108585 9376019 0 4 NaN NaN NaN 0.431972 0.484491 0.497038 -20 9417870 9510263 0 4 NaN NaN NaN 0.457622 0.480088 0.497002 -20 9624587 10004147 0 3 NaN NaN NaN 0.444964 0.477956 0.498422 -20 10012714 10012751 0 2 NaN NaN NaN 0.446668 0.483037 0.498349 -20 10024951 10024951 0 1 NaN NaN NaN 0.449815 0.484226 0.496840 -20 10026357 10032413 0 4 NaN NaN NaN 0.445564 0.482176 0.497082 -20 10329888 10393145 0 4 NaN NaN NaN 0.453621 0.482595 0.495123 -20 10426975 10439002 0 3 NaN NaN NaN 0.466376 0.492280 0.498704 -20 10629129 10629525 0 2 NaN NaN NaN 0.452277 0.482736 0.497512 -20 11224228 12113410 0 5 NaN NaN NaN 0.474877 0.493979 0.498719 -20 13054307 13060331 0 5 NaN NaN NaN 0.447051 0.490676 0.498846 -20 13071871 13074243 0 3 NaN NaN NaN 0.458712 0.488512 0.498533 -20 13090745 13227837 0 3 NaN NaN NaN 0.461960 0.485010 0.497171 -20 13239860 13260252 0 2 NaN NaN NaN 0.470416 0.491244 0.497974 -20 13709407 13709407 0 1 NaN NaN NaN 0.467603 0.487859 0.496119 -20 13769127 13769127 0 1 NaN NaN NaN 0.443801 0.482756 0.499202 -20 13798676 13830137 0 4 NaN NaN NaN 0.430177 0.482163 0.497342 -20 13845726 13845726 0 1 NaN NaN NaN 0.457690 0.486277 0.498221 -20 13872093 13885105 0 2 NaN NaN NaN 0.446412 0.480469 0.498772 -20 13921411 
13992299 0 2 NaN NaN NaN 0.463438 0.486779 0.498300 -20 14273462 14273462 0 1 NaN NaN NaN 0.453439 0.486598 0.498438 -20 14306896 14306896 0 1 NaN NaN NaN 0.445844 0.489930 0.499003 -20 14306953 14306953 0 1 NaN NaN NaN 0.428035 0.480821 0.498546 -20 14319063 14319063 0 1 NaN NaN NaN 0.458149 0.485913 0.497507 -20 14910634 14910665 0 2 NaN NaN NaN 0.462191 0.486162 0.498319 -20 15131723 15411848 0 2 NaN NaN NaN 0.466228 0.491627 0.498656 -20 15874325 15967327 0 2 NaN NaN NaN 0.414514 0.483757 0.498364 -20 16729262 16730522 0 3 NaN NaN NaN 0.457663 0.488414 0.498545 -20 17028059 17460132 0 5 NaN NaN NaN 0.448718 0.490855 0.497000 -20 17474690 17594729 0 9 NaN NaN NaN 0.461752 0.492474 0.499182 -20 17595329 17595329 0 1 NaN NaN NaN 0.461384 0.490787 0.499284 -20 17596155 17596155 0 1 NaN NaN NaN 0.442160 0.473962 0.496629 -20 17596731 17597331 0 2 NaN NaN NaN 0.448900 0.479448 0.496122 -20 17597531 17597531 0 1 NaN NaN NaN 0.467745 0.489836 0.499099 -20 17600357 17990713 0 2 NaN NaN NaN 0.468554 0.490784 0.499002 -20 17992979 18022171 0 5 NaN NaN NaN 0.447403 0.482054 0.492927 -20 18142924 18175556 0 3 NaN NaN NaN 0.463277 0.480896 0.496716 -20 18192433 18241880 0 2 NaN NaN NaN 0.471613 0.485885 0.498777 -20 18286888 18286888 0 1 NaN NaN NaN 0.473563 0.490120 0.498881 -20 18287104 18327570 0 3 NaN NaN NaN 0.461803 0.483616 0.496188 -20 18429497 18429509 0 2 NaN NaN NaN 0.471049 0.490184 0.497396 -20 18432690 18446024 0 4 NaN NaN NaN 0.469614 0.488824 0.499135 -20 18806046 18806046 0 1 NaN NaN NaN 0.446312 0.488777 0.498021 -20 18810705 19261623 0 2 NaN NaN NaN 0.429752 0.469653 0.493572 -20 19345285 19345285 0 1 NaN NaN NaN 0.445614 0.487575 0.499035 -20 19560664 19914520 0 4 NaN NaN NaN 0.470360 0.485394 0.498109 -20 19941538 19951660 0 3 NaN NaN NaN 0.458458 0.484192 0.496369 -20 19970705 19970705 0 1 NaN NaN NaN 0.444941 0.488480 0.498224 -20 19987674 19987674 0 1 NaN NaN NaN 0.473440 0.491694 0.499193 -20 20007325 20033137 0 3 NaN NaN NaN 0.458911 0.490568 
0.497655 -20 20033242 20033423 0 5 NaN NaN NaN 0.442468 0.485616 0.497530 -20 20037222 20152462 0 8 NaN NaN NaN 0.383598 0.461904 0.491987 -20 20168497 20265171 0 4 NaN NaN NaN 0.428327 0.479169 0.495846 -20 20269631 21147009 0 9 NaN NaN NaN 0.388698 0.485046 0.499103 -20 21147407 21868713 0 6 NaN NaN NaN 0.457290 0.484072 0.498454 -20 21897170 22714612 0 6 NaN NaN NaN 0.428639 0.473095 0.496358 -20 23345844 23425812 0 8 NaN NaN NaN 0.440309 0.479568 0.495233 -20 23426972 23426972 0 1 NaN NaN NaN 0.467893 0.492212 0.498504 -20 23528536 23529388 0 2 NaN NaN NaN 0.453514 0.485877 0.499502 -20 23529418 23731560 0 10 NaN NaN NaN 0.449138 0.480797 0.493702 -20 23756529 23756529 0 1 NaN NaN NaN 0.440413 0.482807 0.495772 -20 23756613 23756613 0 1 NaN NaN NaN 0.459569 0.482890 0.497768 -20 23761395 23761395 0 1 NaN NaN NaN 0.452716 0.480842 0.498523 -20 23805832 23807028 0 2 NaN NaN NaN 0.456561 0.482990 0.499324 -20 23842032 23842078 0 2 NaN NaN NaN 0.424279 0.478273 0.498161 -20 23860178 24200699 0 4 NaN NaN NaN 0.464521 0.484810 0.498636 -20 24201344 24201393 0 2 NaN NaN NaN 0.454831 0.487916 0.498275 -20 24790712 24790712 0 1 NaN NaN NaN 0.437908 0.492416 0.499044 -20 24911562 24938195 0 3 NaN NaN NaN 0.436223 0.485302 0.497863 -20 24939590 24959386 0 2 NaN NaN NaN 0.439154 0.482131 0.497391 -20 24993414 25011423 0 4 NaN NaN NaN 0.452759 0.485907 0.499301 -20 25038484 25059442 0 2 NaN NaN NaN 0.421380 0.473399 0.497600 -20 25190598 25196520 0 2 NaN NaN NaN 0.464501 0.491177 0.498881 -20 25252161 25255338 0 2 NaN NaN NaN 0.470785 0.489356 0.497686 -20 25257260 25261784 0 6 NaN NaN NaN 0.389955 0.476901 0.496871 -20 25262769 25262789 0 2 NaN NaN NaN 0.467130 0.492101 0.498184 -20 25263756 25263756 0 1 NaN NaN NaN 0.458307 0.479487 0.498208 -20 25264664 25282944 0 13 NaN NaN NaN 0.458384 0.489131 0.498005 -20 25286059 25286059 0 1 NaN NaN NaN 0.464842 0.487563 0.498688 -20 25288505 25324410 0 5 NaN NaN NaN 0.441620 0.479227 0.497516 -20 25336445 25398876 0 5 NaN NaN NaN 
0.447225 0.484852 0.498139 -20 25424713 25424713 0 1 NaN NaN NaN 0.464878 0.489213 0.497500 -20 25434351 25470056 0 11 NaN NaN NaN 0.465879 0.485623 0.498066 -20 25597236 25604655 0 2 NaN NaN NaN 0.446418 0.487250 0.497854 -20 25622858 25622858 0 1 NaN NaN NaN 0.468142 0.488721 0.497516 -20 25629114 25666807 0 3 NaN NaN NaN 0.407661 0.480397 0.493583 -20 25699941 25700293 0 4 NaN NaN NaN 0.452132 0.486489 0.499276 -20 25754173 25755672 0 2 NaN NaN NaN 0.421888 0.477235 0.497405 -20 25756059 25837915 0 2 NaN NaN NaN 0.477208 0.490059 0.499041 -20 25838130 25838130 0 1 NaN NaN NaN 0.450676 0.481665 0.496069 -20 25838802 25841650 0 4 NaN NaN NaN 0.467296 0.486735 0.497338 -20 25846283 25900162 0 3 NaN NaN NaN 0.400701 0.474501 0.495136 -20 25900379 26134237 0 4 NaN NaN NaN 0.459843 0.489669 0.496762 -20 26138206 29449417 0 2 NaN NaN NaN 0.460606 0.489562 0.499293 -20 29449678 29449678 0 1 NaN NaN NaN 0.461306 0.490108 0.497953 -20 29516670 29516670 0 1 NaN NaN NaN 0.445434 0.482388 0.497332 -20 29517417 29520510 0 3 NaN NaN NaN 0.443656 0.479848 0.497554 -20 29521061 29632564 0 4 NaN NaN NaN 0.460992 0.484146 0.496888 -20 29633929 29648701 0 3 NaN NaN NaN 0.466731 0.487119 0.498520 -20 29847211 29847211 0 1 NaN NaN NaN 0.452252 0.488039 0.498887 -20 29847618 29872796 0 2 NaN NaN NaN 0.468938 0.491683 0.499366 -20 29873577 29873577 0 1 NaN NaN NaN 0.455265 0.485851 0.497294 -20 29899208 29899208 0 1 NaN NaN NaN 0.450682 0.484555 0.496711 -20 29977156 29977156 0 1 NaN NaN NaN 0.458957 0.488063 0.498671 -20 29986332 30035869 0 2 NaN NaN NaN 0.453485 0.485547 0.498590 -20 30037783 30037783 0 1 NaN NaN NaN 0.475835 0.490036 0.498666 -20 30053255 31025163 0 2 NaN NaN NaN 0.437103 0.483602 0.495639 -20 31647126 31676804 0 8 NaN NaN NaN 0.428218 0.481079 0.497932 -20 31811551 31826027 0 4 NaN NaN NaN 0.438438 0.480786 0.496517 -20 31828265 31897554 0 2 NaN NaN NaN 0.454624 0.487380 0.498324 -20 31956468 32289763 0 2 NaN NaN NaN 0.424312 0.485560 0.498265 -20 32325329 32330930 
0 2 NaN NaN NaN 0.454202 0.484338 0.498448 -20 32340077 32710710 0 8 NaN NaN NaN 0.362933 0.444359 0.490223 -20 32935192 33006597 0 6 NaN NaN NaN 0.452374 0.485703 0.498309 -20 33030405 33031276 0 2 NaN NaN NaN 0.463400 0.488831 0.498944 -20 33037336 33150503 0 7 NaN NaN NaN 0.380579 0.476931 0.496612 -20 33178782 33279604 0 5 NaN NaN NaN 0.465602 0.484205 0.497259 -20 33283649 33879478 0 2 NaN NaN NaN 0.461913 0.489222 0.498212 -20 33882720 33882791 0 2 NaN NaN NaN 0.456287 0.487289 0.497666 -20 34218673 34243017 0 4 NaN NaN NaN 0.451490 0.481328 0.496312 -20 34269577 34269577 0 1 NaN NaN NaN 0.463039 0.488272 0.499196 -20 34271574 34271574 0 1 NaN NaN NaN 0.452613 0.488914 0.496909 -20 34289005 34289005 0 1 NaN NaN NaN 0.452237 0.491220 0.499022 -20 34304783 34312834 0 6 NaN NaN NaN 0.377342 0.481372 0.495939 -20 34314114 34314114 0 1 NaN NaN NaN 0.467645 0.492923 0.499112 -20 34314140 34324484 0 9 NaN NaN NaN 0.451978 0.485820 0.497268 -20 34324648 34335861 0 3 NaN NaN NaN 0.473362 0.488136 0.497394 -20 34339854 34431377 0 4 NaN NaN NaN 0.448856 0.482765 0.497493 -20 34443173 34446395 0 2 NaN NaN NaN 0.448091 0.482762 0.496340 -20 34457513 34457513 0 1 NaN NaN NaN 0.462159 0.488392 0.497900 -20 34492974 34492974 0 1 NaN NaN NaN 0.455535 0.490870 0.498482 -20 34499996 34505054 0 3 NaN NaN NaN 0.446249 0.479420 0.498256 -20 34525807 34525807 0 1 NaN NaN NaN 0.442364 0.481195 0.498689 -20 34535373 34581979 0 5 NaN NaN NaN 0.456539 0.486439 0.497588 -20 34589995 34766745 0 8 NaN NaN NaN 0.437184 0.489427 0.498905 -20 34775551 34782171 0 2 NaN NaN NaN 0.469808 0.489869 0.499042 -20 34974252 34974252 0 1 NaN NaN NaN 0.445748 0.481609 0.492556 -20 35068018 35740794 0 10 NaN NaN NaN 0.464681 0.487597 0.498203 -20 35748894 35869619 0 21 NaN NaN NaN 0.369695 0.423835 0.479538 -20 36022539 36288284 0 3 NaN NaN NaN 0.434927 0.487231 0.497979 -20 36337303 36607077 0 3 NaN NaN NaN 0.449672 0.483354 0.496946 -20 36615416 36793501 0 8 NaN NaN NaN 0.432721 0.482939 0.499334 -20 
36793529 36793529 0 1 NaN NaN NaN 0.440256 0.484254 0.497543 -20 36841756 36919758 0 4 NaN NaN NaN 0.469843 0.489908 0.498566 -20 36932551 36944379 0 5 NaN NaN NaN 0.462033 0.486311 0.498534 -20 36946848 36953097 0 3 NaN NaN NaN 0.450135 0.483083 0.498330 -20 36958262 36958262 0 1 NaN NaN NaN 0.453375 0.484553 0.498961 -20 36959318 36965617 0 4 NaN NaN NaN 0.413945 0.472093 0.497277 -20 36989269 37291486 0 19 NaN NaN NaN 0.411379 0.473824 0.492787 -20 37366218 37396262 0 9 NaN NaN NaN 0.456673 0.491919 0.499590 -20 37404951 38354742 0 4 NaN NaN NaN 0.440430 0.472428 0.497850 -20 38483658 39501689 0 3 NaN NaN NaN 0.452357 0.482185 0.494986 -20 39501744 40126679 0 12 NaN NaN NaN 0.446647 0.487710 0.498695 -20 40179909 40179920 0 2 NaN NaN NaN 0.465611 0.487282 0.497982 -20 40234917 43803708 0 48 NaN NaN NaN 0.456142 0.488258 0.498201 -20 43803725 43881630 0 10 NaN NaN NaN 0.464148 0.486133 0.499149 -20 43920730 44645010 0 19 NaN NaN NaN 0.463745 0.481262 0.493393 -20 44650318 44650318 0 1 NaN NaN NaN 0.442719 0.473708 0.497030 -20 44806875 44806875 0 1 NaN NaN NaN 0.454028 0.489757 0.497468 -20 44812893 44983517 0 2 NaN NaN NaN 0.420470 0.487612 0.498797 -20 44987318 45023247 0 2 NaN NaN NaN 0.452640 0.479764 0.498640 -20 45092517 45092921 0 3 NaN NaN NaN 0.448411 0.491257 0.499235 -20 45093125 45789953 0 5 NaN NaN NaN 0.462363 0.491570 0.498117 -20 45797639 45816649 0 12 NaN NaN NaN 0.464985 0.487772 0.497823 -20 45891189 45891189 0 1 NaN NaN NaN 0.456895 0.486319 0.498029 -20 45923383 47253043 0 12 NaN NaN NaN 0.437330 0.486640 0.498939 -20 47256300 47258763 0 4 NaN NaN NaN 0.448233 0.480363 0.496259 -20 47261017 47361725 0 14 NaN NaN NaN 0.457149 0.488718 0.497551 -20 47569112 47589122 0 6 NaN NaN NaN 0.464327 0.487752 0.499047 -20 47591082 47639464 0 12 NaN NaN NaN 0.467733 0.486825 0.498215 -20 47648353 47850182 0 12 NaN NaN NaN 0.452828 0.483399 0.498619 -20 47852822 47859217 0 2 NaN NaN NaN 0.462212 0.492586 0.498053 -20 47865372 48129706 0 10 NaN NaN NaN 
0.465947 0.490095 0.498235 -20 48129987 48129987 0 1 NaN NaN NaN 0.460453 0.490726 0.499314 -20 48130628 48259034 0 5 NaN NaN NaN 0.459919 0.490614 0.498955 -20 48300990 48300990 0 1 NaN NaN NaN 0.452984 0.484971 0.497330 -20 48301146 48701527 0 4 NaN NaN NaN 0.474371 0.488398 0.497599 -20 48720687 49225953 0 9 NaN NaN NaN 0.457860 0.482330 0.497673 -20 49227294 49575334 0 5 NaN NaN NaN 0.475173 0.491330 0.499207 -20 50078910 54464410 0 39 NaN NaN NaN 0.469053 0.488314 0.498636 -20 54935242 54941140 0 5 NaN NaN NaN 0.456144 0.484160 0.498089 -20 54945783 55803149 0 13 NaN NaN NaN 0.462125 0.488177 0.497216 -20 56064267 56064267 0 1 NaN NaN NaN 0.444666 0.482527 0.497378 -20 56071296 56136536 0 4 NaN NaN NaN 0.465843 0.488155 0.497390 -20 56137184 57024589 0 17 NaN NaN NaN 0.459149 0.484309 0.496623 -20 57045667 57045765 0 2 NaN NaN NaN 0.443906 0.479921 0.497793 -20 57266134 57470517 0 9 NaN NaN NaN 0.472568 0.492878 0.498200 -20 57478448 57569860 0 7 NaN NaN NaN 0.454338 0.485450 0.498503 -20 57570854 57768399 0 7 NaN NaN NaN 0.468466 0.488630 0.499160 -20 57768743 58578068 0 8 NaN NaN NaN 0.479529 0.492119 0.498696 -20 58581863 60582540 0 10 NaN NaN NaN 0.466288 0.489301 0.499123 -20 60584328 60584328 0 1 NaN NaN NaN 0.464335 0.489799 0.496885 -20 60712284 60712284 0 1 NaN NaN NaN 0.442181 0.485124 0.498852 -20 60712347 60735098 0 9 NaN NaN NaN 0.478940 0.492581 0.499016 -20 60739079 60739079 0 1 NaN NaN NaN 0.455358 0.485029 0.497990 -20 60740362 60740362 0 1 NaN NaN NaN 0.457997 0.487558 0.498100 -20 60740447 60886611 0 16 NaN NaN NaN 0.464101 0.484383 0.497441 -20 60897721 60992224 0 10 NaN NaN NaN 0.460464 0.485621 0.498462 -20 60992402 61039958 0 2 NaN NaN NaN 0.443693 0.484444 0.495282 -20 61040125 61040125 0 1 NaN NaN NaN 0.459168 0.483449 0.497955 -20 61040313 61040313 0 1 NaN NaN NaN 0.472881 0.491679 0.498592 -20 61041653 61150928 0 3 NaN NaN NaN 0.459862 0.490558 0.498679 -20 61150959 61162037 0 2 NaN NaN NaN 0.461984 0.490706 0.497922 -20 61162100 
61162100 0 1 NaN NaN NaN 0.462580 0.489902 0.499510 -20 61167883 61443547 0 8 NaN NaN NaN 0.466374 0.489014 0.498194 -20 61444697 61444697 0 1 NaN NaN NaN 0.470155 0.485852 0.498770 -20 61444785 61453348 0 2 NaN NaN NaN 0.465422 0.491863 0.499370 -20 61453549 61527563 0 14 NaN NaN NaN 0.470363 0.489367 0.497731 -20 61528074 61528074 0 1 NaN NaN NaN 0.425956 0.488365 0.497173 -20 61528271 61528306 0 2 NaN NaN NaN 0.446290 0.481513 0.498134 -20 61541028 61542001 0 2 NaN NaN NaN 0.431168 0.478006 0.489505 -20 61588159 61834695 0 4 NaN NaN NaN 0.454649 0.486121 0.497582 -20 61869607 61869607 0 1 NaN NaN NaN 0.457406 0.480959 0.498766 -20 61870727 61875909 0 5 NaN NaN NaN 0.468459 0.487113 0.496821 -20 61879009 61879009 0 1 NaN NaN NaN 0.461557 0.484165 0.495584 -20 61880274 62234240 0 21 NaN NaN NaN 0.468643 0.490869 0.498391 -20 62245686 62305274 0 5 NaN NaN NaN 0.443920 0.476067 0.497015 -20 62324289 62698484 0 10 NaN NaN NaN 0.455244 0.484321 0.498762 -20 62701092 62720193 0 5 NaN NaN NaN 0.444600 0.487363 0.498842 -20 62729431 62836271 0 3 NaN NaN NaN 0.414653 0.475402 0.495953 -20 62836520 62854417 0 2 NaN NaN NaN 0.457524 0.488430 0.498349 -20 62868043 62904542 0 3 NaN NaN NaN 0.416166 0.476612 0.495896 +20 138125 138148 0 2 NaN NaN NaN 0.437434 0.475471 0.496501 +20 139409 139576 0 3 NaN NaN NaN 0.439216 0.472238 0.493691 +20 168466 259156 0 13 NaN NaN NaN 0.452559 0.484851 0.497956 +20 259818 259818 0 1 NaN NaN NaN 0.431579 0.484130 0.493377 +20 259969 278806 0 2 NaN NaN NaN 0.451233 0.485852 0.498799 +20 316911 316911 0 1 NaN NaN NaN 0.462493 0.485898 0.498889 +20 355319 360306 0 4 NaN NaN NaN 0.443928 0.478345 0.497289 +20 368905 377226 0 3 NaN NaN NaN 0.446614 0.480843 0.497988 +20 389456 402921 0 4 NaN NaN NaN 0.433140 0.478876 0.498329 +20 425606 425606 0 1 NaN NaN NaN 0.456399 0.484492 0.497655 +20 428767 744415 0 8 NaN NaN NaN 0.444525 0.489543 0.498351 +20 744570 744570 0 1 NaN NaN NaN 0.461708 0.487716 0.497632 +20 746098 1285933 0 6 NaN NaN NaN 
0.442046 0.477998 0.496688 +20 1417397 1417397 0 1 NaN NaN NaN 0.429464 0.479365 0.497461 +20 1424303 1424303 0 1 NaN NaN NaN 0.468721 0.489475 0.498199 +20 1426393 1458504 0 3 NaN NaN NaN 0.427244 0.482865 0.496943 +20 1510769 1510769 0 1 NaN NaN NaN 0.450192 0.480079 0.497165 +20 1517979 1538249 0 4 NaN NaN NaN 0.447519 0.484050 0.497092 +20 1540032 1540032 0 1 NaN NaN NaN 0.462543 0.492322 0.498549 +20 1546911 1552430 0 3 NaN NaN NaN 0.450432 0.483252 0.498599 +20 1592265 1592284 0 2 NaN NaN NaN 0.410010 0.462624 0.493081 +20 1600524 1610894 0 4 NaN NaN NaN 0.448964 0.486123 0.498243 +20 1615883 1880912 0 6 NaN NaN NaN 0.466325 0.490096 0.498811 +20 1895630 1895658 0 2 NaN NaN NaN 0.456766 0.484356 0.497900 +20 1895889 1896244 0 2 NaN NaN NaN 0.461801 0.488866 0.498359 +20 2036954 2139080 0 4 NaN NaN NaN 0.458680 0.488431 0.498698 +20 2315929 2357863 0 3 NaN NaN NaN 0.463189 0.488822 0.497517 +20 2482325 2517825 0 3 NaN NaN NaN 0.451560 0.484367 0.499505 +20 2542747 2552805 0 2 NaN NaN NaN 0.463871 0.483065 0.494878 +20 2593006 2621998 0 10 NaN NaN NaN 0.428121 0.487823 0.497360 +20 2624956 2655019 0 3 NaN NaN NaN 0.449175 0.486782 0.498085 +20 2777828 2779257 0 2 NaN NaN NaN 0.477939 0.489568 0.497389 +20 2796471 2818801 0 2 NaN NaN NaN 0.455316 0.485635 0.496772 +20 2945759 3005413 0 7 NaN NaN NaN 0.463543 0.488291 0.498038 +20 3007228 3007492 0 2 NaN NaN NaN 0.452887 0.485720 0.497536 +20 3013228 3054033 0 2 NaN NaN NaN 0.462818 0.484592 0.498533 +20 3147024 3624830 0 5 NaN NaN NaN 0.453210 0.484071 0.497874 +20 3640823 3640823 0 1 NaN NaN NaN 0.438459 0.479954 0.497890 +20 3641881 3675333 0 11 NaN NaN NaN 0.444446 0.487234 0.498888 +20 3675498 3686436 0 8 NaN NaN NaN 0.453068 0.482165 0.498239 +20 3701867 3732633 0 4 NaN NaN NaN 0.468587 0.493670 0.499142 +20 3838441 3838441 0 1 NaN NaN NaN 0.460912 0.487115 0.498084 +20 3858985 4055656 0 3 NaN NaN NaN 0.463510 0.491291 0.498266 +20 4138585 4162411 0 4 NaN NaN NaN 0.434909 0.482960 0.495207 +20 4167234 
4843609 0 9 NaN NaN NaN 0.453809 0.480493 0.499053 +20 4880308 5035125 0 2 NaN NaN NaN 0.460419 0.488264 0.497680 +20 5063643 5063643 0 1 NaN NaN NaN 0.464665 0.483342 0.498139 +20 5068730 5068730 0 1 NaN NaN NaN 0.423531 0.484316 0.498697 +20 5159344 5482307 0 16 NaN NaN NaN 0.468342 0.492899 0.498462 +20 5528518 5634575 0 5 NaN NaN NaN 0.447288 0.491894 0.498943 +20 5640886 5738376 0 4 NaN NaN NaN 0.469491 0.489574 0.498741 +20 5903517 6065731 0 6 NaN NaN NaN 0.454255 0.480400 0.496902 +20 6090623 6100391 0 6 NaN NaN NaN 0.466899 0.485631 0.497829 +20 6194421 6195664 0 2 NaN NaN NaN 0.448126 0.487390 0.498780 +20 6302114 6798939 0 3 NaN NaN NaN 0.457703 0.485554 0.499318 +20 7866261 8581713 0 5 NaN NaN NaN 0.458614 0.491996 0.497981 +20 8625108 8625108 0 1 NaN NaN NaN 0.462357 0.487211 0.497545 +20 8625250 8639443 0 4 NaN NaN NaN 0.453969 0.489555 0.498894 +20 8665751 8703145 0 5 NaN NaN NaN 0.422848 0.481486 0.496292 +20 8707900 8707927 0 2 NaN NaN NaN 0.458571 0.488222 0.498180 +20 8708166 8708166 0 1 NaN NaN NaN 0.464783 0.489675 0.499259 +20 8737734 8737734 0 1 NaN NaN NaN 0.459331 0.486866 0.498313 +20 8741188 8741188 0 1 NaN NaN NaN 0.449884 0.487051 0.498097 +20 8742326 8742326 0 1 NaN NaN NaN 0.474221 0.489083 0.498355 +20 8755243 8770932 0 4 NaN NaN NaN 0.453598 0.482547 0.498255 +20 8773096 8773155 0 2 NaN NaN NaN 0.449671 0.483984 0.496103 +20 9108585 9376019 0 4 NaN NaN NaN 0.449161 0.489050 0.499263 +20 9417870 9510263 0 4 NaN NaN NaN 0.446422 0.487949 0.497084 +20 9624587 10004147 0 3 NaN NaN NaN 0.450435 0.488545 0.498817 +20 10012714 10012751 0 2 NaN NaN NaN 0.451647 0.488599 0.498228 +20 10024951 10024951 0 1 NaN NaN NaN 0.461068 0.487943 0.495867 +20 10026357 10032413 0 4 NaN NaN NaN 0.456013 0.488293 0.497652 +20 10329888 10393145 0 4 NaN NaN NaN 0.470348 0.491502 0.496271 +20 10426975 10439002 0 3 NaN NaN NaN 0.462449 0.488402 0.498590 +20 10629129 10629525 0 2 NaN NaN NaN 0.450466 0.489037 0.499098 +20 11224228 12113410 0 5 NaN NaN NaN 
0.453640 0.488622 0.498233 +20 13054307 13060331 0 5 NaN NaN NaN 0.439896 0.480507 0.494337 +20 13071871 13074243 0 3 NaN NaN NaN 0.461459 0.480843 0.496588 +20 13090745 13227837 0 3 NaN NaN NaN 0.470309 0.492500 0.498585 +20 13239860 13260252 0 2 NaN NaN NaN 0.470246 0.485568 0.498308 +20 13709407 13709407 0 1 NaN NaN NaN 0.459766 0.488368 0.497836 +20 13769127 13769127 0 1 NaN NaN NaN 0.449126 0.486230 0.499325 +20 13798676 13830137 0 4 NaN NaN NaN 0.474211 0.488073 0.496407 +20 13845726 13845726 0 1 NaN NaN NaN 0.458910 0.492801 0.498797 +20 13872093 13885105 0 2 NaN NaN NaN 0.467399 0.488895 0.497835 +20 13921411 13992299 0 2 NaN NaN NaN 0.471843 0.492523 0.498015 +20 14273462 14273462 0 1 NaN NaN NaN 0.470341 0.493079 0.498896 +20 14306896 14306896 0 1 NaN NaN NaN 0.433605 0.480218 0.498511 +20 14306953 14306953 0 1 NaN NaN NaN 0.437115 0.489885 0.498703 +20 14319063 14319063 0 1 NaN NaN NaN 0.463698 0.485253 0.498175 +20 14910634 14910665 0 2 NaN NaN NaN 0.479603 0.492418 0.499361 +20 15131723 15411848 0 2 NaN NaN NaN 0.451311 0.484265 0.498598 +20 15874325 15967327 0 2 NaN NaN NaN 0.450631 0.487877 0.498662 +20 16729262 16730522 0 3 NaN NaN NaN 0.460343 0.487129 0.497915 +20 17028059 17460132 0 5 NaN NaN NaN 0.420894 0.477187 0.493765 +20 17474690 17594729 0 9 NaN NaN NaN 0.450525 0.486071 0.498350 +20 17595329 17595329 0 1 NaN NaN NaN 0.469832 0.490422 0.497638 +20 17596155 17596155 0 1 NaN NaN NaN 0.456718 0.485312 0.497969 +20 17596731 17597331 0 2 NaN NaN NaN 0.462359 0.483586 0.498745 +20 17597531 17597531 0 1 NaN NaN NaN 0.445409 0.478487 0.496526 +20 17600357 17990713 0 2 NaN NaN NaN 0.457280 0.489917 0.497328 +20 17992979 18022171 0 5 NaN NaN NaN 0.455670 0.485795 0.497792 +20 18142924 18175556 0 3 NaN NaN NaN 0.436810 0.481374 0.498120 +20 18192433 18241880 0 2 NaN NaN NaN 0.467186 0.489029 0.496675 +20 18286888 18286888 0 1 NaN NaN NaN 0.457530 0.486033 0.496977 +20 18287104 18327570 0 3 NaN NaN NaN 0.408368 0.471992 0.495256 +20 18429497 18429509 
0 2 NaN NaN NaN 0.457903 0.488596 0.498127 +20 18432690 18446024 0 4 NaN NaN NaN 0.420560 0.477758 0.498028 +20 18806046 18806046 0 1 NaN NaN NaN 0.452464 0.480818 0.499107 +20 18810705 19261623 0 2 NaN NaN NaN 0.432356 0.483744 0.496354 +20 19345285 19345285 0 1 NaN NaN NaN 0.443728 0.479397 0.495497 +20 19560664 19914520 0 4 NaN NaN NaN 0.431559 0.486600 0.498030 +20 19941538 19951660 0 3 NaN NaN NaN 0.467214 0.491679 0.498132 +20 19970705 19970705 0 1 NaN NaN NaN 0.446258 0.481839 0.495726 +20 19987674 19987674 0 1 NaN NaN NaN 0.467708 0.488537 0.498218 +20 20007325 20033137 0 3 NaN NaN NaN 0.420924 0.475798 0.496949 +20 20033242 20033423 0 5 NaN NaN NaN 0.450658 0.487612 0.499457 +20 20037222 20152462 0 8 NaN NaN NaN 0.394850 0.466663 0.497133 +20 20168497 20265171 0 4 NaN NaN NaN 0.445827 0.485725 0.498902 +20 20269631 21147009 0 9 NaN NaN NaN 0.402743 0.475336 0.496805 +20 21147407 21868713 0 6 NaN NaN NaN 0.446115 0.481263 0.495796 +20 21897170 22714612 0 6 NaN NaN NaN 0.429535 0.478374 0.495654 +20 23345844 23425812 0 8 NaN NaN NaN 0.442629 0.490308 0.497933 +20 23426972 23426972 0 1 NaN NaN NaN 0.457875 0.484051 0.497478 +20 23528536 23529388 0 2 NaN NaN NaN 0.428886 0.481046 0.494703 +20 23529418 23731560 0 10 NaN NaN NaN 0.461317 0.481787 0.496472 +20 23756529 23756529 0 1 NaN NaN NaN 0.446239 0.478781 0.495992 +20 23756613 23756613 0 1 NaN NaN NaN 0.444338 0.491630 0.498340 +20 23761395 23761395 0 1 NaN NaN NaN 0.452149 0.484936 0.498400 +20 23805832 23807028 0 2 NaN NaN NaN 0.437686 0.483303 0.495003 +20 23842032 23842078 0 2 NaN NaN NaN 0.453523 0.481601 0.497615 +20 23860178 24200699 0 4 NaN NaN NaN 0.463893 0.488375 0.499339 +20 24201344 24201393 0 2 NaN NaN NaN 0.468665 0.488275 0.498537 +20 24790712 24790712 0 1 NaN NaN NaN 0.467291 0.489455 0.497295 +20 24911562 24938195 0 3 NaN NaN NaN 0.432662 0.475891 0.497481 +20 24939590 24959386 0 2 NaN NaN NaN 0.473807 0.488328 0.497381 +20 24993414 25011423 0 4 NaN NaN NaN 0.454596 0.484947 0.498764 +20 
25038484 25059442 0 2 NaN NaN NaN 0.440269 0.480760 0.495259 +20 25190598 25196520 0 2 NaN NaN NaN 0.460328 0.489265 0.498033 +20 25252161 25255338 0 2 NaN NaN NaN 0.466456 0.492445 0.498273 +20 25257260 25261784 0 6 NaN NaN NaN 0.434627 0.481465 0.495804 +20 25262769 25262789 0 2 NaN NaN NaN 0.446930 0.479510 0.497886 +20 25263756 25263756 0 1 NaN NaN NaN 0.442895 0.484763 0.497315 +20 25264664 25282944 0 13 NaN NaN NaN 0.433771 0.480635 0.498105 +20 25286059 25286059 0 1 NaN NaN NaN 0.454636 0.484430 0.496887 +20 25288505 25324410 0 5 NaN NaN NaN 0.435035 0.485532 0.497117 +20 25336445 25398876 0 5 NaN NaN NaN 0.455594 0.487581 0.498283 +20 25424713 25424713 0 1 NaN NaN NaN 0.462979 0.487859 0.499460 +20 25434351 25470056 0 11 NaN NaN NaN 0.458963 0.485255 0.498610 +20 25597236 25604655 0 2 NaN NaN NaN 0.436787 0.475896 0.497300 +20 25622858 25622858 0 1 NaN NaN NaN 0.449240 0.486538 0.498660 +20 25629114 25666807 0 3 NaN NaN NaN 0.447002 0.483530 0.498033 +20 25699941 25700293 0 4 NaN NaN NaN 0.467074 0.493806 0.499212 +20 25754173 25755672 0 2 NaN NaN NaN 0.434392 0.484303 0.497963 +20 25756059 25837915 0 2 NaN NaN NaN 0.469612 0.489116 0.497674 +20 25838130 25838130 0 1 NaN NaN NaN 0.453217 0.487459 0.498662 +20 25838802 25841650 0 4 NaN NaN NaN 0.470039 0.485273 0.497893 +20 25846283 25900162 0 3 NaN NaN NaN 0.419166 0.474846 0.496143 +20 25900379 26134237 0 4 NaN NaN NaN 0.434276 0.484909 0.497517 +20 26138206 29449417 0 2 NaN NaN NaN 0.464994 0.488117 0.497219 +20 29449678 29449678 0 1 NaN NaN NaN 0.457683 0.485528 0.497130 +20 29516670 29516670 0 1 NaN NaN NaN 0.458716 0.488188 0.497237 +20 29517417 29520510 0 3 NaN NaN NaN 0.456606 0.491082 0.498505 +20 29521061 29632564 0 4 NaN NaN NaN 0.471383 0.490839 0.497796 +20 29633929 29648701 0 3 NaN NaN NaN 0.437909 0.482463 0.495900 +20 29847211 29847211 0 1 NaN NaN NaN 0.452727 0.485660 0.497968 +20 29847618 29872796 0 2 NaN NaN NaN 0.461282 0.492037 0.499150 +20 29873577 29873577 0 1 NaN NaN NaN 0.458493 
0.489294 0.498888 +20 29899208 29899208 0 1 NaN NaN NaN 0.458334 0.485681 0.498005 +20 29977156 29977156 0 1 NaN NaN NaN 0.446399 0.487004 0.496456 +20 29986332 30035869 0 2 NaN NaN NaN 0.471476 0.487268 0.497745 +20 30037783 30037783 0 1 NaN NaN NaN 0.458303 0.484962 0.498080 +20 30053255 31025163 0 2 NaN NaN NaN 0.439641 0.481920 0.495767 +20 31647126 31676804 0 8 NaN NaN NaN 0.440517 0.489222 0.499018 +20 31811551 31826027 0 4 NaN NaN NaN 0.390489 0.485585 0.497018 +20 31828265 31897554 0 2 NaN NaN NaN 0.461458 0.487674 0.499455 +20 31956468 32289763 0 2 NaN NaN NaN 0.462490 0.490994 0.498626 +20 32325329 32330930 0 2 NaN NaN NaN 0.462625 0.484748 0.495975 +20 32340077 32710710 0 8 NaN NaN NaN 0.375307 0.444969 0.495864 +20 32935192 33006597 0 6 NaN NaN NaN 0.465974 0.487811 0.497614 +20 33030405 33031276 0 2 NaN NaN NaN 0.461138 0.485552 0.496937 +20 33037336 33150503 0 7 NaN NaN NaN 0.388198 0.471788 0.495802 +20 33178782 33279604 0 5 NaN NaN NaN 0.467823 0.488150 0.497197 +20 33283649 33879478 0 2 NaN NaN NaN 0.434887 0.482770 0.497279 +20 33882720 33882791 0 2 NaN NaN NaN 0.460460 0.480246 0.497741 +20 34218673 34243017 0 4 NaN NaN NaN 0.456583 0.485072 0.498790 +20 34269577 34269577 0 1 NaN NaN NaN 0.436399 0.476921 0.496500 +20 34271574 34271574 0 1 NaN NaN NaN 0.464045 0.491331 0.498325 +20 34289005 34289005 0 1 NaN NaN NaN 0.456441 0.489737 0.498746 +20 34304783 34312834 0 6 NaN NaN NaN 0.435988 0.480261 0.496481 +20 34314114 34314114 0 1 NaN NaN NaN 0.453448 0.478759 0.495824 +20 34314140 34324484 0 9 NaN NaN NaN 0.468585 0.488638 0.496844 +20 34324648 34335861 0 3 NaN NaN NaN 0.472084 0.491775 0.498737 +20 34339854 34431377 0 4 NaN NaN NaN 0.462197 0.484142 0.498730 +20 34443173 34446395 0 2 NaN NaN NaN 0.471207 0.491610 0.498443 +20 34457513 34457513 0 1 NaN NaN NaN 0.477878 0.490659 0.498518 +20 34492974 34492974 0 1 NaN NaN NaN 0.468193 0.486195 0.498871 +20 34499996 34505054 0 3 NaN NaN NaN 0.467826 0.494536 0.499243 +20 34525807 34525807 0 1 NaN 
NaN NaN 0.444563 0.481499 0.497119 +20 34535373 34581979 0 5 NaN NaN NaN 0.459345 0.486792 0.497304 +20 34589995 34766745 0 8 NaN NaN NaN 0.456196 0.488723 0.499004 +20 34775551 34782171 0 2 NaN NaN NaN 0.477084 0.491548 0.498032 +20 34974252 34974252 0 1 NaN NaN NaN 0.459613 0.489047 0.497708 +20 35068018 35740794 0 10 NaN NaN NaN 0.451703 0.490878 0.498858 +20 35748894 35869619 0 21 NaN NaN NaN 0.368103 0.426100 0.488750 +20 36022539 36288284 0 3 NaN NaN NaN 0.457216 0.488785 0.499441 +20 36337303 36607077 0 3 NaN NaN NaN 0.466585 0.490218 0.497213 +20 36615416 36793501 0 8 NaN NaN NaN 0.456713 0.485664 0.495751 +20 36793529 36793529 0 1 NaN NaN NaN 0.463542 0.487008 0.497483 +20 36841756 36919758 0 4 NaN NaN NaN 0.454214 0.478741 0.497288 +20 36932551 36944379 0 5 NaN NaN NaN 0.460891 0.492303 0.498574 +20 36946848 36953097 0 3 NaN NaN NaN 0.455534 0.488196 0.497615 +20 36958262 36958262 0 1 NaN NaN NaN 0.462240 0.495016 0.499832 +20 36959318 36965617 0 4 NaN NaN NaN 0.467440 0.489826 0.497956 +20 36989269 37291486 0 19 NaN NaN NaN 0.447439 0.488126 0.498583 +20 37366218 37396262 0 9 NaN NaN NaN 0.459654 0.487920 0.496467 +20 37404951 38354742 0 4 NaN NaN NaN 0.475001 0.489410 0.499191 +20 38483658 39501689 0 3 NaN NaN NaN 0.448301 0.482604 0.497628 +20 39501744 40126679 0 12 NaN NaN NaN 0.433609 0.482977 0.497524 +20 40179909 40179920 0 2 NaN NaN NaN 0.469092 0.490709 0.498169 +20 40234917 43803708 0 48 NaN NaN NaN 0.474536 0.489906 0.496597 +20 43803725 43881630 0 10 NaN NaN NaN 0.464088 0.490392 0.499383 +20 43920730 44645010 0 19 NaN NaN NaN 0.456732 0.487335 0.496585 +20 44650318 44650318 0 1 NaN NaN NaN 0.471282 0.492026 0.498621 +20 44806875 44806875 0 1 NaN NaN NaN 0.416861 0.487722 0.498207 +20 44812893 44983517 0 2 NaN NaN NaN 0.448088 0.482602 0.496838 +20 44987318 45023247 0 2 NaN NaN NaN 0.462968 0.486868 0.499187 +20 45092517 45092921 0 3 NaN NaN NaN 0.466427 0.490032 0.497986 +20 45093125 45789953 0 5 NaN NaN NaN 0.459012 0.489325 0.497835 +20 
45797639 45816649 0 12 NaN NaN NaN 0.475860 0.487264 0.498021 +20 45891189 45891189 0 1 NaN NaN NaN 0.436267 0.473915 0.495082 +20 45923383 47253043 0 12 NaN NaN NaN 0.467876 0.492749 0.498703 +20 47256300 47258763 0 4 NaN NaN NaN 0.456426 0.482199 0.495004 +20 47261017 47361725 0 14 NaN NaN NaN 0.463217 0.486053 0.498118 +20 47569112 47589122 0 6 NaN NaN NaN 0.464197 0.492965 0.499383 +20 47591082 47639464 0 12 NaN NaN NaN 0.460111 0.488088 0.497853 +20 47648353 47850182 0 12 NaN NaN NaN 0.450674 0.488607 0.498835 +20 47852822 47859217 0 2 NaN NaN NaN 0.444226 0.488775 0.498938 +20 47865372 48129706 0 10 NaN NaN NaN 0.454555 0.483904 0.497169 +20 48129987 48129987 0 1 NaN NaN NaN 0.462563 0.489346 0.499423 +20 48130628 48259034 0 5 NaN NaN NaN 0.452472 0.490592 0.498477 +20 48300990 48300990 0 1 NaN NaN NaN 0.441178 0.488210 0.499284 +20 48301146 48701527 0 4 NaN NaN NaN 0.458063 0.483692 0.497637 +20 48720687 49225953 0 9 NaN NaN NaN 0.449583 0.482196 0.495537 +20 49227294 49575334 0 5 NaN NaN NaN 0.463611 0.489666 0.498271 +20 50078910 54464410 0 39 NaN NaN NaN 0.478552 0.493825 0.498864 +20 54935242 54941140 0 5 NaN NaN NaN 0.464698 0.487253 0.497739 +20 54945783 55803149 0 13 NaN NaN NaN 0.460106 0.489042 0.497970 +20 56064267 56064267 0 1 NaN NaN NaN 0.451896 0.490924 0.498611 +20 56071296 56136536 0 4 NaN NaN NaN 0.472918 0.488780 0.498129 +20 56137184 57024589 0 17 NaN NaN NaN 0.461550 0.484916 0.498635 +20 57045667 57045765 0 2 NaN NaN NaN 0.448631 0.489208 0.499315 +20 57266134 57470517 0 9 NaN NaN NaN 0.459106 0.485214 0.499322 +20 57478448 57569860 0 7 NaN NaN NaN 0.460581 0.482758 0.493869 +20 57570854 57768399 0 7 NaN NaN NaN 0.447816 0.484499 0.497834 +20 57768743 58578068 0 8 NaN NaN NaN 0.459299 0.483562 0.497619 +20 58581863 60582540 0 10 NaN NaN NaN 0.463734 0.485974 0.497505 +20 60584328 60584328 0 1 NaN NaN NaN 0.460363 0.485581 0.498744 +20 60712284 60712284 0 1 NaN NaN NaN 0.452780 0.487427 0.498538 +20 60712347 60735098 0 9 NaN NaN NaN 
0.449598 0.480351 0.496229 +20 60739079 60739079 0 1 NaN NaN NaN 0.454772 0.481361 0.495071 +20 60740362 60740362 0 1 NaN NaN NaN 0.447617 0.481865 0.497413 +20 60740447 60886611 0 16 NaN NaN NaN 0.451611 0.478181 0.495320 +20 60897721 60992224 0 10 NaN NaN NaN 0.462119 0.489666 0.499484 +20 60992402 61039958 0 2 NaN NaN NaN 0.440996 0.482062 0.497047 +20 61040125 61040125 0 1 NaN NaN NaN 0.437852 0.482373 0.494887 +20 61040313 61040313 0 1 NaN NaN NaN 0.461391 0.482763 0.497602 +20 61041653 61150928 0 3 NaN NaN NaN 0.465412 0.493004 0.498436 +20 61150959 61162037 0 2 NaN NaN NaN 0.464212 0.483388 0.497042 +20 61162100 61162100 0 1 NaN NaN NaN 0.459608 0.488440 0.499301 +20 61167883 61443547 0 8 NaN NaN NaN 0.461219 0.482463 0.495516 +20 61444697 61444697 0 1 NaN NaN NaN 0.449839 0.483721 0.497001 +20 61444785 61453348 0 2 NaN NaN NaN 0.447821 0.486343 0.499520 +20 61453549 61527563 0 14 NaN NaN NaN 0.473521 0.491660 0.497730 +20 61528074 61528074 0 1 NaN NaN NaN 0.462222 0.486150 0.497586 +20 61528271 61528306 0 2 NaN NaN NaN 0.452473 0.480404 0.497614 +20 61541028 61542001 0 2 NaN NaN NaN 0.444631 0.486719 0.499009 +20 61588159 61834695 0 4 NaN NaN NaN 0.448140 0.484252 0.497531 +20 61869607 61869607 0 1 NaN NaN NaN 0.467192 0.491005 0.498730 +20 61870727 61875909 0 5 NaN NaN NaN 0.451134 0.491034 0.498417 +20 61879009 61879009 0 1 NaN NaN NaN 0.459962 0.480216 0.496562 +20 61880274 62234240 0 21 NaN NaN NaN 0.460409 0.487571 0.498643 +20 62245686 62305274 0 5 NaN NaN NaN 0.473715 0.487510 0.498125 +20 62324289 62698484 0 10 NaN NaN NaN 0.469517 0.491390 0.498780 +20 62701092 62720193 0 5 NaN NaN NaN 0.444376 0.478563 0.495824 +20 62729431 62836271 0 3 NaN NaN NaN 0.462955 0.486711 0.499064 +20 62836520 62854417 0 2 NaN NaN NaN 0.446140 0.488884 0.497338 +20 62868043 62904542 0 3 NaN NaN NaN 0.475015 0.489167 0.498089 diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.af.param index aa1318a9fe5..653a9ee7501 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.096712 1.122409 1.127938 1.132610 1.143475 1.147810 1.152063 1.175109 1.185040 -BIAS_VARIANCE 0.367914 0.407483 0.446579 0.466324 0.472971 0.476175 0.479822 0.491112 0.497704 -OUTLIER_PROBABILITY 0.005892 0.009638 0.013610 0.014514 0.016924 0.025290 0.030210 0.033307 0.039133 +MEAN_BIAS 1.114242 1.130955 1.136030 1.142676 1.150428 1.153654 1.157442 1.180595 1.193161 +BIAS_VARIANCE 0.417806 0.435181 0.455769 0.460817 0.475757 0.483382 0.487224 0.494051 0.498065 +OUTLIER_PROBABILITY 0.004614 0.009076 0.011983 0.016301 0.020536 0.025629 0.031470 0.035280 0.045898 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.seg index ea62ffe5c62..b0b5a984349 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-ac-tumor-1.modelFinal.seg @@ -2,4 +2,5 @@ @SQ SN:20 
LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 62904542 0 1226 NaN NaN NaN 0.408440 0.432421 0.481377 +20 138125 2655019 0 104 NaN NaN NaN 0.365485 0.424853 0.493549 +20 2777828 62904542 0 1122 NaN NaN NaN 0.421150 0.453979 0.493609 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.af.igv.seg index b1401d06f13..cbf9683bc72 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.af.igv.seg @@ -1,15 +1,15 @@ Sample Chromosome Start End Num_Probes Segment_Mean SM-74P4M-1 20 68100 2622249 73 0.311068 -SM-74P4M-1 20 2633368 5297334 46 0.334775 -SM-74P4M-1 20 5454040 8771160 31 0.324159 -SM-74P4M-1 20 8772985 13091116 18 0.293368 +SM-74P4M-1 20 2633368 5297334 46 0.334774 +SM-74P4M-1 20 5454040 8771160 31 0.324160 +SM-74P4M-1 20 8772985 13091116 18 0.293367 SM-74P4M-1 20 13097902 19560970 33 0.296141 -SM-74P4M-1 20 19565364 25439484 77 0.246220 -SM-74P4M-1 20 25441907 25829597 9 0.243290 +SM-74P4M-1 20 19565364 25439484 77 0.246221 +SM-74P4M-1 20 25441907 25829597 9 0.243289 SM-74P4M-1 20 26061550 26072430 0 NaN -SM-74P4M-1 20 26083858 30126381 5 0.379403 +SM-74P4M-1 20 26083858 30126381 5 0.379416 SM-74P4M-1 20 30132499 
31828443 16 0.174395 -SM-74P4M-1 20 31828940 32700168 1 0.487963 +SM-74P4M-1 20 31828940 32700168 1 0.488038 SM-74P4M-1 20 32847930 35559530 31 0.258026 SM-74P4M-1 20 35563181 37667438 52 0.299942 SM-74P4M-1 20 39316268 62905205 323 0.495837 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.af.param index c1095d09ceb..aeb3de0e07c 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.092310 1.098678 1.111646 1.117541 1.120565 1.125976 1.133225 1.136642 1.148982 -BIAS_VARIANCE 0.451935 0.459585 0.465554 0.471289 0.475876 0.479446 0.482660 0.491585 0.495789 -OUTLIER_PROBABILITY 0.045637 0.051060 0.054284 0.057989 0.064227 0.068471 0.077494 0.083395 0.093476 +MEAN_BIAS 1.087662 1.101391 1.106330 1.111385 1.116388 1.126375 1.137021 1.142962 1.158178 +BIAS_VARIANCE 0.447572 0.459136 0.467595 0.471885 0.481739 0.486141 0.491809 0.497145 0.498573 +OUTLIER_PROBABILITY 0.042809 0.049674 0.054817 0.057880 0.064266 0.071982 0.082537 0.091431 0.112845 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.seg index 1cc21a3ed90..3a92029e031 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelBegin.seg @@ -2,89 +2,89 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 363520 28 20 0.616858 0.645817 0.674736 0.409789 0.471381 0.498710 -20 368404 377586 3 3 0.455567 0.553719 0.619254 0.444268 0.484721 0.498171 -20 389132 398077 4 3 0.580702 0.639602 0.734505 0.457889 0.491709 0.498982 -20 398078 1373576 94 11 0.636273 0.656804 0.674736 0.459475 0.483937 0.496808 -20 1424143 1426618 2 2 0.558403 0.686476 0.821470 0.444009 0.485036 0.497889 -20 1432887 1515339 16 2 0.664547 0.697313 0.747386 0.434855 0.475514 0.496759 -20 1517550 1559592 7 8 0.664479 0.733470 0.804384 0.383600 0.461272 0.495372 -20 1600264 1896353 7 9 0.504773 0.588939 0.638667 0.458351 0.480160 0.495468 -20 1901790 2474793 51 2 0.610221 0.628385 0.643121 0.464564 0.482640 0.496452 -20 2517017 2518214 2 3 0.466824 0.581057 0.727678 0.442603 0.485772 0.497383 -20 2538851 2597102 13 2 0.679380 0.737303 0.802924 0.436187 0.476695 0.497050 -20 2597467 2622249 5 8 0.586969 0.673810 0.758800 0.450158 0.478906 0.497040 -20 2633368 3680358 256 21 0.620770 0.627961 0.638491 0.450843 0.479585 0.495897 -20 3681738 3686841 4 6 0.513418 0.599017 0.663398 0.464355 0.490281 0.497014 -20 3686842 4866807 95 13 0.598378 0.621248 0.637607 0.461567 0.493639 0.499383 -20 4879950 4880610 1 1 -38.354541 -19.841739 1.879385 
0.432495 0.482473 0.496767 +20 68100 363520 28 20 0.616858 0.645817 0.674736 0.409387 0.481073 0.496909 +20 368404 377586 3 3 0.455567 0.553719 0.619254 0.430478 0.487059 0.497226 +20 389132 398077 4 3 0.580702 0.639602 0.734505 0.428406 0.478076 0.497462 +20 398078 1373576 94 11 0.636273 0.656804 0.674736 0.424591 0.485048 0.496412 +20 1424143 1426618 2 2 0.558403 0.686476 0.821470 0.470841 0.490268 0.497503 +20 1432887 1515339 16 2 0.664547 0.697313 0.747386 0.444223 0.480022 0.494628 +20 1517550 1559592 7 8 0.664479 0.733470 0.804384 0.436421 0.483417 0.497259 +20 1600264 1896353 7 9 0.504773 0.588939 0.638667 0.447541 0.479586 0.496110 +20 1901790 2474793 51 2 0.610221 0.628385 0.643121 0.471277 0.491609 0.499082 +20 2517017 2518214 2 3 0.466824 0.581057 0.727678 0.429881 0.486334 0.497107 +20 2538851 2597102 13 2 0.679380 0.737303 0.802924 0.448794 0.482979 0.497389 +20 2597467 2622249 5 8 0.586969 0.673810 0.758800 0.449058 0.490787 0.498843 +20 2633368 3680358 256 21 0.620770 0.627961 0.638491 0.447637 0.492291 0.499251 +20 3681738 3686841 4 6 0.513418 0.599017 0.663398 0.450196 0.487417 0.498344 +20 3686842 4866807 95 13 0.598378 0.621248 0.637607 0.452688 0.482130 0.496862 +20 4879950 4880610 1 1 -38.354541 -19.841739 1.879385 0.436431 0.475478 0.495785 20 4882837 5171132 27 0 0.610876 0.647102 0.682110 NaN NaN NaN -20 5282435 5297334 3 5 0.451959 0.554162 0.660650 0.440798 0.483426 0.497444 -20 5454040 8746257 127 26 0.647893 0.660167 0.671549 0.401577 0.478646 0.495989 -20 8754935 8771160 5 5 0.754330 0.823035 0.912155 0.461202 0.487140 0.499448 -20 8772985 9416547 29 2 0.858519 0.883094 0.926069 0.457733 0.483351 0.498120 -20 9417398 9425142 3 3 0.573965 0.670621 0.746411 0.441158 0.487900 0.498664 -20 9433743 10025443 24 2 0.832737 0.877060 0.915016 0.470050 0.489653 0.497616 -20 10025971 10032789 3 4 0.850305 0.956211 1.045650 0.416258 0.476656 0.497775 -20 10033509 13055397 58 4 0.858814 0.887775 0.902961 0.435063 0.479230 0.496810 -20 13071480 
13091116 3 3 0.863603 0.960950 1.085440 0.456907 0.488265 0.497822 -20 13097902 16729903 128 10 0.858418 0.873171 0.891151 0.435937 0.480662 0.497917 -20 16730297 16730907 1 2 0.786506 0.989487 1.137585 0.462517 0.483981 0.497330 +20 5282435 5297334 3 5 0.451959 0.554162 0.660650 0.436838 0.484296 0.496855 +20 5454040 8746257 127 26 0.647893 0.660167 0.671549 0.454658 0.490082 0.498026 +20 8754935 8771160 5 5 0.754330 0.823035 0.912155 0.454728 0.482059 0.496619 +20 8772985 9416547 29 2 0.858519 0.883094 0.926069 0.454153 0.484516 0.498082 +20 9417398 9425142 3 3 0.573965 0.670621 0.746411 0.453902 0.485545 0.498717 +20 9433743 10025443 24 2 0.832737 0.877060 0.915016 0.441967 0.487879 0.497522 +20 10025971 10032789 3 4 0.850305 0.956211 1.045650 0.418908 0.481441 0.498593 +20 10033509 13055397 58 4 0.858814 0.887775 0.902961 0.446555 0.482779 0.493827 +20 13071480 13091116 3 3 0.863603 0.960950 1.085440 0.446093 0.488441 0.497524 +20 13097902 16729903 128 10 0.858418 0.873171 0.891151 0.451428 0.487058 0.497774 +20 16730297 16730907 1 2 0.786506 0.989487 1.137585 0.455303 0.486577 0.498031 20 16731489 17462967 13 0 0.888570 0.931614 0.963131 NaN NaN NaN -20 17474468 17492965 4 4 0.765912 0.858438 0.931668 0.462784 0.484380 0.498417 -20 17495115 17601704 14 3 0.855885 0.903735 0.954234 0.444740 0.481472 0.491304 -20 17601852 17602264 1 1 0.785991 0.961659 1.108774 0.465607 0.487797 0.498672 +20 17474468 17492965 4 4 0.765912 0.858438 0.931668 0.442994 0.481937 0.498662 +20 17495115 17601704 14 3 0.855885 0.903735 0.954234 0.460698 0.487796 0.498036 +20 17601852 17602264 1 1 0.785991 0.961659 1.108774 0.436143 0.473356 0.496025 20 17602460 17971079 31 0 0.866080 0.903972 0.937876 NaN NaN NaN -20 18005029 18022619 2 2 0.795920 0.931541 1.041274 0.433740 0.481543 0.497831 -20 18037928 18429964 34 3 0.919930 0.944497 0.972318 0.436358 0.480910 0.497113 -20 18432248 18433633 2 2 0.778596 0.948346 1.059295 0.459969 0.481866 0.498046 -20 18434125 18795182 47 2 0.831734 
0.871539 0.893205 0.460556 0.490242 0.498278 -20 18805660 19560970 5 4 0.784477 0.880833 0.993205 0.417590 0.475906 0.497200 -20 19565364 20031518 44 3 0.951942 0.981857 1.011919 0.445241 0.484677 0.497625 -20 20032685 20051900 5 8 1.056331 1.144170 1.202413 0.405932 0.485601 0.496053 -20 20054695 21117443 68 6 0.919610 0.944809 0.962379 0.387876 0.473482 0.498818 -20 21125956 21143319 2 3 0.746576 0.885838 1.011789 0.437016 0.487228 0.498208 -20 21143320 21335762 29 1 0.915857 0.946611 0.977729 0.441617 0.477593 0.497587 -20 21336467 21337020 1 1 1.049420 1.168026 1.278388 0.458295 0.490273 0.499093 +20 18005029 18022619 2 2 0.795920 0.931541 1.041274 0.459394 0.482972 0.496578 +20 18037928 18429964 34 3 0.919930 0.944497 0.972318 0.433985 0.478851 0.497935 +20 18432248 18433633 2 2 0.778596 0.948346 1.059295 0.422801 0.481147 0.495672 +20 18434125 18795182 47 2 0.831734 0.871539 0.893205 0.465638 0.489594 0.498461 +20 18805660 19560970 5 4 0.784477 0.880833 0.993205 0.444794 0.481235 0.498489 +20 19565364 20031518 44 3 0.951942 0.981857 1.011919 0.454948 0.480488 0.494505 +20 20032685 20051900 5 8 1.056331 1.144170 1.202413 0.407732 0.480087 0.497592 +20 20054695 21117443 68 6 0.919610 0.944809 0.962379 0.464971 0.484369 0.497844 +20 21125956 21143319 2 3 0.746576 0.885838 1.011789 0.445876 0.477858 0.496148 +20 21143320 21335762 29 1 0.915857 0.946611 0.977729 0.452994 0.486785 0.499067 +20 21336467 21337020 1 1 1.049420 1.168026 1.278388 0.452974 0.479707 0.495687 20 21337021 23335353 23 0 0.843591 0.873977 0.898971 NaN NaN NaN -20 23344770 23347986 2 2 0.754716 0.889823 1.036976 0.451827 0.486870 0.498835 -20 23349148 23476803 23 3 0.923513 0.962569 1.015895 0.470283 0.488686 0.496804 -20 23528151 23529802 2 3 0.751213 0.863958 1.016981 0.430882 0.478451 0.494496 -20 23530800 23804992 16 5 0.925557 0.984711 1.032753 0.442622 0.482650 0.494298 -20 23805596 23807549 2 2 1.000609 1.122151 1.251156 0.431926 0.475602 0.497183 -20 23856574 25058753 44 6 1.007022 
1.031021 1.066713 0.419597 0.470506 0.494977 -20 25059122 25059830 1 1 0.924371 1.074894 1.231265 0.440977 0.487037 0.497514 -20 25059831 25250095 19 1 1.042005 1.075242 1.125932 0.458144 0.485891 0.498323 -20 25251768 25269459 12 16 1.045976 1.096869 1.125607 0.300267 0.396284 0.497883 -20 25270235 25283234 9 8 0.971934 1.056236 1.125711 0.463458 0.485461 0.495536 -20 25283935 25426879 19 4 1.023388 1.058247 1.124318 0.415201 0.479079 0.494780 -20 25433836 25439484 3 4 1.089659 1.188626 1.278029 0.447954 0.489290 0.497590 -20 25441907 25520944 21 4 1.049891 1.081117 1.117048 0.461965 0.487943 0.497185 -20 25596310 25829597 8 5 1.113431 1.213732 1.321630 0.403474 0.461151 0.493035 +20 23344770 23347986 2 2 0.754716 0.889823 1.036976 0.461474 0.489396 0.499665 +20 23349148 23476803 23 3 0.923513 0.962569 1.015895 0.455488 0.487888 0.499080 +20 23528151 23529802 2 3 0.751213 0.863958 1.016981 0.469967 0.490949 0.498184 +20 23530800 23804992 16 5 0.925557 0.984711 1.032753 0.457607 0.481802 0.498065 +20 23805596 23807549 2 2 1.000609 1.122151 1.251156 0.463573 0.487469 0.497764 +20 23856574 25058753 44 6 1.007022 1.031021 1.066713 0.409756 0.479021 0.498791 +20 25059122 25059830 1 1 0.924371 1.074894 1.231265 0.419756 0.473179 0.499195 +20 25059831 25250095 19 1 1.042005 1.075242 1.125932 0.444729 0.478620 0.498973 +20 25251768 25269459 12 16 1.045976 1.096869 1.125607 0.306679 0.418985 0.496539 +20 25270235 25283234 9 8 0.971934 1.056236 1.125711 0.467552 0.487209 0.496978 +20 25283935 25426879 19 4 1.023388 1.058247 1.124318 0.387352 0.466078 0.494667 +20 25433836 25439484 3 4 1.089659 1.188626 1.278029 0.444656 0.478541 0.495874 +20 25441907 25520944 21 4 1.049891 1.081117 1.117048 0.425085 0.472407 0.494296 +20 25596310 25829597 8 5 1.113431 1.213732 1.321630 0.372243 0.444396 0.495789 20 26061550 26072430 3 0 0.367641 0.515342 0.634110 NaN NaN NaN -20 26083858 30126381 33 5 1.126152 1.152054 1.187677 0.459142 0.485827 0.498167 -20 30132499 31021972 161 1 1.778447 
1.796121 1.818081 0.453168 0.486421 0.499158 -20 31021984 31025393 1 2 -41.644357 -19.617064 3.691117 0.467031 0.490179 0.497997 +20 26083858 30126381 33 5 1.126152 1.152054 1.187677 0.437891 0.483506 0.498101 +20 30132499 31021972 161 1 1.778447 1.796121 1.818081 0.431099 0.479080 0.495366 +20 31021984 31025393 1 2 -41.644357 -19.617064 3.691117 0.458267 0.491562 0.498997 20 31034848 31644756 121 0 1.707959 1.722598 1.734877 NaN NaN NaN -20 31646933 31657031 7 5 1.507781 1.555989 1.618804 0.449831 0.478359 0.497385 +20 31646933 31657031 7 5 1.507781 1.555989 1.618804 0.464282 0.483042 0.496601 20 31657443 31673054 9 0 1.581746 1.640515 1.722870 NaN NaN NaN -20 31673576 31677616 3 3 1.438886 1.537880 1.604160 0.449716 0.483926 0.495909 +20 31673576 31677616 3 3 1.438886 1.537880 1.604160 0.444488 0.487541 0.497184 20 31678263 31805721 17 0 1.630800 1.668371 1.724806 NaN NaN NaN -20 31811366 31828443 9 5 1.556041 1.616400 1.675757 0.384104 0.440858 0.495959 -20 31828940 32700168 105 1 1.629432 1.650935 1.663871 0.471075 0.491959 0.498668 -20 32847930 34218538 301 6 1.107186 1.114977 1.126401 0.432530 0.485085 0.499278 -20 34218539 34782534 94 23 1.132510 1.151938 1.172347 0.392348 0.481583 0.497366 -20 34783000 35555907 102 1 1.082704 1.097204 1.120763 0.437582 0.489327 0.497035 -20 35558912 35559530 1 1 0.853674 1.090897 1.288792 0.445938 0.485191 0.495392 -20 35563181 35757820 38 4 0.948333 0.974652 1.008372 0.444197 0.475088 0.496887 -20 35765963 35769967 2 2 0.868623 0.960101 1.080009 0.450706 0.475335 0.495864 -20 35771870 35833556 14 3 0.934589 0.993416 1.059647 0.457173 0.489366 0.498523 -20 35835425 35838822 2 4 0.736938 0.846223 0.971703 0.453829 0.485948 0.498814 -20 35841912 36784743 74 9 0.907187 0.929155 0.948146 0.435112 0.480162 0.497193 -20 36789571 36846128 5 6 0.812387 0.874448 0.956137 0.456018 0.484753 0.498036 -20 36846395 37272803 90 12 0.942329 0.966290 0.982398 0.467053 0.487589 0.498108 -20 37274440 37279609 6 7 0.788135 0.914467 1.002640 
0.441471 0.476914 0.495875 -20 37353117 37667438 46 5 0.924444 0.945050 0.977642 0.441712 0.484168 0.497860 -20 39316268 45878435 698 77 0.115573 0.126588 0.138203 0.472154 0.493084 0.499358 -20 45890779 45891424 1 1 -0.268258 -0.088095 0.101311 0.454396 0.485635 0.498555 -20 45904807 62905205 1317 245 0.092592 0.100428 0.105982 0.483032 0.492206 0.498982 +20 31811366 31828443 9 5 1.556041 1.616400 1.675757 0.399750 0.481248 0.498510 +20 31828940 32700168 105 1 1.629432 1.650935 1.663871 0.455555 0.483341 0.497187 +20 32847930 34218538 301 6 1.107186 1.114977 1.126401 0.362470 0.437232 0.495023 +20 34218539 34782534 94 23 1.132510 1.151938 1.172347 0.407484 0.480976 0.494815 +20 34783000 35555907 102 1 1.082704 1.097204 1.120763 0.453904 0.488582 0.498458 +20 35558912 35559530 1 1 0.853674 1.090897 1.288792 0.461263 0.485922 0.498259 +20 35563181 35757820 38 4 0.948333 0.974652 1.008372 0.440777 0.473824 0.497708 +20 35765963 35769967 2 2 0.868623 0.960101 1.080009 0.463655 0.485527 0.496400 +20 35771870 35833556 14 3 0.934589 0.993416 1.059647 0.440580 0.488437 0.498315 +20 35835425 35838822 2 4 0.736938 0.846223 0.971703 0.432129 0.482051 0.498058 +20 35841912 36784743 74 9 0.907187 0.929155 0.948146 0.453974 0.491408 0.497463 +20 36789571 36846128 5 6 0.812387 0.874448 0.956137 0.456500 0.485379 0.497566 +20 36846395 37272803 90 12 0.942329 0.966290 0.982398 0.449697 0.483017 0.499242 +20 37274440 37279609 6 7 0.788135 0.914467 1.002640 0.436477 0.486555 0.498404 +20 37353117 37667438 46 5 0.924444 0.945050 0.977642 0.456963 0.488537 0.498642 +20 39316268 45878435 698 77 0.115573 0.126588 0.138203 0.478867 0.493289 0.499561 +20 45890779 45891424 1 1 -0.268258 -0.088095 0.101311 0.458861 0.487353 0.498527 +20 45904807 62905205 1317 245 0.092592 0.100428 0.105982 0.485711 0.495766 0.499534 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.af.param 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.af.param index 843f4bc4670..ae3cab42f0c 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.077487 1.082793 1.083963 1.085385 1.086957 1.090137 1.091851 1.096486 1.103647 -BIAS_VARIANCE 0.019911 0.021798 0.022330 0.023406 0.024475 0.024995 0.025457 0.026191 0.029032 +MEAN_BIAS 1.077473 1.082782 1.083953 1.085376 1.086949 1.090131 1.091846 1.096485 1.103651 +BIAS_VARIANCE 0.019911 0.021799 0.022330 0.023406 0.024475 0.024995 0.025457 0.026192 0.029033 OUTLIER_PROBABILITY 0.016021 0.019093 0.020858 0.024850 0.027103 0.030039 0.031283 0.034056 0.038297 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.seg index 4a2b1adf70d..8030dd67ac2 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-nac-tumor-1.modelFinal.seg @@ -2,17 +2,17 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber 
SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 2622249 232 73 0.640558 0.655264 0.668338 0.300040 0.311068 0.326137 -20 2633368 5297334 386 46 0.618199 0.628326 0.637287 0.319425 0.334775 0.345944 -20 5454040 8771160 132 31 0.643092 0.658227 0.675938 0.315548 0.324159 0.334958 -20 8772985 13091116 120 18 0.858455 0.876600 0.889469 0.272409 0.293368 0.313940 -20 13097902 19560970 282 33 0.882213 0.892243 0.906033 0.283832 0.296141 0.317290 -20 19565364 25439484 324 77 0.979870 0.989054 1.002455 0.239936 0.246220 0.253255 -20 25441907 25829597 29 9 1.059752 1.101960 1.141341 0.219062 0.243290 0.284768 +20 68100 2622249 232 73 0.640558 0.655264 0.668338 0.300039 0.311068 0.326137 +20 2633368 5297334 386 46 0.618199 0.628326 0.637287 0.319426 0.334774 0.345943 +20 5454040 8771160 132 31 0.643092 0.658227 0.675938 0.315548 0.324160 0.334960 +20 8772985 13091116 120 18 0.858455 0.876600 0.889469 0.272410 0.293367 0.313939 +20 13097902 19560970 282 33 0.882213 0.892243 0.906033 0.283832 0.296141 0.317289 +20 19565364 25439484 324 77 0.979870 0.989054 1.002455 0.239936 0.246221 0.253256 +20 25441907 25829597 29 9 1.059752 1.101960 1.141341 0.219062 0.243289 0.284766 20 26061550 26072430 3 0 0.372550 0.482659 0.680549 NaN NaN NaN -20 26083858 30126381 33 5 1.126021 1.166890 1.199691 0.332710 0.379403 0.450056 +20 26083858 30126381 33 5 1.126021 1.166890 1.199691 0.332718 0.379416 0.450075 20 30132499 31828443 328 16 1.727361 1.737376 1.747481 0.166044 0.174395 0.187170 -20 31828940 32700168 105 1 1.626782 1.645405 1.662463 0.453442 0.487963 0.498889 -20 32847930 35559530 498 31 1.111093 1.120290 1.132834 0.250137 0.258026 0.273794 -20 35563181 37667438 277 52 0.938428 0.946579 0.967842 0.288985 0.299942 0.308865 -20 39316268 62905205 2016 323 
0.103065 0.109202 0.115927 0.489449 0.495837 0.499417 +20 31828940 32700168 105 1 1.626782 1.645405 1.662463 0.453508 0.488038 0.498967 +20 32847930 35559530 498 31 1.111093 1.120290 1.132834 0.250137 0.258026 0.273795 +20 35563181 37667438 277 52 0.938428 0.946579 0.967842 0.288986 0.299942 0.308864 +20 39316268 62905205 2016 323 0.103065 0.109202 0.115927 0.489451 0.495837 0.499416 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.af.igv.seg index dd4aabe8298..7136bdf66c8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.af.igv.seg @@ -1,13 +1,13 @@ Sample Chromosome Start End Num_Probes Segment_Mean SM-74P4M-1 20 68100 3199530 86 0.318793 -SM-74P4M-1 20 3202236 8771160 80 0.335292 +SM-74P4M-1 20 3202236 8771160 80 0.335293 SM-74P4M-1 20 8772985 14308404 31 0.299679 SM-74P4M-1 20 14473874 23807549 77 0.273803 SM-74P4M-1 20 23856574 25829597 57 0.240454 SM-74P4M-1 20 26061550 26072430 0 NaN -SM-74P4M-1 20 26083858 30126381 7 0.356927 +SM-74P4M-1 20 26083858 30126381 7 0.356944 SM-74P4M-1 20 30132499 32700168 15 0.185572 SM-74P4M-1 20 32847930 32996850 1 0.486257 -SM-74P4M-1 20 33000070 35838822 56 0.273746 +SM-74P4M-1 20 33000070 35838822 56 0.273745 SM-74P4M-1 20 35841912 37667438 50 0.307028 SM-74P4M-1 20 39316268 62905205 375 0.495287 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.af.param index 
e6ef049c3c2..c9874174893 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.107480 1.127598 1.130514 1.139511 1.142812 1.148875 1.156153 1.165459 1.169052 -BIAS_VARIANCE 0.452167 0.463604 0.476934 0.482244 0.485483 0.490797 0.492643 0.494530 0.498252 -OUTLIER_PROBABILITY 0.005022 0.010228 0.012933 0.015383 0.017580 0.020834 0.025056 0.034416 0.041315 +MEAN_BIAS 1.113001 1.117963 1.124780 1.129948 1.140812 1.146029 1.150391 1.153607 1.162609 +BIAS_VARIANCE 0.449861 0.451470 0.463033 0.468201 0.470782 0.474679 0.480235 0.483758 0.494226 +OUTLIER_PROBABILITY 0.003838 0.008639 0.010689 0.016000 0.017634 0.020029 0.023901 0.028524 0.036400 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.seg index c5525185d8b..27449eb38ec 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelBegin.seg @@ -3,125 +3,125 @@ @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 
MINOR_ALLELE_FRACTION_POSTERIOR_90 20 68100 126585 4 0 0.524470 0.601407 0.706644 NaN NaN NaN -20 137935 139917 2 5 0.785879 0.898611 1.016510 0.445793 0.485867 0.498427 -20 168276 363520 22 15 0.583876 0.625972 0.667696 0.458232 0.488759 0.499237 -20 368404 377586 3 3 0.461714 0.553914 0.626951 0.438345 0.481932 0.497096 -20 389132 398077 4 3 0.566831 0.656941 0.737884 0.463090 0.491570 0.499148 -20 398078 422417 13 1 0.608469 0.656020 0.689490 0.452115 0.493336 0.498722 -20 422418 428970 3 2 0.653730 0.744561 0.857407 0.475196 0.489912 0.498860 -20 464335 1209491 51 6 0.625960 0.646304 0.665655 0.464880 0.490417 0.498328 -20 1210274 1215085 3 2 0.415255 0.496718 0.602324 0.460029 0.484509 0.496862 -20 1218438 1373576 24 1 0.670675 0.701721 0.741305 0.434863 0.478191 0.496487 -20 1424143 1426618 2 2 0.549623 0.670839 0.798850 0.465797 0.489200 0.498222 -20 1432887 1515339 16 2 0.653477 0.694731 0.731888 0.436415 0.476516 0.497533 -20 1517550 1552935 6 6 0.728388 0.778619 0.853447 0.449801 0.481660 0.496806 -20 1558733 1617403 5 6 0.434020 0.509677 0.596605 0.446609 0.477000 0.498868 -20 1629447 2474793 54 6 0.597024 0.617708 0.654253 0.426963 0.481804 0.494611 -20 2517017 2518214 2 2 0.483187 0.624226 0.713785 0.445836 0.485371 0.497777 -20 2538851 2597102 13 3 0.699476 0.728465 0.778510 0.464870 0.488898 0.497643 -20 2597467 2622249 5 8 0.569809 0.655909 0.738855 0.459638 0.490843 0.499251 -20 2633368 2796137 50 1 0.570440 0.597928 0.616003 0.450413 0.488519 0.498891 -20 2796138 2796673 1 1 0.467547 0.638188 0.769026 0.452887 0.486223 0.497524 -20 2796731 3196210 84 10 0.622314 0.642648 0.661420 0.444392 0.480503 0.497243 -20 3198912 3199530 1 1 0.409606 0.604688 0.747195 0.459946 0.486965 0.497928 -20 3202236 3680358 120 12 0.632795 0.647795 0.665503 0.447657 0.479447 0.492065 -20 3681738 3686841 4 6 0.526923 0.597767 0.674034 0.459753 0.485331 0.496566 +20 137935 139917 2 5 0.785879 0.898611 1.016510 0.454899 0.481194 0.496829 +20 168276 363520 22 15 0.583876 
0.625972 0.667696 0.458621 0.485813 0.495186 +20 368404 377586 3 3 0.461714 0.553914 0.626951 0.443044 0.482750 0.498033 +20 389132 398077 4 3 0.566831 0.656941 0.737884 0.442042 0.485793 0.498145 +20 398078 422417 13 1 0.608469 0.656020 0.689490 0.459843 0.491273 0.499439 +20 422418 428970 3 2 0.653730 0.744561 0.857407 0.461025 0.485813 0.497644 +20 464335 1209491 51 6 0.625960 0.646304 0.665655 0.452526 0.483565 0.498092 +20 1210274 1215085 3 2 0.415255 0.496718 0.602324 0.467655 0.482648 0.497859 +20 1218438 1373576 24 1 0.670675 0.701721 0.741305 0.459803 0.484934 0.498038 +20 1424143 1426618 2 2 0.549623 0.670839 0.798850 0.447535 0.487159 0.498383 +20 1432887 1515339 16 2 0.653477 0.694731 0.731888 0.450653 0.479254 0.496999 +20 1517550 1552935 6 6 0.728388 0.778619 0.853447 0.462755 0.492027 0.498369 +20 1558733 1617403 5 6 0.434020 0.509677 0.596605 0.460876 0.485499 0.498227 +20 1629447 2474793 54 6 0.597024 0.617708 0.654253 0.451329 0.486133 0.497863 +20 2517017 2518214 2 2 0.483187 0.624226 0.713785 0.429820 0.483169 0.497490 +20 2538851 2597102 13 3 0.699476 0.728465 0.778510 0.457542 0.488557 0.497785 +20 2597467 2622249 5 8 0.569809 0.655909 0.738855 0.462138 0.488767 0.498990 +20 2633368 2796137 50 1 0.570440 0.597928 0.616003 0.464517 0.488647 0.498256 +20 2796138 2796673 1 1 0.467547 0.638188 0.769026 0.441914 0.490884 0.499167 +20 2796731 3196210 84 10 0.622314 0.642648 0.661420 0.471112 0.493943 0.499430 +20 3198912 3199530 1 1 0.409606 0.604688 0.747195 0.456995 0.482552 0.495715 +20 3202236 3680358 120 12 0.632795 0.647795 0.665503 0.455743 0.483536 0.495980 +20 3681738 3686841 4 6 0.526923 0.597767 0.674034 0.431848 0.482494 0.498494 20 3686842 3719616 3 0 0.536630 0.653823 0.739464 NaN NaN NaN -20 3721211 3721811 1 1 0.432850 0.597559 0.746468 0.460657 0.488252 0.499192 -20 3722680 4055845 60 5 0.603406 0.625085 0.653040 0.456796 0.489967 0.497601 -20 4155452 4162680 3 3 0.436800 0.537025 0.647765 0.449710 0.486831 0.498523 -20 4162681 
4866807 28 8 0.579587 0.618340 0.662348 0.454679 0.484096 0.497551 -20 4879950 4880610 1 1 -37.542394 0.615667 0.804203 0.442931 0.487704 0.498279 -20 4882837 5897817 53 12 0.621204 0.645923 0.672654 0.443534 0.481419 0.498393 -20 5902730 5905947 2 2 0.558012 0.672789 0.758048 0.452608 0.477251 0.497889 -20 5918930 8609330 78 13 0.644667 0.664414 0.675468 0.450091 0.481151 0.495678 -20 8626498 8639603 6 3 0.688078 0.738301 0.809159 0.460188 0.484696 0.498440 -20 8665328 8771160 23 14 0.693840 0.739937 0.766986 0.418592 0.486230 0.497522 -20 8772985 9416547 29 3 0.854021 0.881668 0.918282 0.457301 0.486102 0.497722 -20 9417398 9425142 3 3 0.568999 0.663009 0.771887 0.450309 0.483783 0.498145 +20 3721211 3721811 1 1 0.432850 0.597559 0.746468 0.430959 0.486534 0.497225 +20 3722680 4055845 60 5 0.603406 0.625085 0.653040 0.450732 0.493170 0.498621 +20 4155452 4162680 3 3 0.436800 0.537025 0.647765 0.456077 0.488099 0.497282 +20 4162681 4866807 28 8 0.579587 0.618340 0.662348 0.451117 0.490174 0.498182 +20 4879950 4880610 1 1 -37.542394 0.615667 0.804203 0.442330 0.477883 0.495452 +20 4882837 5897817 53 12 0.621204 0.645923 0.672654 0.456385 0.483062 0.497625 +20 5902730 5905947 2 2 0.558012 0.672789 0.758048 0.468868 0.490738 0.497666 +20 5918930 8609330 78 13 0.644667 0.664414 0.675468 0.448025 0.481190 0.496877 +20 8626498 8639603 6 3 0.688078 0.738301 0.809159 0.455609 0.484989 0.497659 +20 8665328 8771160 23 14 0.693840 0.739937 0.766986 0.418146 0.476478 0.497272 +20 8772985 9416547 29 3 0.854021 0.881668 0.918282 0.471584 0.491800 0.499054 +20 9417398 9425142 3 3 0.568999 0.663009 0.771887 0.445170 0.486193 0.496482 20 9433743 9499127 12 0 0.844885 0.894935 0.932438 NaN NaN NaN -20 9510038 9510719 1 1 -42.234544 0.781574 0.983984 0.431922 0.474307 0.491828 -20 9519858 10025443 11 2 0.816088 0.870296 0.920325 0.458978 0.485296 0.497405 -20 10025971 10032789 3 4 0.849116 0.925941 1.054150 0.437493 0.480054 0.499230 -20 10033509 10628978 37 3 0.847368 0.873779 
0.899660 0.449639 0.487719 0.497859 -20 10628979 10629540 1 2 0.630511 0.828540 0.982925 0.462413 0.490165 0.498911 -20 10629541 13055397 20 1 0.849101 0.889400 0.926613 0.423254 0.481949 0.497124 -20 13071480 13091116 3 4 0.874972 0.944811 1.020779 0.408779 0.484474 0.497597 -20 13097902 13768250 40 1 0.854831 0.879653 0.906136 0.440968 0.484188 0.496410 -20 13768984 13769550 1 1 -3.480082 0.651350 0.843135 0.455241 0.484641 0.497044 -20 13773555 14067121 34 4 0.796034 0.818948 0.850134 0.436796 0.488863 0.499117 -20 14305952 14308404 1 2 -29.708743 0.886004 1.204083 0.457324 0.489595 0.498755 -20 14473874 16721902 50 3 0.897186 0.915981 0.937491 0.465750 0.483117 0.494326 -20 16728796 16730907 3 3 1.046724 1.147365 1.233400 0.429276 0.483001 0.495996 +20 9510038 9510719 1 1 -42.234544 0.781574 0.983984 0.465352 0.488559 0.498123 +20 9519858 10025443 11 2 0.816088 0.870296 0.920325 0.459065 0.481332 0.495923 +20 10025971 10032789 3 4 0.849116 0.925941 1.054150 0.434579 0.478489 0.498015 +20 10033509 10628978 37 3 0.847368 0.873779 0.899660 0.447024 0.479099 0.497611 +20 10628979 10629540 1 2 0.630511 0.828540 0.982925 0.453713 0.482190 0.496947 +20 10629541 13055397 20 1 0.849101 0.889400 0.926613 0.450335 0.487595 0.499610 +20 13071480 13091116 3 4 0.874972 0.944811 1.020779 0.470678 0.487647 0.497415 +20 13097902 13768250 40 1 0.854831 0.879653 0.906136 0.463652 0.486892 0.498469 +20 13768984 13769550 1 1 -3.480082 0.651350 0.843135 0.441119 0.483174 0.496917 +20 13773555 14067121 34 4 0.796034 0.818948 0.850134 0.462188 0.489386 0.497702 +20 14305952 14308404 1 2 -29.708743 0.886004 1.204083 0.449458 0.483515 0.497252 +20 14473874 16721902 50 3 0.897186 0.915981 0.937491 0.445171 0.485217 0.498989 +20 16728796 16730907 3 3 1.046724 1.147365 1.233400 0.449733 0.478264 0.495856 20 16731489 17462967 13 0 0.885677 0.927447 0.962016 NaN NaN NaN -20 17474468 17492965 4 5 0.767899 0.848583 0.914066 0.454999 0.485977 0.496023 -20 17495115 17971079 46 3 0.884311 
0.906667 0.930729 0.459715 0.489877 0.497739 -20 18005029 18022619 2 2 0.745948 0.881739 1.066532 0.462521 0.487925 0.498529 -20 18037928 18429964 34 6 0.909837 0.938251 0.983586 0.448821 0.486737 0.498635 -20 18432248 18433633 2 2 0.797740 0.906709 1.047442 0.447009 0.482801 0.498572 -20 18434125 18795182 47 2 0.843838 0.869582 0.891032 0.474411 0.488506 0.498345 -20 18805660 19560970 5 4 0.838289 0.888106 0.939910 0.457942 0.490592 0.496397 -20 19565364 19937701 16 1 0.938640 0.997341 1.050233 0.449407 0.488675 0.497119 -20 19941094 19956681 4 3 0.801671 0.899010 0.994614 0.453786 0.481332 0.495590 -20 19970400 19971205 1 1 0.676305 0.828829 1.005478 0.465328 0.487769 0.496609 -20 19972562 20031518 23 1 0.952952 0.984219 1.020643 0.459628 0.488902 0.499259 -20 20032685 20050660 4 9 1.043540 1.136218 1.215523 0.408692 0.473176 0.496212 +20 17474468 17492965 4 5 0.767899 0.848583 0.914066 0.449366 0.484665 0.497373 +20 17495115 17971079 46 3 0.884311 0.906667 0.930729 0.456458 0.482544 0.497364 +20 18005029 18022619 2 2 0.745948 0.881739 1.066532 0.438032 0.478796 0.495543 +20 18037928 18429964 34 6 0.909837 0.938251 0.983586 0.460473 0.486243 0.497260 +20 18432248 18433633 2 2 0.797740 0.906709 1.047442 0.470863 0.490835 0.498080 +20 18434125 18795182 47 2 0.843838 0.869582 0.891032 0.466315 0.490293 0.498963 +20 18805660 19560970 5 4 0.838289 0.888106 0.939910 0.410608 0.470202 0.495990 +20 19565364 19937701 16 1 0.938640 0.997341 1.050233 0.439065 0.484331 0.498568 +20 19941094 19956681 4 3 0.801671 0.899010 0.994614 0.459726 0.485923 0.497977 +20 19970400 19971205 1 1 0.676305 0.828829 1.005478 0.450402 0.483615 0.496966 +20 19972562 20031518 23 1 0.952952 0.984219 1.020643 0.437389 0.479667 0.497542 +20 20032685 20050660 4 9 1.043540 1.136218 1.215523 0.433991 0.487870 0.498453 20 20051247 20066430 5 0 0.949809 1.022599 1.079028 NaN NaN NaN -20 20071237 20168952 8 6 0.982955 1.045001 1.100870 0.447426 0.486084 0.498203 +20 20071237 20168952 8 6 0.982955 
1.045001 1.100870 0.408018 0.472992 0.494697 20 20171735 20232659 6 0 0.868100 0.940002 0.994605 NaN NaN NaN -20 20243349 20244026 1 1 0.592460 0.869890 1.027119 0.438122 0.484322 0.498544 -20 20245809 21117443 49 1 0.902107 0.925627 0.942873 0.408751 0.485730 0.497850 -20 21125956 21143319 2 3 0.758861 0.877973 0.982262 0.401704 0.468562 0.493244 -20 21143320 21335762 29 1 0.918967 0.950775 0.975699 0.438312 0.479148 0.497020 -20 21336467 21337020 1 1 0.951806 1.119504 1.347018 0.432382 0.474451 0.493147 +20 20243349 20244026 1 1 0.592460 0.869890 1.027119 0.447444 0.482811 0.495962 +20 20245809 21117443 49 1 0.902107 0.925627 0.942873 0.457545 0.486172 0.497712 +20 21125956 21143319 2 3 0.758861 0.877973 0.982262 0.437129 0.481146 0.493196 +20 21143320 21335762 29 1 0.918967 0.950775 0.975699 0.440480 0.486889 0.498845 +20 21336467 21337020 1 1 0.951806 1.119504 1.347018 0.447338 0.485549 0.495673 20 21337021 23335353 23 0 0.839495 0.886394 0.931221 NaN NaN NaN -20 23344770 23347986 2 2 0.784379 0.895208 0.996667 0.460554 0.484914 0.496191 -20 23349148 23476803 23 3 0.930992 0.973755 1.008134 0.450212 0.480663 0.497300 -20 23528151 23549339 6 8 0.918465 0.996926 1.107777 0.434165 0.469544 0.496407 -20 23583896 23804992 12 4 0.888770 0.959020 1.015016 0.455701 0.483432 0.499233 -20 23805596 23807549 2 2 0.974566 1.124046 1.262117 0.437890 0.478817 0.495153 -20 23856574 25058753 44 9 1.019328 1.037878 1.070047 0.449475 0.486774 0.497735 -20 25059122 25059830 1 1 0.929910 1.107691 1.338866 0.410701 0.477186 0.496561 -20 25059831 25250095 19 2 1.047704 1.076587 1.107808 0.442719 0.486679 0.497750 -20 25251768 25269459 12 15 1.032362 1.081958 1.157877 0.367954 0.476942 0.498365 -20 25270235 25398090 24 12 0.991888 1.033597 1.086257 0.362618 0.489197 0.497602 -20 25398490 25399083 1 1 1.218677 1.390608 1.553730 0.437730 0.476120 0.495946 -20 25405596 25470212 15 11 1.072084 1.114885 1.172830 0.438331 0.484138 0.495441 +20 23344770 23347986 2 2 0.784379 0.895208 
0.996667 0.452151 0.486586 0.498581 +20 23349148 23476803 23 3 0.930992 0.973755 1.008134 0.460407 0.488177 0.499327 +20 23528151 23549339 6 8 0.918465 0.996926 1.107777 0.427532 0.471463 0.493692 +20 23583896 23804992 12 4 0.888770 0.959020 1.015016 0.436084 0.484688 0.497474 +20 23805596 23807549 2 2 0.974566 1.124046 1.262117 0.457081 0.483118 0.495893 +20 23856574 25058753 44 9 1.019328 1.037878 1.070047 0.408367 0.472439 0.496759 +20 25059122 25059830 1 1 0.929910 1.107691 1.338866 0.428141 0.469570 0.492915 +20 25059831 25250095 19 2 1.047704 1.076587 1.107808 0.448260 0.483372 0.497287 +20 25251768 25269459 12 15 1.032362 1.081958 1.157877 0.346150 0.402553 0.477517 +20 25270235 25398090 24 12 0.991888 1.033597 1.086257 0.366249 0.467836 0.490621 +20 25398490 25399083 1 1 1.218677 1.390608 1.553730 0.465357 0.490340 0.496794 +20 25405596 25470212 15 11 1.072084 1.114885 1.172830 0.440299 0.484472 0.499546 20 25470260 25520944 12 0 1.021558 1.075534 1.145459 NaN NaN NaN -20 25596310 25829597 8 6 1.098352 1.234171 1.333143 0.397657 0.462887 0.497475 +20 25596310 25829597 8 6 1.098352 1.234171 1.333143 0.427853 0.478984 0.498348 20 26061550 26072430 3 0 0.346585 0.476691 0.592981 NaN NaN NaN -20 26083858 30126381 33 7 1.128613 1.155134 1.189744 0.433831 0.483528 0.496667 -20 30132499 31644756 283 1 1.742213 1.754708 1.770965 0.436856 0.475611 0.496588 -20 31646933 31657031 7 5 1.486861 1.580170 1.653185 0.444664 0.471426 0.495946 -20 31657443 31673054 9 1 1.587447 1.648094 1.701662 0.457106 0.488117 0.497674 -20 31673576 31677070 2 2 1.367535 1.437547 1.576903 0.454234 0.482689 0.498518 +20 26083858 30126381 33 7 1.128613 1.155134 1.189744 0.443393 0.481375 0.496631 +20 30132499 31644756 283 1 1.742213 1.754708 1.770965 0.434645 0.478587 0.495755 +20 31646933 31657031 7 5 1.486861 1.580170 1.653185 0.432946 0.474700 0.496831 +20 31657443 31673054 9 1 1.587447 1.648094 1.701662 0.457057 0.486993 0.498932 +20 31673576 31677070 2 2 1.367535 1.437547 1.576903 
0.438748 0.483781 0.497840 20 31677071 31805721 18 0 1.636028 1.675802 1.722761 NaN NaN NaN -20 31811366 31828443 9 5 1.547919 1.623082 1.678306 0.406299 0.471450 0.496816 -20 31828940 32700168 105 1 1.627604 1.651674 1.666737 0.450647 0.488298 0.497500 +20 31811366 31828443 9 5 1.547919 1.623082 1.678306 0.383077 0.471537 0.495100 +20 31828940 32700168 105 1 1.627604 1.651674 1.666737 0.450769 0.486458 0.497953 20 32847930 32981939 14 0 0.981523 1.032955 1.097114 NaN NaN NaN -20 32996206 32996850 1 1 -44.144161 -23.442478 6.075372 0.441531 0.486796 0.498200 -20 33000070 34218538 286 6 1.106545 1.121904 1.136853 0.376880 0.464391 0.495483 -20 34218539 34243516 10 4 1.059556 1.117523 1.199210 0.425429 0.472382 0.492492 -20 34246603 34302563 22 2 1.156840 1.183857 1.209517 0.459783 0.485626 0.498282 -20 34304411 34501521 25 16 1.133764 1.161469 1.186708 0.446864 0.490285 0.499201 -20 34501704 34505357 2 2 1.023992 1.129776 1.243862 0.457191 0.492502 0.498569 -20 34505358 35065005 53 8 1.093915 1.118065 1.140264 0.462744 0.486942 0.498102 -20 35067906 35068523 1 1 0.841778 1.092655 1.330495 0.448476 0.485228 0.498517 -20 35070879 35468094 64 1 1.090973 1.110308 1.127704 0.450405 0.484837 0.498576 -20 35490811 35491997 1 1 0.881239 1.050916 1.252413 0.461223 0.485843 0.497804 +20 32996206 32996850 1 1 -44.144161 -23.442478 6.075372 0.458797 0.480457 0.495207 +20 33000070 34218538 286 6 1.106545 1.121904 1.136853 0.388324 0.465741 0.495831 +20 34218539 34243516 10 4 1.059556 1.117523 1.199210 0.460967 0.486098 0.498906 +20 34246603 34302563 22 2 1.156840 1.183857 1.209517 0.442578 0.479098 0.498178 +20 34304411 34501521 25 16 1.133764 1.161469 1.186708 0.455424 0.482021 0.494876 +20 34501704 34505357 2 2 1.023992 1.129776 1.243862 0.461297 0.489227 0.498912 +20 34505358 35065005 53 8 1.093915 1.118065 1.140264 0.426009 0.481539 0.497408 +20 35067906 35068523 1 1 0.841778 1.092655 1.330495 0.464606 0.486248 0.497141 +20 35070879 35468094 64 1 1.090973 1.110308 1.127704 
0.458465 0.488106 0.499004 +20 35490811 35491997 1 1 0.881239 1.050916 1.252413 0.470493 0.490799 0.498666 20 35504327 35555907 18 0 0.992799 1.064285 1.109742 NaN NaN NaN -20 35558912 35559530 1 1 0.920315 1.079069 1.257841 0.434159 0.476086 0.495184 -20 35563181 35757820 38 4 0.936493 0.973360 1.002878 0.455034 0.485798 0.498183 -20 35765963 35769967 2 2 0.774601 0.951250 1.057507 0.426728 0.480781 0.496349 -20 35771870 35833556 14 4 0.942110 0.983035 1.026967 0.467737 0.488991 0.496488 -20 35835425 35838822 2 4 0.738773 0.890097 1.000615 0.418123 0.476152 0.495263 -20 35841912 36784743 74 11 0.909883 0.932272 0.944864 0.433347 0.482283 0.497554 -20 36789571 36846128 5 6 0.809076 0.875575 0.946742 0.453502 0.488412 0.499434 -20 36846395 36946389 16 5 0.901904 0.935909 0.990166 0.433242 0.475947 0.494414 -20 36946564 36963147 8 5 0.851695 0.915816 0.982273 0.454641 0.491101 0.498921 -20 36963749 37268279 65 6 0.951057 0.971139 0.999376 0.465846 0.485789 0.497945 -20 37272097 37279609 7 11 0.829791 0.897668 0.977499 0.441992 0.487394 0.499113 -20 37353117 37667438 46 6 0.920441 0.944903 0.968250 0.449098 0.480317 0.495612 -20 39316268 45878435 698 90 0.116500 0.125577 0.134277 0.476517 0.491417 0.499391 -20 45890779 45891424 1 1 -0.195750 -0.083534 0.091984 0.468558 0.481017 0.496755 -20 45904807 62905205 1317 284 0.092283 0.099282 0.107203 0.484183 0.492933 0.498847 +20 35558912 35559530 1 1 0.920315 1.079069 1.257841 0.431546 0.477170 0.497345 +20 35563181 35757820 38 4 0.936493 0.973360 1.002878 0.458073 0.490124 0.497936 +20 35765963 35769967 2 2 0.774601 0.951250 1.057507 0.470140 0.490706 0.497878 +20 35771870 35833556 14 4 0.942110 0.983035 1.026967 0.433423 0.483359 0.497350 +20 35835425 35838822 2 4 0.738773 0.890097 1.000615 0.433396 0.482922 0.498006 +20 35841912 36784743 74 11 0.909883 0.932272 0.944864 0.448043 0.491218 0.499686 +20 36789571 36846128 5 6 0.809076 0.875575 0.946742 0.449897 0.484268 0.496683 +20 36846395 36946389 16 5 0.901904 0.935909 
0.990166 0.450584 0.490227 0.498538 +20 36946564 36963147 8 5 0.851695 0.915816 0.982273 0.438364 0.479592 0.497340 +20 36963749 37268279 65 6 0.951057 0.971139 0.999376 0.443135 0.485951 0.498881 +20 37272097 37279609 7 11 0.829791 0.897668 0.977499 0.443635 0.483465 0.495968 +20 37353117 37667438 46 6 0.920441 0.944903 0.968250 0.440528 0.486456 0.497697 +20 39316268 45878435 698 90 0.116500 0.125577 0.134277 0.477498 0.489755 0.497717 +20 45890779 45891424 1 1 -0.195750 -0.083534 0.091984 0.455293 0.489391 0.498696 +20 45904807 62905205 1317 284 0.092283 0.099282 0.107203 0.483751 0.495525 0.498529 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.af.param index 479fa394432..999282cc6dc 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.068694 1.072355 1.078041 1.080358 1.082215 1.085802 1.089243 1.092773 1.100664 -BIAS_VARIANCE 0.019467 0.020594 0.021289 0.021940 0.024032 0.025316 0.027161 0.029158 0.032057 -OUTLIER_PROBABILITY 0.004914 0.008517 0.010379 0.012226 0.013318 0.014576 0.016812 0.018983 0.021454 +MEAN_BIAS 1.068640 1.072311 1.078013 1.080336 1.082198 1.085795 1.089245 1.092784 1.100696 +BIAS_VARIANCE 0.019467 0.020594 0.021289 0.021940 0.024032 0.025316 0.027162 0.029159 0.032059 +OUTLIER_PROBABILITY 0.004913 0.008517 0.010379 0.012227 0.013319 0.014577 0.016813 0.018985 
0.021456 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.seg index 891674962d3..b123dc997e3 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/multiple-sample-cr-ac-tumor-1.modelFinal.seg @@ -3,14 +3,14 @@ @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 20 68100 3199530 368 86 0.632513 0.644224 0.655379 0.308647 0.318793 0.328331 -20 3202236 8771160 382 80 0.636729 0.645768 0.657769 0.317504 0.335292 0.346240 +20 3202236 8771160 382 80 0.636729 0.645768 0.657769 0.317504 0.335293 0.346240 20 8772985 14308404 196 31 0.861766 0.875304 0.888045 0.283233 0.299679 0.318028 20 14473874 23807549 423 77 0.922533 0.932403 0.946326 0.265123 0.273803 0.281830 20 23856574 25829597 136 57 1.037238 1.053846 1.066452 0.227733 0.240454 0.251234 20 26061550 26072430 3 0 0.388794 0.476871 0.603231 NaN NaN NaN -20 26083858 30126381 33 7 1.123949 1.166801 1.194897 0.321192 0.356927 0.399228 +20 26083858 30126381 33 7 1.123949 1.166801 1.194897 0.321204 0.356944 0.399252 20 30132499 32700168 433 15 1.709861 1.716546 1.725381 0.176894 0.185572 0.199122 20 32847930 32996850 15 1 1.024331 1.062170 1.093941 0.431057 0.486257 0.499213 -20 33000070 35838822 539 56 1.096825 1.106572 1.115875 0.259971 0.273746 0.286897 -20 35841912 37667438 221 50 0.936659 0.945480 0.953464 0.296066 0.307028 0.319575 -20 39316268 62905205 2016 375 
0.104958 0.109687 0.124970 0.486375 0.495287 0.499459 +20 33000070 35838822 539 56 1.096825 1.106572 1.115875 0.259971 0.273745 0.286896 +20 35841912 37667438 221 50 0.936659 0.945480 0.953464 0.296066 0.307028 0.319574 +20 39316268 62905205 2016 375 0.104958 0.109687 0.124970 0.486377 0.495287 0.499459 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.af.igv.seg index 10e268ac898..18ecaed9498 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.af.igv.seg @@ -1,10 +1,12 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 138125 1552430 49 0.310482 -SM-74P4M-1 20 1559330 2779257 34 0.331388 -SM-74P4M-1 20 2796471 3838441 33 0.330721 -SM-74P4M-1 20 3870124 13060331 76 0.315462 -SM-74P4M-1 20 13071871 25841650 149 0.262686 -SM-74P4M-1 20 25846283 31025231 23 0.340765 -SM-74P4M-1 20 31647126 34782171 52 0.242045 -SM-74P4M-1 20 35491033 37396262 55 0.301540 -SM-74P4M-1 20 38354742 62871232 356 0.496904 +SM-74P4M-1 20 138125 1552430 49 0.314169 +SM-74P4M-1 20 1559330 2779257 34 0.336741 +SM-74P4M-1 20 2796471 3838441 33 0.327668 +SM-74P4M-1 20 3870124 4843609 9 0.386814 +SM-74P4M-1 20 4880308 6100230 28 0.350959 +SM-74P4M-1 20 6194421 6195664 2 0.477504 +SM-74P4M-1 20 7866261 25841650 186 0.265292 +SM-74P4M-1 20 25846283 31025231 23 0.340602 +SM-74P4M-1 20 31647126 35869619 71 0.255700 +SM-74P4M-1 20 36022539 37396262 36 0.314936 +SM-74P4M-1 20 38354742 62871232 356 0.495190 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.igv.seg 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.igv.seg index 37cb07c9193..26644367d34 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.igv.seg @@ -2,9 +2,11 @@ Sample Chromosome Start End Num_Probes Segment_Mean SM-74P4M-1 20 138125 1552430 0 NaN SM-74P4M-1 20 1559330 2779257 0 NaN SM-74P4M-1 20 2796471 3838441 0 NaN -SM-74P4M-1 20 3870124 13060331 0 NaN -SM-74P4M-1 20 13071871 25841650 0 NaN +SM-74P4M-1 20 3870124 4843609 0 NaN +SM-74P4M-1 20 4880308 6100230 0 NaN +SM-74P4M-1 20 6194421 6195664 0 NaN +SM-74P4M-1 20 7866261 25841650 0 NaN SM-74P4M-1 20 25846283 31025231 0 NaN -SM-74P4M-1 20 31647126 34782171 0 NaN -SM-74P4M-1 20 35491033 37396262 0 NaN +SM-74P4M-1 20 31647126 35869619 0 NaN +SM-74P4M-1 20 36022539 37396262 0 NaN SM-74P4M-1 20 38354742 62871232 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.seg index 2db501a0499..03cf655fca4 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.cr.seg @@ -5,9 +5,11 @@ CONTIG START END NUM_POINTS_COPY_RATIO MEAN_LOG2_COPY_RATIO 20 138125 1552430 0 NaN 20 1559330 2779257 0 NaN 20 2796471 3838441 0 NaN -20 3870124 13060331 0 NaN -20 13071871 25841650 0 NaN +20 3870124 4843609 0 NaN +20 4880308 6100230 0 NaN +20 6194421 6195664 0 NaN +20 7866261 25841650 0 NaN 20 25846283 31025231 0 NaN -20 31647126 34782171 0 NaN -20 35491033 37396262 0 NaN +20 31647126 
35869619 0 NaN +20 36022539 37396262 0 NaN 20 38354742 62871232 0 NaN diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.af.param index 04f1b0882cd..5af3a6deec3 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.065001 1.070465 1.073150 1.077038 1.078562 1.082153 1.087074 1.089658 1.091769 -BIAS_VARIANCE 0.026059 0.027576 0.029252 0.030330 0.032522 0.034146 0.034816 0.036562 0.039798 -OUTLIER_PROBABILITY 0.055326 0.064962 0.071506 0.074164 0.081333 0.085060 0.089944 0.095421 0.103508 +MEAN_BIAS 1.053481 1.060050 1.070962 1.073508 1.076470 1.079928 1.083190 1.088578 1.095074 +BIAS_VARIANCE 0.027770 0.029645 0.031053 0.032112 0.033418 0.036189 0.037964 0.040556 0.042925 +OUTLIER_PROBABILITY 0.050972 0.057111 0.062290 0.067122 0.070422 0.074016 0.076174 0.082792 0.092373 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.seg index f59b83c9932..5d63fca2788 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.seg +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelBegin.seg @@ -2,49 +2,49 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 139576 0 5 NaN NaN NaN 0.307582 0.346773 0.438145 -20 168466 377226 0 18 NaN NaN NaN 0.330926 0.347455 0.367309 -20 389456 402921 0 4 NaN NaN NaN 0.318461 0.352416 0.395247 -20 425606 1458504 0 14 NaN NaN NaN 0.308598 0.335077 0.363829 -20 1517979 1552430 0 8 NaN NaN NaN 0.311232 0.343111 0.373709 -20 1559330 1592312 0 6 NaN NaN NaN 0.447784 0.484865 0.497913 -20 1600524 1616892 0 6 NaN NaN NaN 0.336560 0.362889 0.396577 -20 1895889 1896060 0 3 NaN NaN NaN 0.460422 0.486434 0.498040 -20 2056358 2517825 0 6 NaN NaN NaN 0.353765 0.385550 0.448352 -20 2552805 2621998 0 10 NaN NaN NaN 0.311485 0.345045 0.369987 -20 2633380 2779257 0 3 NaN NaN NaN 0.435613 0.479867 0.496273 -20 2796471 3838441 0 33 NaN NaN NaN 0.325125 0.334590 0.351038 -20 3870124 4055656 0 2 NaN NaN NaN 0.442941 0.478607 0.498017 -20 4155948 4843609 0 7 NaN NaN NaN 0.377582 0.399804 0.432501 -20 4880308 5482307 0 15 NaN NaN NaN 0.329403 0.351695 0.371260 -20 5528518 6100230 0 13 NaN NaN NaN 0.333372 0.350985 0.371450 -20 6194421 6195664 0 2 NaN NaN NaN 0.374732 0.463121 0.494839 -20 7866261 8703145 0 9 NaN NaN NaN 0.309391 0.343965 0.369563 -20 8707900 8707927 0 2 NaN NaN NaN 0.407690 0.463100 0.495038 -20 8737734 8770932 0 7 NaN NaN NaN 0.277306 0.307132 0.346704 -20 8773096 10032413 0 11 NaN NaN NaN 0.274085 0.305374 0.329942 -20 10329888 10629525 0 6 NaN NaN NaN 0.301127 0.338250 0.482567 
-20 13054633 13060331 0 2 NaN NaN NaN 0.384441 0.478201 0.496186 -20 13071871 16730522 0 15 NaN NaN NaN 0.289290 0.309515 0.322158 -20 17459905 17460005 0 3 NaN NaN NaN 0.363929 0.400084 0.456407 -20 17474690 18446024 0 22 NaN NaN NaN 0.283351 0.300435 0.319628 -20 18806046 22714612 0 34 NaN NaN NaN 0.257412 0.268365 0.283623 -20 23345844 25011423 0 24 NaN NaN NaN 0.259794 0.272567 0.286583 -20 25038484 25255415 0 6 NaN NaN NaN 0.228080 0.250970 0.274484 -20 25257260 25282944 0 21 NaN NaN NaN 0.234277 0.251599 0.265177 -20 25288505 25398876 0 5 NaN NaN NaN 0.246085 0.274298 0.308657 -20 25434351 25470056 0 8 NaN NaN NaN 0.239093 0.255439 0.285947 -20 25597236 25838130 0 7 NaN NaN NaN 0.248991 0.278405 0.316273 -20 25838802 25841650 0 4 NaN NaN NaN 0.401315 0.463642 0.494459 -20 25846283 25900162 0 3 NaN NaN NaN 0.459655 0.485372 0.498836 -20 25900379 30037783 0 16 NaN NaN NaN 0.356864 0.458995 0.493495 -20 30053255 31025231 0 4 NaN NaN NaN 0.453880 0.490586 0.498220 -20 31647126 31677476 0 8 NaN NaN NaN 0.183849 0.205084 0.227366 -20 31811551 33879478 0 20 NaN NaN NaN 0.234299 0.244505 0.263952 -20 34218673 34782171 0 24 NaN NaN NaN 0.265194 0.282690 0.301982 -20 35491033 35869619 0 19 NaN NaN NaN 0.287529 0.305609 0.319915 -20 36022539 36937246 0 16 NaN NaN NaN 0.291775 0.304733 0.314173 -20 36946848 36965617 0 4 NaN NaN NaN 0.337974 0.452887 0.488730 -20 36989269 37279458 0 11 NaN NaN NaN 0.307611 0.334935 0.358422 -20 37377139 37396262 0 5 NaN NaN NaN 0.335320 0.369374 0.412040 -20 38354742 62871232 0 356 NaN NaN NaN 0.490904 0.495706 0.498569 +20 138125 139576 0 5 NaN NaN NaN 0.295960 0.331962 0.362405 +20 168466 377226 0 18 NaN NaN NaN 0.334873 0.353067 0.369216 +20 389456 402921 0 4 NaN NaN NaN 0.327663 0.352975 0.390260 +20 425606 1458504 0 14 NaN NaN NaN 0.317287 0.338621 0.363546 +20 1517979 1552430 0 8 NaN NaN NaN 0.314263 0.341635 0.362239 +20 1559330 1592312 0 6 NaN NaN NaN 0.465207 0.488761 0.498344 +20 1600524 1616892 0 6 NaN NaN NaN 0.326981 0.369726 
0.418677 +20 1895889 1896060 0 3 NaN NaN NaN 0.440874 0.485184 0.498181 +20 2056358 2517825 0 6 NaN NaN NaN 0.344581 0.392590 0.440969 +20 2552805 2621998 0 10 NaN NaN NaN 0.319310 0.347763 0.372991 +20 2633380 2779257 0 3 NaN NaN NaN 0.444933 0.483290 0.498703 +20 2796471 3838441 0 33 NaN NaN NaN 0.311529 0.339317 0.350677 +20 3870124 4055656 0 2 NaN NaN NaN 0.439971 0.487242 0.496084 +20 4155948 4843609 0 7 NaN NaN NaN 0.369690 0.411479 0.476015 +20 4880308 5482307 0 15 NaN NaN NaN 0.325550 0.348840 0.369000 +20 5528518 6100230 0 13 NaN NaN NaN 0.338014 0.356147 0.374497 +20 6194421 6195664 0 2 NaN NaN NaN 0.443386 0.478831 0.498147 +20 7866261 8703145 0 9 NaN NaN NaN 0.316870 0.340537 0.366931 +20 8707900 8707927 0 2 NaN NaN NaN 0.402838 0.464506 0.495509 +20 8737734 8770932 0 7 NaN NaN NaN 0.275950 0.306463 0.336945 +20 8773096 10032413 0 11 NaN NaN NaN 0.287417 0.301807 0.318153 +20 10329888 10629525 0 6 NaN NaN NaN 0.299099 0.338448 0.375342 +20 13054633 13060331 0 2 NaN NaN NaN 0.387197 0.461845 0.496041 +20 13071871 16730522 0 15 NaN NaN NaN 0.285509 0.306388 0.323943 +20 17459905 17460005 0 3 NaN NaN NaN 0.363608 0.406221 0.487274 +20 17474690 18446024 0 22 NaN NaN NaN 0.285575 0.305847 0.317724 +20 18806046 22714612 0 34 NaN NaN NaN 0.255259 0.269818 0.286072 +20 23345844 25011423 0 24 NaN NaN NaN 0.252180 0.269608 0.278246 +20 25038484 25255415 0 6 NaN NaN NaN 0.218896 0.252258 0.272157 +20 25257260 25282944 0 21 NaN NaN NaN 0.234668 0.249133 0.269723 +20 25288505 25398876 0 5 NaN NaN NaN 0.240197 0.278564 0.335928 +20 25434351 25470056 0 8 NaN NaN NaN 0.233867 0.259202 0.281301 +20 25597236 25838130 0 7 NaN NaN NaN 0.235294 0.267868 0.323576 +20 25838802 25841650 0 4 NaN NaN NaN 0.382794 0.446528 0.492828 +20 25846283 25900162 0 3 NaN NaN NaN 0.452142 0.487947 0.498416 +20 25900379 30037783 0 16 NaN NaN NaN 0.342588 0.420037 0.494109 +20 30053255 31025231 0 4 NaN NaN NaN 0.439609 0.484485 0.498296 +20 31647126 31677476 0 8 NaN NaN NaN 0.190331 0.204916 
0.227945 +20 31811551 33879478 0 20 NaN NaN NaN 0.233148 0.249044 0.263226 +20 34218673 34782171 0 24 NaN NaN NaN 0.260888 0.283356 0.304531 +20 35491033 35869619 0 19 NaN NaN NaN 0.274379 0.305277 0.320868 +20 36022539 36937246 0 16 NaN NaN NaN 0.290723 0.304729 0.319067 +20 36946848 36965617 0 4 NaN NaN NaN 0.348928 0.443790 0.489723 +20 36989269 37279458 0 11 NaN NaN NaN 0.310949 0.335662 0.364916 +20 37377139 37396262 0 5 NaN NaN NaN 0.346388 0.371595 0.402850 +20 38354742 62871232 0 356 NaN NaN NaN 0.486356 0.494616 0.498896 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.af.param index 62e366134cb..26f24c9b727 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.071219 1.074761 1.079743 1.084029 1.087154 1.089152 1.090474 1.093839 1.098595 -BIAS_VARIANCE 0.027265 0.028460 0.030686 0.032551 0.033488 0.035178 0.037179 0.038672 0.039563 -OUTLIER_PROBABILITY 0.033591 0.038581 0.040129 0.042728 0.045875 0.048124 0.052671 0.055083 0.057366 +MEAN_BIAS 1.058559 1.064990 1.071915 1.075084 1.078066 1.081042 1.083864 1.086846 1.092080 +BIAS_VARIANCE 0.025121 0.028657 0.029296 0.030419 0.031224 0.032140 0.033557 0.035158 0.038407 +OUTLIER_PROBABILITY 0.038561 0.041982 0.045016 0.048070 0.050265 0.053654 0.060145 0.065666 0.071586 diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.seg index 69f57fb4052..d9666941f6e 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac-nac.modelFinal.seg @@ -2,12 +2,14 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 1552430 0 49 NaN NaN NaN 0.301656 0.310482 0.321020 -20 1559330 2779257 0 34 NaN NaN NaN 0.321121 0.331388 0.351097 -20 2796471 3838441 0 33 NaN NaN NaN 0.311604 0.330721 0.346451 -20 3870124 13060331 0 76 NaN NaN NaN 0.295467 0.315462 0.329249 -20 13071871 25841650 0 149 NaN NaN NaN 0.253458 0.262686 0.277703 -20 25846283 31025231 0 23 NaN NaN NaN 0.283981 0.340765 0.421111 -20 31647126 34782171 0 52 NaN NaN NaN 0.222884 0.242045 0.262660 -20 35491033 37396262 0 55 NaN NaN NaN 0.289958 0.301540 0.314308 -20 38354742 62871232 0 356 NaN NaN NaN 0.488923 0.496904 0.499041 +20 138125 1552430 0 49 NaN NaN NaN 0.301921 0.314169 0.322889 +20 1559330 2779257 0 34 NaN NaN NaN 0.316053 0.336741 0.349462 +20 2796471 3838441 0 33 NaN NaN NaN 0.316653 0.327668 0.351416 +20 3870124 4843609 0 9 NaN NaN NaN 0.356309 0.386814 0.414824 +20 4880308 6100230 0 28 NaN NaN NaN 0.331640 0.350959 0.365682 +20 6194421 6195664 0 2 NaN NaN NaN 0.362548 0.477504 
0.498583 +20 7866261 25841650 0 186 NaN NaN NaN 0.260060 0.265292 0.273449 +20 25846283 31025231 0 23 NaN NaN NaN 0.306673 0.340602 0.427080 +20 31647126 35869619 0 71 NaN NaN NaN 0.246553 0.255700 0.274693 +20 36022539 37396262 0 36 NaN NaN NaN 0.302790 0.314936 0.328875 +20 38354742 62871232 0 356 NaN NaN NaN 0.485735 0.495190 0.499604 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.af.param index 1019aba1e75..521b8bbfdfe 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.059231 1.073764 1.082538 1.091104 1.099210 1.107675 1.124835 1.142061 1.156225 -BIAS_VARIANCE 0.337549 0.355694 0.367646 0.371785 0.380377 0.397368 0.428090 0.446462 0.484917 -OUTLIER_PROBABILITY 0.009441 0.014040 0.016986 0.022264 0.027119 0.031235 0.034292 0.044029 0.047521 +MEAN_BIAS 1.073120 1.088526 1.097023 1.106053 1.108963 1.114478 1.122222 1.143390 1.152508 +BIAS_VARIANCE 0.339488 0.357663 0.387698 0.400873 0.407475 0.417786 0.436235 0.445876 0.461334 +OUTLIER_PROBABILITY 0.003840 0.008197 0.012835 0.015531 0.018884 0.021811 0.025675 0.032057 0.046058 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.seg index 037bb2eeeaf..1900311f840 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelBegin.seg @@ -2,71 +2,71 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 138125 139576 0 5 NaN NaN NaN 0.402598 0.458690 0.489257 -20 168466 259156 0 13 NaN NaN NaN 0.466822 0.487670 0.497309 -20 259818 360306 0 8 NaN NaN NaN 0.408195 0.481713 0.498844 -20 368905 377226 0 3 NaN NaN NaN 0.452640 0.483733 0.496855 -20 389456 402921 0 4 NaN NaN NaN 0.439883 0.475307 0.494706 -20 425606 1285933 0 16 NaN NaN NaN 0.447089 0.479530 0.494184 -20 1417397 1896244 0 30 NaN NaN NaN 0.311661 0.374359 0.452280 -20 2036954 2552805 0 12 NaN NaN NaN 0.465444 0.488381 0.498581 -20 2593006 2621998 0 10 NaN NaN NaN 0.460950 0.485237 0.497729 -20 2624956 2779257 0 5 NaN NaN NaN 0.472348 0.492532 0.498647 -20 2796471 3838441 0 43 NaN NaN NaN 0.426428 0.485532 0.498052 -20 3858985 4055656 0 3 NaN NaN NaN 0.455864 0.484473 0.493982 -20 4138585 5068730 0 17 NaN NaN NaN 0.455611 0.484819 0.497596 -20 5159344 5482307 0 16 NaN NaN NaN 0.469968 0.489276 0.498473 -20 5528518 6100391 0 21 NaN NaN NaN 0.438522 0.487269 0.497109 -20 6194421 6798939 0 5 NaN NaN NaN 0.439933 0.487875 0.498050 -20 7866261 8625108 0 6 NaN NaN NaN 0.431096 0.480998 0.495239 -20 8625250 8639443 0 4 NaN NaN NaN 0.452132 0.490778 0.498100 -20 8665751 8703145 0 5 NaN NaN NaN 0.464700 0.486542 0.498388 -20 8707900 8742326 0 6 NaN NaN NaN 0.451179 0.482977 0.496961 -20 
8755243 8770932 0 4 NaN NaN NaN 0.465033 0.490912 0.499373 -20 8773096 8773155 0 2 NaN NaN NaN 0.457933 0.481761 0.497595 -20 9108585 9376019 0 4 NaN NaN NaN 0.456850 0.490451 0.497941 -20 9417870 9510263 0 4 NaN NaN NaN 0.450214 0.471448 0.498505 -20 9624587 10024951 0 6 NaN NaN NaN 0.460308 0.487576 0.498544 -20 10026357 10032413 0 4 NaN NaN NaN 0.383593 0.482721 0.497445 -20 10329888 10393145 0 4 NaN NaN NaN 0.451740 0.483709 0.499096 -20 10426975 12113410 0 10 NaN NaN NaN 0.463732 0.483361 0.498896 -20 13054307 13074243 0 8 NaN NaN NaN 0.460254 0.480012 0.496881 -20 13090745 13845726 0 12 NaN NaN NaN 0.442980 0.480915 0.494221 -20 13872093 15967327 0 14 NaN NaN NaN 0.438024 0.478811 0.498088 -20 16729262 16730522 0 3 NaN NaN NaN 0.450649 0.487730 0.498631 -20 17028059 17460132 0 5 NaN NaN NaN 0.451572 0.474350 0.496080 -20 17474690 18327570 0 30 NaN NaN NaN 0.449200 0.480355 0.494322 -20 18429497 18429509 0 2 NaN NaN NaN 0.471028 0.486146 0.499530 -20 18432690 18446024 0 4 NaN NaN NaN 0.440645 0.487903 0.497942 -20 18806046 22714612 0 54 NaN NaN NaN 0.292756 0.326880 0.362237 -20 23345844 23807028 0 26 NaN NaN NaN 0.331547 0.481088 0.497273 -20 23842032 24938195 0 12 NaN NaN NaN 0.466514 0.492118 0.498481 -20 24939590 24959386 0 2 NaN NaN NaN 0.452099 0.485577 0.496816 -20 24993414 25011423 0 4 NaN NaN NaN 0.444157 0.485087 0.496990 -20 25038484 25059442 0 2 NaN NaN NaN 0.413650 0.477351 0.498441 -20 25190598 25286059 0 27 NaN NaN NaN 0.285450 0.335143 0.440814 -20 25288505 25424713 0 11 NaN NaN NaN 0.369824 0.438819 0.495847 -20 25434351 25470056 0 11 NaN NaN NaN 0.449188 0.484455 0.498332 -20 25597236 25755672 0 12 NaN NaN NaN 0.435903 0.485251 0.498070 -20 25756059 25841650 0 7 NaN NaN NaN 0.466450 0.489128 0.498105 -20 25846283 25900162 0 3 NaN NaN NaN 0.375980 0.459039 0.492023 -20 25900379 26134237 0 4 NaN NaN NaN 0.438987 0.484034 0.496397 -20 26138206 29449678 0 3 NaN NaN NaN 0.441430 0.490041 0.498022 -20 29516670 30037783 0 20 NaN NaN NaN 0.466100 
0.486705 0.494903 -20 30053255 31025163 0 2 NaN NaN NaN 0.460866 0.491822 0.498690 -20 31647126 31676804 0 8 NaN NaN NaN 0.420384 0.476044 0.497287 -20 31811551 31826027 0 4 NaN NaN NaN 0.358847 0.479664 0.495201 -20 31828265 32330930 0 6 NaN NaN NaN 0.443005 0.484786 0.496641 -20 32340077 32710710 0 8 NaN NaN NaN 0.362631 0.428692 0.489771 -20 32935192 33006597 0 6 NaN NaN NaN 0.451948 0.485780 0.498018 -20 33030405 33150503 0 9 NaN NaN NaN 0.367456 0.441215 0.493514 -20 33178782 33882791 0 9 NaN NaN NaN 0.465225 0.486634 0.498055 -20 34218673 34782171 0 53 NaN NaN NaN 0.434052 0.476885 0.497838 -20 34974252 35869619 0 32 NaN NaN NaN 0.317636 0.363562 0.409450 -20 36022539 36944379 0 24 NaN NaN NaN 0.429298 0.476811 0.495536 -20 36946848 36965617 0 8 NaN NaN NaN 0.462958 0.483369 0.498854 -20 36989269 37291486 0 19 NaN NaN NaN 0.449951 0.479868 0.496863 -20 37366218 37396262 0 9 NaN NaN NaN 0.466491 0.484398 0.497237 -20 37404951 61444697 0 402 NaN NaN NaN 0.486953 0.494099 0.499177 -20 61444785 61453348 0 2 NaN NaN NaN 0.453088 0.473369 0.497858 -20 61453549 62904542 0 79 NaN NaN NaN 0.483483 0.491975 0.498251 +20 138125 139576 0 5 NaN NaN NaN 0.426222 0.488059 0.497040 +20 168466 259156 0 13 NaN NaN NaN 0.443282 0.485957 0.497914 +20 259818 360306 0 8 NaN NaN NaN 0.461611 0.485466 0.499138 +20 368905 377226 0 3 NaN NaN NaN 0.453615 0.493151 0.499605 +20 389456 402921 0 4 NaN NaN NaN 0.408054 0.478514 0.497515 +20 425606 1285933 0 16 NaN NaN NaN 0.447137 0.475828 0.496619 +20 1417397 1896244 0 30 NaN NaN NaN 0.330032 0.374767 0.482231 +20 2036954 2552805 0 12 NaN NaN NaN 0.439282 0.480480 0.497051 +20 2593006 2621998 0 10 NaN NaN NaN 0.474246 0.489058 0.498872 +20 2624956 2779257 0 5 NaN NaN NaN 0.468730 0.488064 0.497879 +20 2796471 3838441 0 43 NaN NaN NaN 0.449328 0.486880 0.498602 +20 3858985 4055656 0 3 NaN NaN NaN 0.468736 0.488955 0.497661 +20 4138585 5068730 0 17 NaN NaN NaN 0.445428 0.480759 0.496317 +20 5159344 5482307 0 16 NaN NaN NaN 0.430403 0.479287 
0.496359 +20 5528518 6100391 0 21 NaN NaN NaN 0.448006 0.481651 0.496327 +20 6194421 6798939 0 5 NaN NaN NaN 0.434432 0.481700 0.497827 +20 7866261 8625108 0 6 NaN NaN NaN 0.434346 0.482866 0.496395 +20 8625250 8639443 0 4 NaN NaN NaN 0.475330 0.487548 0.497742 +20 8665751 8703145 0 5 NaN NaN NaN 0.462430 0.485926 0.498406 +20 8707900 8742326 0 6 NaN NaN NaN 0.445906 0.480030 0.497699 +20 8755243 8770932 0 4 NaN NaN NaN 0.430037 0.469146 0.494431 +20 8773096 8773155 0 2 NaN NaN NaN 0.457119 0.486893 0.496751 +20 9108585 9376019 0 4 NaN NaN NaN 0.456206 0.478940 0.496034 +20 9417870 9510263 0 4 NaN NaN NaN 0.430671 0.480090 0.495906 +20 9624587 10024951 0 6 NaN NaN NaN 0.458152 0.492844 0.498232 +20 10026357 10032413 0 4 NaN NaN NaN 0.391870 0.464744 0.492150 +20 10329888 10393145 0 4 NaN NaN NaN 0.421594 0.482032 0.497576 +20 10426975 12113410 0 10 NaN NaN NaN 0.449926 0.490064 0.498884 +20 13054307 13074243 0 8 NaN NaN NaN 0.433392 0.489434 0.498896 +20 13090745 13845726 0 12 NaN NaN NaN 0.441799 0.488774 0.498412 +20 13872093 15967327 0 14 NaN NaN NaN 0.452328 0.487045 0.498201 +20 16729262 16730522 0 3 NaN NaN NaN 0.448098 0.486241 0.499169 +20 17028059 17460132 0 5 NaN NaN NaN 0.475170 0.491615 0.499192 +20 17474690 18327570 0 30 NaN NaN NaN 0.405159 0.480025 0.498638 +20 18429497 18429509 0 2 NaN NaN NaN 0.456052 0.486957 0.497906 +20 18432690 18446024 0 4 NaN NaN NaN 0.465581 0.490439 0.498801 +20 18806046 22714612 0 54 NaN NaN NaN 0.303352 0.333011 0.368774 +20 23345844 23807028 0 26 NaN NaN NaN 0.343905 0.456677 0.490794 +20 23842032 24938195 0 12 NaN NaN NaN 0.453379 0.483515 0.493664 +20 24939590 24959386 0 2 NaN NaN NaN 0.442513 0.480715 0.498245 +20 24993414 25011423 0 4 NaN NaN NaN 0.453053 0.484471 0.498434 +20 25038484 25059442 0 2 NaN NaN NaN 0.420479 0.468886 0.496831 +20 25190598 25286059 0 27 NaN NaN NaN 0.266501 0.320065 0.425186 +20 25288505 25424713 0 11 NaN NaN NaN 0.405140 0.474849 0.495545 +20 25434351 25470056 0 11 NaN NaN NaN 0.390839 
0.477456 0.498537 +20 25597236 25755672 0 12 NaN NaN NaN 0.416091 0.474444 0.491539 +20 25756059 25841650 0 7 NaN NaN NaN 0.457262 0.491213 0.498297 +20 25846283 25900162 0 3 NaN NaN NaN 0.388170 0.459837 0.495549 +20 25900379 26134237 0 4 NaN NaN NaN 0.439805 0.478245 0.498288 +20 26138206 29449678 0 3 NaN NaN NaN 0.423942 0.476739 0.497830 +20 29516670 30037783 0 20 NaN NaN NaN 0.469155 0.489050 0.499135 +20 30053255 31025163 0 2 NaN NaN NaN 0.463210 0.490150 0.498626 +20 31647126 31676804 0 8 NaN NaN NaN 0.412404 0.482477 0.497392 +20 31811551 31826027 0 4 NaN NaN NaN 0.357968 0.469971 0.497206 +20 31828265 32330930 0 6 NaN NaN NaN 0.441411 0.486296 0.498327 +20 32340077 32710710 0 8 NaN NaN NaN 0.354678 0.448327 0.494438 +20 32935192 33006597 0 6 NaN NaN NaN 0.463808 0.486403 0.497970 +20 33030405 33150503 0 9 NaN NaN NaN 0.372581 0.447059 0.493885 +20 33178782 33882791 0 9 NaN NaN NaN 0.444425 0.484330 0.498041 +20 34218673 34782171 0 53 NaN NaN NaN 0.390043 0.471183 0.496127 +20 34974252 35869619 0 32 NaN NaN NaN 0.332883 0.366635 0.409266 +20 36022539 36944379 0 24 NaN NaN NaN 0.422184 0.468599 0.493986 +20 36946848 36965617 0 8 NaN NaN NaN 0.443168 0.478156 0.493418 +20 36989269 37291486 0 19 NaN NaN NaN 0.456656 0.484830 0.497091 +20 37366218 37396262 0 9 NaN NaN NaN 0.456200 0.486735 0.497415 +20 37404951 61444697 0 402 NaN NaN NaN 0.488778 0.494717 0.498708 +20 61444785 61453348 0 2 NaN NaN NaN 0.457175 0.484159 0.497118 +20 61453549 62904542 0 79 NaN NaN NaN 0.471348 0.491716 0.497188 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelFinal.af.param index 5bb69d76ced..2388553c414 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelFinal.af.param +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-ac.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.069255 1.073963 1.078146 1.080389 1.082496 1.087311 1.089726 1.092997 1.097136 -BIAS_VARIANCE 0.048473 0.052003 0.052692 0.054348 0.056050 0.058032 0.060749 0.066364 0.069438 -OUTLIER_PROBABILITY 0.007462 0.011329 0.013651 0.015995 0.019026 0.019725 0.021521 0.023417 0.030548 +MEAN_BIAS 1.069270 1.073974 1.078154 1.080394 1.082500 1.087311 1.089724 1.092993 1.097128 +BIAS_VARIANCE 0.048473 0.052001 0.052690 0.054345 0.056045 0.058027 0.060742 0.066354 0.069425 +OUTLIER_PROBABILITY 0.007464 0.011329 0.013650 0.015994 0.019023 0.019721 0.021517 0.023412 0.030540 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.af.igv.seg index 225f9a579df..76300f3cf94 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.af.igv.seg @@ -1,9 +1,9 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 68100 10032789 161 0.313835 -SM-74P4M-1 20 10033509 25269459 101 0.264271 -SM-74P4M-1 20 25270235 30126381 30 0.238465 -SM-74P4M-1 20 30132499 31673054 8 0.179862 -SM-74P4M-1 20 31673576 31677616 3 0.485299 -SM-74P4M-1 20 31678263 32700168 6 0.245252 -SM-74P4M-1 20 32847930 37667438 83 0.285801 -SM-74P4M-1 20 39316268 62905205 323 0.496246 +SM-74P4M-1 20 68100 10032789 161 0.315203 +SM-74P4M-1 20 10033509 25269459 101 0.265899 +SM-74P4M-1 20 
25270235 30126381 30 0.236909 +SM-74P4M-1 20 30132499 31673054 8 0.186493 +SM-74P4M-1 20 31673576 31677616 3 0.490698 +SM-74P4M-1 20 31678263 32700168 6 0.249783 +SM-74P4M-1 20 32847930 37667438 83 0.286609 +SM-74P4M-1 20 39316268 62905205 323 0.495731 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.af.param index c0c49e635fa..61a6cea3b78 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.082541 1.089286 1.092508 1.095321 1.098326 1.099497 1.102495 1.105622 1.112189 -BIAS_VARIANCE 0.019659 0.021178 0.022464 0.024675 0.025641 0.027148 0.028297 0.030730 0.034374 -OUTLIER_PROBABILITY 0.021688 0.026669 0.030519 0.033267 0.035834 0.038407 0.041214 0.043878 0.051744 +MEAN_BIAS 1.084872 1.088541 1.092772 1.095834 1.098880 1.102284 1.106854 1.109266 1.114323 +BIAS_VARIANCE 0.020075 0.021846 0.022786 0.024298 0.025410 0.026363 0.026992 0.028839 0.030105 +OUTLIER_PROBABILITY 0.022967 0.025106 0.028007 0.029870 0.032759 0.038956 0.041171 0.045245 0.052218 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.seg index 9a0db9fbc00..b5d3b91004b 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelBegin.seg @@ -2,15 +2,15 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 1515339 147 41 0.630723 0.646117 0.660660 0.305047 0.313671 0.327678 -20 1517550 1896353 14 17 0.608487 0.658923 0.696857 0.306168 0.323014 0.340140 -20 1901790 10025443 645 99 0.648899 0.661662 0.669651 0.311231 0.321183 0.328689 -20 10025971 10032789 3 4 0.851754 0.937309 1.018360 0.276943 0.311092 0.375408 -20 10033509 25250095 624 85 0.919940 0.929699 0.938472 0.259428 0.269950 0.283834 -20 25251768 25269459 12 16 1.043438 1.094261 1.143976 0.208448 0.224334 0.243159 -20 25270235 30126381 96 30 1.082384 1.110710 1.129877 0.221805 0.233278 0.250439 -20 30132499 31673054 299 8 1.732520 1.745058 1.762498 0.166325 0.184239 0.203376 -20 31673576 31677616 3 3 1.397140 1.520348 1.651265 0.458801 0.488136 0.496235 -20 31678263 32700168 131 6 1.630741 1.645129 1.659100 0.218809 0.247486 0.270917 -20 32847930 37667438 775 83 1.052254 1.059208 1.066407 0.279490 0.286181 0.292944 -20 39316268 62905205 2016 323 0.099098 0.110945 0.118745 0.485522 0.494328 0.499528 +20 68100 1515339 147 41 0.630723 0.646117 0.660660 0.306041 0.316449 0.327892 +20 1517550 1896353 14 17 0.608487 0.658923 0.696857 0.306108 0.321679 0.347804 +20 1901790 10025443 645 99 0.648899 0.661662 0.669651 0.308494 0.318280 0.333880 +20 10025971 10032789 3 4 
0.851754 0.937309 1.018360 0.269719 0.311221 0.369340 +20 10033509 25250095 624 85 0.919940 0.929699 0.938472 0.256785 0.270760 0.281092 +20 25251768 25269459 12 16 1.043438 1.094261 1.143976 0.212137 0.230012 0.238392 +20 25270235 30126381 96 30 1.082384 1.110710 1.129877 0.226974 0.237011 0.248883 +20 30132499 31673054 299 8 1.732520 1.745058 1.762498 0.160462 0.180405 0.198971 +20 31673576 31677616 3 3 1.397140 1.520348 1.651265 0.466746 0.487870 0.499152 +20 31678263 32700168 131 6 1.630741 1.645129 1.659100 0.220848 0.247811 0.284476 +20 32847930 37667438 775 83 1.052254 1.059208 1.066407 0.272857 0.285127 0.292125 +20 39316268 62905205 2016 323 0.099098 0.110945 0.118745 0.486117 0.495185 0.499074 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.af.param index b00bec9a436..6d3cefb1270 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.080207 1.086557 1.092483 1.094189 1.097895 1.100183 1.103923 1.106414 1.108650 -BIAS_VARIANCE 0.022009 0.023112 0.023435 0.025198 0.025866 0.026747 0.028270 0.029901 0.033231 -OUTLIER_PROBABILITY 0.023898 0.026615 0.031612 0.034091 0.036254 0.039554 0.042360 0.046122 0.049892 +MEAN_BIAS 1.077074 1.081220 1.088385 1.090058 1.094274 1.098242 1.103466 1.105847 1.111901 +BIAS_VARIANCE 0.021503 0.022658 0.023862 0.024584 0.025489 0.026395 0.027939 0.029703 0.031556 
+OUTLIER_PROBABILITY 0.023494 0.025233 0.029628 0.031931 0.034945 0.040893 0.043193 0.045192 0.051006 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.seg index 147b85a7fa2..f3a9625dffb 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac-nac.modelFinal.seg @@ -2,11 +2,11 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 10032789 809 161 0.655754 0.661499 0.668267 0.308251 0.313835 0.328894 -20 10033509 25269459 636 101 0.926477 0.931931 0.939752 0.257722 0.264271 0.274803 -20 25270235 30126381 96 30 1.088282 1.111436 1.128151 0.227614 0.238465 0.250221 -20 30132499 31673054 299 8 1.732466 1.745043 1.757923 0.159672 0.179862 0.201951 -20 31673576 31677616 3 3 1.427238 1.550922 1.662376 0.447004 0.485299 0.497323 -20 31678263 32700168 131 6 1.630626 1.653423 1.665768 0.219492 0.245252 0.284086 -20 32847930 37667438 775 83 1.051902 1.059862 1.068650 0.280827 0.285801 0.297231 -20 39316268 62905205 2016 323 0.102976 0.107970 0.115261 0.488345 0.496246 0.499381 +20 68100 10032789 809 161 0.655754 0.661499 0.668267 0.310584 0.315203 0.323224 +20 10033509 25269459 636 101 0.926477 0.931931 0.939752 0.256087 0.265899 0.272362 +20 
25270235 30126381 96 30 1.088282 1.111436 1.128151 0.230007 0.236909 0.252986 +20 30132499 31673054 299 8 1.732466 1.745043 1.757923 0.162750 0.186493 0.205354 +20 31673576 31677616 3 3 1.427238 1.550922 1.662376 0.472564 0.490698 0.497598 +20 31678263 32700168 131 6 1.630626 1.653423 1.665768 0.226282 0.249783 0.290498 +20 32847930 37667438 775 83 1.051902 1.059862 1.068650 0.278266 0.286609 0.299078 +20 39316268 62905205 2016 323 0.102976 0.107970 0.115261 0.486804 0.495731 0.499323 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.af.igv.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.af.igv.seg index 78d02cf9218..2dd9a8567b0 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.af.igv.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.af.igv.seg @@ -1,7 +1,7 @@ Sample Chromosome Start End Num_Probes Segment_Mean -SM-74P4M-1 20 68100 10032789 179 0.323248 -SM-74P4M-1 20 10033509 25269459 122 0.272398 -SM-74P4M-1 20 25270235 30126381 37 0.243188 +SM-74P4M-1 20 68100 10032789 179 0.320157 +SM-74P4M-1 20 10033509 25269459 122 0.272185 +SM-74P4M-1 20 25270235 30126381 37 0.242382 SM-74P4M-1 20 30132499 32700168 15 0.187938 -SM-74P4M-1 20 32847930 37667438 107 0.289142 -SM-74P4M-1 20 39316268 62905205 375 0.495286 +SM-74P4M-1 20 32847930 37667438 107 0.292034 +SM-74P4M-1 20 39316268 62905205 375 0.495889 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.af.param index ea32ded5720..8e25257a7ad 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.076897 1.081265 1.084786 1.087859 1.089407 1.091052 1.095368 1.099995 1.108130 -BIAS_VARIANCE 0.020408 0.023276 0.024087 0.025577 0.026864 0.027523 0.028538 0.030047 0.033259 +MEAN_BIAS 1.076916 1.081278 1.084794 1.087862 1.089408 1.091051 1.095361 1.099981 1.108105 +BIAS_VARIANCE 0.020409 0.023276 0.024087 0.025576 0.026863 0.027523 0.028538 0.030046 0.033258 OUTLIER_PROBABILITY 0.004867 0.006712 0.007672 0.009552 0.012081 0.013635 0.014816 0.016005 0.022970 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.seg index e24d1bb9575..144cf2ce06e 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelBegin.seg @@ -2,13 +2,13 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 
1515339 147 42 0.626719 0.645147 0.660623 0.307707 0.314138 0.329227 -20 1517550 1552935 6 6 0.726103 0.786241 0.855298 0.316804 0.340116 0.379247 +20 68100 1515339 147 42 0.626719 0.645147 0.660623 0.307707 0.314138 0.329228 +20 1517550 1552935 6 6 0.726103 0.786241 0.855298 0.316804 0.340114 0.379242 20 1558733 10025443 653 127 0.651044 0.659090 0.671013 0.317168 0.327213 0.335960 -20 10025971 10032789 3 4 0.822829 0.937479 1.044323 0.270599 0.296850 0.319336 +20 10025971 10032789 3 4 0.822829 0.937479 1.044323 0.270599 0.296851 0.319338 20 10033509 25250095 624 107 0.921082 0.929067 0.937992 0.268533 0.275947 0.284892 -20 25251768 25269459 12 15 1.043642 1.097970 1.150960 0.222034 0.236953 0.251343 -20 25270235 30126381 96 37 1.081228 1.102493 1.126455 0.225637 0.243589 0.263630 +20 25251768 25269459 12 15 1.043642 1.097970 1.150960 0.222034 0.236953 0.251342 +20 25270235 30126381 96 37 1.081228 1.102493 1.126455 0.225636 0.243589 0.263632 20 30132499 32700168 433 15 1.709800 1.718894 1.729022 0.174826 0.185374 0.199121 20 32847930 37667438 775 107 1.051271 1.059245 1.066133 0.281511 0.291943 0.299750 -20 39316268 62905205 2016 375 0.104887 0.109087 0.114811 0.483340 0.495453 0.498917 +20 39316268 62905205 2016 375 0.104887 0.109087 0.114811 0.483345 0.495454 0.498917 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.af.param b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.af.param index 25020349480..9d747703a67 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.af.param +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.af.param @@ -1,6 +1,6 @@ @HD VN:1.6 @RG ID:GATKCopyNumber SM:SM-74P4M-1 PARAMETER_NAME POSTERIOR_10 POSTERIOR_20 POSTERIOR_30 
POSTERIOR_40 POSTERIOR_50 POSTERIOR_60 POSTERIOR_70 POSTERIOR_80 POSTERIOR_90 -MEAN_BIAS 1.069949 1.077288 1.080404 1.086073 1.089622 1.091396 1.095640 1.098735 1.106826 -BIAS_VARIANCE 0.023013 0.024830 0.025168 0.026149 0.027294 0.028833 0.031368 0.033373 0.034132 -OUTLIER_PROBABILITY 0.005715 0.008199 0.010229 0.011260 0.012979 0.015864 0.017977 0.018651 0.020826 +MEAN_BIAS 1.069694 1.075929 1.080599 1.086595 1.092146 1.094908 1.097217 1.101029 1.110114 +BIAS_VARIANCE 0.024472 0.025315 0.025913 0.027480 0.028710 0.029912 0.031488 0.033390 0.035967 +OUTLIER_PROBABILITY 0.005368 0.006470 0.008499 0.010622 0.011286 0.012661 0.014981 0.017846 0.018908 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.seg b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.seg index 394e65a12c2..bdba51ac1e5 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.seg +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/model-segments-expected/single-sample-cr-ac.modelFinal.seg @@ -2,9 +2,9 @@ @SQ SN:20 LN:63025520 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta AS:GRCh37 M5:0dec9660ec1efaaf33281c0d5ea2560f SP:Homo Sapiens @RG ID:GATKCopyNumber SM:SM-74P4M-1 CONTIG START END NUM_POINTS_COPY_RATIO NUM_POINTS_ALLELE_FRACTION LOG2_COPY_RATIO_POSTERIOR_10 LOG2_COPY_RATIO_POSTERIOR_50 LOG2_COPY_RATIO_POSTERIOR_90 MINOR_ALLELE_FRACTION_POSTERIOR_10 MINOR_ALLELE_FRACTION_POSTERIOR_50 MINOR_ALLELE_FRACTION_POSTERIOR_90 -20 68100 10032789 809 179 0.653606 0.661038 0.666397 0.312161 0.323248 0.329342 -20 10033509 25269459 636 122 0.927319 0.933523 0.940957 0.260779 0.272398 0.281251 -20 25270235 30126381 96 37 1.093620 1.112015 1.132317 0.229343 0.243188 0.251905 -20 30132499 32700168 433 15 1.708206 1.718205 
1.726553 0.179039 0.187938 0.201951 -20 32847930 37667438 775 107 1.053141 1.060196 1.068681 0.277226 0.289142 0.309241 -20 39316268 62905205 2016 375 0.102316 0.109465 0.117152 0.487002 0.495286 0.498842 +20 68100 10032789 809 179 0.653606 0.661038 0.666397 0.312545 0.320157 0.328522 +20 10033509 25269459 636 122 0.927319 0.933523 0.940957 0.262496 0.272185 0.279446 +20 25270235 30126381 96 37 1.093620 1.112015 1.132317 0.230371 0.242382 0.258668 +20 30132499 32700168 433 15 1.708206 1.718205 1.726553 0.172530 0.187938 0.203401 +20 32847930 37667438 775 107 1.053141 1.060196 1.068681 0.280516 0.292034 0.308108 +20 39316268 62905205 2016 375 0.102316 0.109465 0.117152 0.487086 0.495889 0.499001