Skip to content

Commit

Permalink
Modified HaplotypeBasedVariantRecaller to support non-flow reads (#8896)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ilyasoifer authored Jul 24, 2024
1 parent 59c9c1b commit 747df1a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ public void traverse() {
}

// get reads overlapping haplotypes
final Map<SamReader, Collection<FlowBasedRead>> readsByReader = readsReader.getReads(haplotypeSpan, vcLoc);
final Map<SamReader, Collection<GATKRead>> readsByReader = readsReader.getReads(haplotypeSpan, vcLoc);
final List<VariantContext> variants = new LinkedList<>(Arrays.asList(vc));
if ( logger.isDebugEnabled() ) {
int readCount = 0;
for ( Collection<FlowBasedRead> reads : readsByReader.values() )
for ( Collection<GATKRead> reads : readsByReader.values() )
readCount += reads.size();
logger.debug(String.format("vcLoc %s, haplotypeSpan: %s, %d haplotypes, %d reads",
vcLoc.toString(), haplotypeSpan.toString(), processedHaplotypes.size(), readCount, variants.size()));
Expand All @@ -150,16 +150,16 @@ public void traverse() {
final List<Map<Integer, AlleleLikelihoods<GATKRead, Allele>>> genotypeLikelihoodsList = new LinkedList<>();
final List<AssemblyResultSet> assemblyResultList = new LinkedList<>();
final List<SAMFileHeader> readsHeaderList = new LinkedList<>();
for ( Map.Entry<SamReader, Collection<FlowBasedRead>> entry : readsByReader.entrySet() ) {
for ( Map.Entry<SamReader, Collection<GATKRead>> entry : readsByReader.entrySet() ) {
final AssemblyResultSet assemblyResult = new AssemblyResultSet();
processedHaplotypes.forEach(haplotype -> assemblyResult.add(haplotype));

final Map<String, List<GATKRead>> perSampleReadList = new LinkedHashMap<>();
final SamReader samReader = entry.getKey();
final Collection<FlowBasedRead> reads = entry.getValue();
final Collection<GATKRead> reads = entry.getValue();

List<GATKRead> gtakReads = new LinkedList<>();
reads.forEach(flowBasedRead -> gtakReads.add(flowBasedRead));
reads.forEach(read -> gtakReads.add(read));
perSampleReadList.put(sampleNames[0], gtakReads);
AssemblyRegion regionForGenotyping = new AssemblyRegion(haplotypeSpan, 0, samReader.getFileHeader());
assemblyResult.setPaddedReferenceLoc(haplotypeSpan);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ public class TrimmedReadsReader {

private final List<SamReader> samReaders = new LinkedList<>();
private CountingReadFilter readFilter;
private final Map<String, Integer> readGroupMaxClass = new LinkedHashMap<>();
private final Map<String, String> readGroupFlowOrder = new LinkedHashMap<>();
private final FlowBasedArgumentCollection fbArgs = new FlowBasedArgumentCollection();

public TrimmedReadsReader(final List<Path> readsFiles, final Path referencePath, final int cloudPrefetchBuffer) {
Expand All @@ -44,11 +42,12 @@ public SAMSequenceDictionary getSamSequenceDictionary(final SamReader samReader)
return ((samReader != null) ? samReader : samReaders.get(0)).getFileHeader().getSequenceDictionary();
}

public Map<SamReader, Collection<FlowBasedRead>> getReads(final Locatable span, final Locatable vcLoc) {

final Map<SamReader, Collection<FlowBasedRead>> readsByReader = new LinkedHashMap<>();
public Map<SamReader, Collection<GATKRead>> getReads(final Locatable span, final Locatable vcLoc) {

final Map<SamReader, Collection<GATKRead>> readsByReader = new LinkedHashMap<>();
for ( SamReader samReader : samReaders ) {
final List<FlowBasedRead> reads = new LinkedList<>();
final List<GATKRead> reads = new LinkedList<>();
final SAMRecordIterator iter = samReader.query(span.getContig(), span.getStart(), span.getEnd(), false);
while (iter.hasNext()) {

Expand All @@ -72,7 +71,10 @@ public Map<SamReader, Collection<FlowBasedRead>> getReads(final Locatable span,
gatkRead = ReadClipper.hardClipToRegion(gatkRead, span.getStart(), span.getEnd());
if (gatkRead.isUnmapped() || gatkRead.getCigar().isEmpty())
continue;

if (!FlowBasedReadUtils.isFlowPlatform(samReader.getFileHeader(), gatkRead)){
reads.add(gatkRead);
continue;
}
// convert to a flow based read
FlowBasedReadUtils.ReadGroupInfo rgInfo = FlowBasedReadUtils.getReadGroupInfo(samReader.getFileHeader(), gatkRead);
final FlowBasedRead fbr = new FlowBasedRead(gatkRead, rgInfo.flowOrder, rgInfo.maxClass, fbArgs);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand Down Expand Up @@ -68,10 +69,10 @@ public void testBasic(final BamSource bamSources[], final Locatable span, final
Assert.assertNotNull(reader.getHeader(null));

// reads
Map<SamReader, Collection<FlowBasedRead>> reads = reader.getReads(span, vcLoc);
Map<SamReader, Collection<GATKRead>> reads = reader.getReads(span, vcLoc);
Assert.assertEquals(reads.size(), bamSources.length);
int bamSourceIndex = 0;
for ( Map.Entry<SamReader, Collection<FlowBasedRead>> entry : reads.entrySet() ) {
for ( Map.Entry<SamReader, Collection<GATKRead>> entry : reads.entrySet() ) {

final BamSource bamSource = bamSources[bamSourceIndex++];

Expand All @@ -83,8 +84,8 @@ public void testBasic(final BamSource bamSources[], final Locatable span, final
Assert.assertEquals(entry.getValue().size(), bamSource.readCount);

// verify first and last
FlowBasedRead firstRead = entry.getValue().iterator().next();
FlowBasedRead lastRead = entry.getValue().stream().reduce((prev, next) -> next).orElse(null);
FlowBasedRead firstRead = (FlowBasedRead) entry.getValue().iterator().next();
FlowBasedRead lastRead = (FlowBasedRead) entry.getValue().stream().reduce((prev, next) -> next).orElse(null);
Assert.assertEquals(firstRead.getName(), bamSource.firstReadName);
Assert.assertEquals(lastRead.getName(), bamSource.lastReadName);
}
Expand Down

0 comments on commit 747df1a

Please sign in to comment.