Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demultiplexed FASTQ support #23

Merged
merged 16 commits into from
Sep 6, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2022 The Broad Institute, Inc. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, CloseableIterator, DmuxedIterable}
import org.broadinstitute.gpp.poolq3.types.Read

final class DmuxedBarcodeSource(parser: DmuxedIterable, rowPolicy: BarcodePolicy, umiPolicyOpt: Option[BarcodePolicy])
extends CloseableIterable[Barcodes] {

/** Wraps the parser's current index barcode, if any, as a [[FoundBarcode]] at offset 0.
  *
  * This is a `def`, so `parser.indexBarcode` is re-read on every call.
  * NOTE(review): presumably the index barcode changes as the parser advances between demultiplexed
  * inputs - confirm against `DmuxedIterable`.
  */
private def colBarcodeOpt =
  for (indexBarcode <- parser.indexBarcode) yield FoundBarcode(indexBarcode.toCharArray, 0)
mtomko marked this conversation as resolved.
Show resolved Hide resolved

/** Adapts an iterator of reads into an iterator of [[Barcodes]].
  *
  * Every read is passed to the row policy and, when one was supplied, to the UMI policy. The second
  * slot of the resulting [[Barcodes]] is always `None` for this source.
  */
private[this] class BarcodeIterator(reads: CloseableIterator[Read]) extends CloseableIterator[Barcodes] {

  override def hasNext: Boolean = reads.hasNext

  override def next(): Barcodes = {
    val read = reads.next()
    val row = rowPolicy.find(read)
    val umi = umiPolicyOpt.flatMap(policy => policy.find(read))
    // `colBarcodeOpt` is evaluated here, i.e. only after the read has been pulled from `reads`.
    Barcodes(row, None, colBarcodeOpt, umi)
  }

  /** Closes the wrapped read iterator. */
  override def close(): Unit = reads.close()

}

/** Obtains a fresh read iterator from the parser and wraps it so that it yields [[Barcodes]]. */
override def iterator: CloseableIterator[Barcodes] = {
  val reads = parser.iterator
  new BarcodeIterator(reads)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2022 The Broad Institute, Inc. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.broadinstitute.gpp.poolq3.parser.{CloseableIterable, CloseableIterator, DmuxedIterable}
import org.broadinstitute.gpp.poolq3.types.{Read, ReadIdCheckPolicy}

class DmuxedPairedEndBarcodeSource(
rowParser: DmuxedIterable,
revRowParser: DmuxedIterable,
rowPolicy: BarcodePolicy,
revRowPolicy: BarcodePolicy,
umiPolicyOpt: Option[BarcodePolicy],
readIdCheckPolicy: ReadIdCheckPolicy
) extends CloseableIterable[Barcodes] {

/** Wraps the forward parser's current index barcode, if any, as a [[FoundBarcode]] at offset 0.
  *
  * Only `rowParser` is consulted; `revRowParser.indexBarcode` is not read here. This is a `def`, so
  * the value is re-read on every call.
  * NOTE(review): presumably the index barcode changes as the parser advances between demultiplexed
  * inputs - confirm against `DmuxedIterable`.
  */
private def colBarcodeOpt =
  for (indexBarcode <- rowParser.indexBarcode) yield FoundBarcode(indexBarcode.toCharArray, 0)
mtomko marked this conversation as resolved.
Show resolved Hide resolved

/** Walks a forward and a reverse read iterator in lock step, producing one [[Barcodes]] per pair. */
private[this] class BarcodeIterator(forwardReads: CloseableIterator[Read], reverseReads: CloseableIterator[Read])
    extends CloseableIterator[Barcodes] {

  /** True only while both inputs still have a read, so iteration ends as soon as either is exhausted. */
  final override def hasNext: Boolean =
    if (!forwardReads.hasNext) false
    else reverseReads.hasNext

  final override def next(): Barcodes = {
    val forward = forwardReads.next()
    val reverse = reverseReads.next()
    // The result of the check is discarded.
    // NOTE(review): presumably `check` signals a mismatched pair by throwing - confirm.
    readIdCheckPolicy.check(forward, reverse)
    val row = rowPolicy.find(forward)
    val revRow = revRowPolicy.find(reverse)
    // The UMI is looked up in the forward read only.
    val umi = umiPolicyOpt.flatMap(policy => policy.find(forward))
    Barcodes(row, revRow, colBarcodeOpt, umi)
  }

  /** Closes both inputs; the reverse iterator is closed even when closing the forward one throws. */
  final override def close(): Unit = {
    try {
      forwardReads.close()
    } finally {
      reverseReads.close()
    }
  }

}

/** Obtains a read iterator from each parser and pairs them up into an iterator of [[Barcodes]].
  *
  * The forward iterator is obtained first. If obtaining the reverse iterator then fails with a
  * non-fatal error, the forward iterator is closed before the error is rethrown, so that it is not
  * left open with no owner. A failure of that cleanup `close()` is attached to the original error as
  * a suppressed exception rather than replacing it.
  */
override def iterator: CloseableIterator[Barcodes] = {
  import scala.util.control.NonFatal

  val rowIterator = rowParser.iterator
  val revRowIterator =
    try revRowParser.iterator
    catch {
      case NonFatal(openFailure) =>
        try rowIterator.close()
        catch { case NonFatal(closeFailure) => openFailure.addSuppressed(closeFailure) }
        throw openFailure
    }
  new BarcodeIterator(rowIterator, revRowIterator)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2022 The Broad Institute, Inc. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.broadinstitute.gpp.poolq3.barcode

import cats.syntax.all._
import munit.FunSuite
import org.broadinstitute.gpp.poolq3.parser.DmuxedIterable

/** Exercises [[DmuxedBarcodeSource]] against in-memory [[DmuxedIterable]] fixtures. */
class DmuxedBarcodeSourceTest extends FunSuite {

  // NOTE(review): presumably a fixed-position policy: 10 bases starting at offset 0 - confirm
  // against `BarcodePolicy`.
  private[this] val rowPolicy = BarcodePolicy("FIXED@0", 10, skipShortReads = true)

  /** A barcode found at offset 0, wrapped in `Some`. */
  private[this] def found(seq: String) = FoundBarcode(seq.toCharArray, 0).some

  /** Expected result for a read that carries a row barcode `s` and no index barcode. */
  def fb(s: String): Barcodes = Barcodes(found(s), None, None, None)

  /** Expected result for a read that carries row barcode `s` and index barcode `i`. */
  def fb(i: String, s: String): Barcodes = Barcodes(found(s), None, found(i), None)

  test("it works") {
    val iterable = DmuxedIterable(
      List(
        None -> List("AAAAAAAAAA", "AAAAAAAAAC", "AAAAAAAAAG"),
        Some("CTCGAG") -> List("AAAAAAAAAA", "AACCCCGGTT", "AATTGGTTAA")
      )
    )
    val src = new DmuxedBarcodeSource(iterable, rowPolicy, None)

    // Reads from the group without an index come first, followed by the "CTCGAG" group.
    val expected = List(
      fb("AAAAAAAAAA"),
      fb("AAAAAAAAAC"),
      fb("AAAAAAAAAG"),
      fb("CTCGAG", "AAAAAAAAAA"),
      fb("CTCGAG", "AACCCCGGTT"),
      fb("CTCGAG", "AATTGGTTAA")
    )

    assertEquals(src.toList, expected)
  }

  test("nothing works") {
    // An input with no groups yields no barcodes.
    val src = new DmuxedBarcodeSource(DmuxedIterable(Nil), rowPolicy, None)
    val actual = src.toList
    assertEquals(actual, Nil)
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright (c) 2022 The Broad Institute, Inc. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package org.broadinstitute.gpp.poolq3.barcode

import cats.syntax.all._
import munit.FunSuite
import org.broadinstitute.gpp.poolq3.parser.DmuxedIterable
import org.broadinstitute.gpp.poolq3.types.ReadIdCheckPolicy

/** Exercises [[DmuxedPairedEndBarcodeSource]] against in-memory [[DmuxedIterable]] fixtures. */
class DmuxedPairedEndBarcodeSourceTest extends FunSuite {

  // NOTE(review): presumably fixed-position policies at offset 0, 4 bases for the forward read and
  // 3 for the reverse read - confirm against `BarcodePolicy`.
  private[this] val rowPolicy = BarcodePolicy("FIXED@0", 4, skipShortReads = true)
  private[this] val revRowPolicy = BarcodePolicy("FIXED@0", 3, skipShortReads = true)

  /** A barcode found at offset 0, wrapped in `Some`. */
  private[this] def found(seq: String) = FoundBarcode(seq.toCharArray, 0).some

  /** Expected result for a pair with forward barcode `r1`, reverse barcode `r2` and no index barcode. */
  def fb(r1: String, r2: String): Barcodes = Barcodes(found(r1), found(r2), None, None)

  /** Expected result for a pair with index barcode `i`, forward barcode `r1` and reverse barcode `r2`. */
  def fb(i: String, r1: String, r2: String): Barcodes = Barcodes(found(r1), found(r2), found(i), None)

  test("it works") {
    val iter1 = DmuxedIterable(
      List(
        None -> List("AAAA", "CCCC", "GGGG"),
        Some("CTCGAG") -> List("TTAA", "CCGG", "AATT")
      )
    )
    val iter2 = DmuxedIterable(
      List(
        None -> List("AGA", "CTC", "GAG"),
        Some("CTCGAG") -> List("TGT", "CAC", "TCT")
      )
    )
    val src = new DmuxedPairedEndBarcodeSource(iter1, iter2, rowPolicy, revRowPolicy, None, ReadIdCheckPolicy.Lax)

    // Forward and reverse reads are paired positionally; the un-indexed group comes first.
    val expected = List(
      fb("AAAA", "AGA"),
      fb("CCCC", "CTC"),
      fb("GGGG", "GAG"),
      fb("CTCGAG", "TTAA", "TGT"),
      fb("CTCGAG", "CCGG", "CAC"),
      fb("CTCGAG", "AATT", "TCT")
    )

    assertEquals(src.toList, expected)
  }

  test("nothing works") {
    // Two inputs with no groups yield no barcodes.
    val i1 = DmuxedIterable(Nil)
    val i2 = DmuxedIterable(Nil)
    val src = new DmuxedPairedEndBarcodeSource(i1, i2, rowPolicy, revRowPolicy, None, ReadIdCheckPolicy.Illumina)
    val actual = src.toList
    assertEquals(actual, Nil)
  }

}