Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates #22

Merged
merged 11 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 3.6.4
* No user-facing changes

## 3.6.3
* Documentation for processing demultiplexed FASTQ files

Expand Down
19 changes: 9 additions & 10 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,33 @@ val artifactId = "poolq"

inThisBuild(
List(
scalaVersion := "2.13.10",
scalaVersion := "2.13.11",
semanticdbEnabled := true,
semanticdbVersion := scalafixSemanticdb.revision,
scalafixDependencies += "com.github.liancheng" %% "organize-imports" % "0.6.0",
versionScheme := Some("early-semver")
)
)

lazy val versions = new {
val acyclic = "0.2.1"
val betterFiles = "3.9.1"
val betterFiles = "3.9.2"
val betterMonadicFor = "0.3.1"
val catsEffect3 = "3.4.5"
val cats = "2.9.0"
val commonsIo = "2.11.0"
val catsEffect3 = "3.5.1"
val cats = "2.10.0"
val commonsIo = "2.13.0"
val commonsText = "1.10.0"
val commonsMath3 = "3.6.1"
val fastutil = "8.5.11"
val fs2 = "3.5.0"
val fastutil = "8.5.12"
val fs2 = "3.8.0"
val kantanCodecs = "0.5.3"
val kantanCsv = "0.7.0"
val log4s = "1.10.0"
val logback = "1.2.11"
val munit = "0.7.29"
val munitCatsEffect3 = "1.0.7"
val samTools = "3.0.4"
val samTools = "3.0.5"
val scalaCheck = "1.17.0"
val scalaTest = "3.2.15"
val scalaTest = "3.2.16"
val scalaTestPlusScalaCheck = "3.2.2.0"
val scopt = "4.1.0"
val slf4j = "1.7.36"
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.8.2
sbt.version=1.9.4
12 changes: 6 additions & 6 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.3")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.10.4")
addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.6")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.0")
addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.3")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.2.0")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.2.16")
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.3.1")
addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0")
addSbtPlugin("de.heikoseeberger" % "sbt-header" % "5.9.0")
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.4.1")
addSbtPlugin("de.heikoseeberger" % "sbt-header" % "5.10.0")
addSbtPlugin("org.typelevel" % "sbt-tpolecat" % "0.5.0")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.8")
298 changes: 152 additions & 146 deletions src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@ object BarcodeSet {

def apply(file: Path): BarcodeSet =
Using.resource(new FileInputStream(file.toFile)) { fin =>
val in = new BOMInputStream(fin, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
val in = BOMInputStream
.builder()
.setInputStream(fin)
.setByteOrderMarks(ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
.setInclude(false)
.get()
val br = new BufferedReader(new InputStreamReader(in))
skipHeader(br, BarcodeRe)
br.lines()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ object ReferenceData {

def apply(file: Path, quote: Char = '"'): ReferenceData = {
Using.resource(new FileInputStream(file.toFile)) { fin =>
val in = new BOMInputStream(fin, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
val in = BOMInputStream
.builder()
.setInputStream(fin)
.setByteOrderMarks(ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
.setInclude(false)
.get()
val br = new BufferedReader(new InputStreamReader(in))
val delimiter = guessDelimiter(br)
val config =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,8 @@ final class ScoringConsumer(
log.debug(s"Incrementing state for ($r, $c}).")
umiReference match {
case None =>
None
// we're not in UMI mode, so just increment the state
state.known.increment(None, (r, c))
val _ = state.known.increment(None, (r, c))
case Some(ref) =>
// we're in UMI mode
handleUmi(umi, ref, r, c)
Expand All @@ -182,7 +181,7 @@ final class ScoringConsumer(
val _ = state.known.increment(Some(u), (r, c))
} else {
// we found an unknown UMI barcode, so track it somehow
state.known.increment(None, (r, c))
val _ = state.known.increment(None, (r, c))
val _ = state.unknownUmi.increment(u)
}
case None =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ object UnexpectedSequenceWriter {
val colBc = fields(1)

// can't avoid the double hash lookup here without a big hassle
r.put(rowBc, r.getOrElseUpdate(rowBc, 0) + 1)
val _ = r.put(rowBc, r.getOrElseUpdate(rowBc, 0) + 1)
h.putIfAbsent(rowBc, new Object2IntOpenHashMap[String]())
h.get(rowBc).addTo(colBc, 1)
}
Expand All @@ -120,7 +120,7 @@ object UnexpectedSequenceWriter {
): Unit = {
val drop = r.toSeq.sortBy { case (_, count) => -count }.drop(n)
drop.foreach { case (bc, _) =>
r.remove(bc)
val _ = r.remove(bc)
h.remove(bc)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,53 @@
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._
import munit.FunSuite

class BarcodePolicyTest extends AnyFlatSpec {
class BarcodePolicyTest extends FunSuite {

"BarcodePolicy" should "choose a fixed barcode policy" in {
BarcodePolicy("FIXED@0", 8, false) should be(FixedOffsetPolicy(0, 8, false))
test("fixed barcode policy") {
assertEquals(BarcodePolicy("FIXED@0", 8, false), FixedOffsetPolicy(0, 8, false))
// this is a deprecated option but needs to be supported for the time being
BarcodePolicy("FIXED:0", 8, false) should be(FixedOffsetPolicy(0, 8, false))
assertEquals(BarcodePolicy("FIXED:0", 8, false), FixedOffsetPolicy(0, 8, false))
}

it should "choose a known prefix barcode policy" in {
BarcodePolicy("PREFIX:CACCG@7", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, Some(7)))
BarcodePolicy("PREFIX:CACCG@7-9", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, Some(7), Some(9)))
BarcodePolicy("PREFIX:CACCG@-9", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, None, Some(9)))
test("known prefix barcode policy") {
assertEquals(BarcodePolicy("PREFIX:CACCG@7", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, Some(7)))
assertEquals(BarcodePolicy("PREFIX:CACCG@7-9", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, Some(7), Some(9)))
assertEquals(BarcodePolicy("PREFIX:CACCG@-9", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, None, Some(9)))
}

it should "let the user specify a shorter length with a fixed policy" in {
BarcodePolicy("FIXED@0:6", 6, true) should be(FixedOffsetPolicy(0, 6, true))
test("specify a shorter length with a fixed policy") {
assertEquals(BarcodePolicy("FIXED@0:6", 6, true), FixedOffsetPolicy(0, 6, true))
// this is a deprecated option but needs to be supported for the time being
BarcodePolicy("FIXED:0:6", 6, true) should be(FixedOffsetPolicy(0, 6, true))
assertEquals(BarcodePolicy("FIXED:0:6", 6, true), FixedOffsetPolicy(0, 6, true))
}

it should "let the user specify a shorter length with a known prefix policy" in {
BarcodePolicy("PREFIX:CACCG@7:19", 19, false) should be(IndexOfKnownPrefixPolicy("CACCG", 19, Some(7)))
test("specify a shorter length with a known prefix policy") {
assertEquals(BarcodePolicy("PREFIX:CACCG@7:19", 19, false), IndexOfKnownPrefixPolicy("CACCG", 19, Some(7)))
}

it should "let the user specify a keymask policy" in {
BarcodePolicy("KEYMASK:caccgNNNNttNNNNaa@3", 8, false) should be(
test("keymask policy") {
assertEquals(
BarcodePolicy("KEYMASK:caccgNNNNttNNNNaa@3", 8, false),
GeneralTemplatePolicy(KeyMask("caccgNNNNttNNNNaa"), Some(3), None)
)
BarcodePolicy("TEMPLATE:caccgNNNNttNNNNaa@3", 8, false) should be(
assertEquals(
BarcodePolicy("TEMPLATE:caccgNNNNttNNNNaa@3", 8, false),
GeneralTemplatePolicy(KeyMask("caccgNNNNttNNNNaa"), Some(3), None)
)
}

it should "recognize a split barcode situation" in {
BarcodePolicy("TEMPLATE:caccgNNNNNnnnnnntatgcNNNNaa@3", 9, false) should be(
test("split barcode situation") {
assertEquals(
BarcodePolicy("TEMPLATE:caccgNNNNNnnnnnntatgcNNNNaa@3", 9, false),
SplitBarcodePolicy("CACCG", 5, 6, "TATGC", 4, Some(3), None)
)
}

it should "let the user specify just a 3' limit" in {
BarcodePolicy("TEMPLATE:NNNNNNNNNNNNNNNNNNNNNNN@-1", 23, false) should be(
test("specify just a 3' limit") {
assertEquals(
BarcodePolicy("TEMPLATE:NNNNNNNNNNNNNNNNNNNNNNN@-1", 23, false),
GeneralTemplatePolicy(KeyMask("NNNNNNNNNNNNNNNNNNNNNNN"), None, Some(1))
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ class KeyMaskTest extends AnyFlatSpec {

"KeyMask.apply" should "construct the correct key mask from a pattern" in {
val km0 = KeyMask("NNNNNNNNNNNNNNNNN")
km0 should be(KeyMask.fromString(17, "1-17"))
val _ = km0 should be(KeyMask.fromString(17, "1-17"))
val km1 = KeyMask("NNNNNNNNNNNNNNNNNnNN")
km1 should be(KeyMask.fromString(20, "1-17,19-20"))
val _ = km1 should be(KeyMask.fromString(20, "1-17,19-20"))
val km2 = KeyMask("nNNNNNNNNNNNNNNNNNnNNn")
km2 should be(KeyMask.fromString(22, "2-18,20-21"))
val _ = km2 should be(KeyMask.fromString(22, "2-18,20-21"))
val km3 = KeyMask("nnnNNNNNNNNNNNNNNNNNnNNnN")
km3 should be(KeyMask.fromString(25, "4-20,22-23,25"))
val _ = km3 should be(KeyMask.fromString(25, "4-20,22-23,25"))
val km4 = KeyMask("nnnnNNNNNNNNNNNNNNNNNNNNnnnnnn")
km4 should be(KeyMask.fromString(30, "5-24"))
}
Expand All @@ -30,14 +30,14 @@ class KeyMaskTest extends AnyFlatSpec {
val km5 = KeyMask(
"caccgNNNNNNNNNNNNNNNNNNNNnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnNNNNNNNNNNNNNNNNNNNNN"
)
km5.contextLength should be(240)
km5.keyLengthInBases should be(41)
val _ = km5.contextLength should be(240)
val _ = km5.keyLengthInBases should be(41)
km5.keyRanges should be(Seq(KeyRange(5, 24), KeyRange(219, 239)))
}

"KeyMask.fromString" should "compute the correct key mask from a list of key ranges in either syntax" in {
val km1 = KeyMask.fromString(23, "4-20,22-23")
km1 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
val _ = km1 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
val km2 = KeyMask.fromString(23, "4..20,22..23")
km2 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
}
Expand All @@ -59,7 +59,7 @@ class KeyMaskTest extends AnyFlatSpec {
}

it should "merge adjacent ranges" in {
KeyMask.mergeAdjacent(Seq(KeyRange(1, 9), KeyRange(10, 12), KeyRange(14, 17))) should be(
val _ = KeyMask.mergeAdjacent(Seq(KeyRange(1, 9), KeyRange(10, 12), KeyRange(14, 17))) should be(
Seq(KeyRange(1, 12), KeyRange(14, 17))
)
KeyMask.fromString(10, "1,2..4,5,6-8,9") should be(KeyMask.fromString(10, "1-9"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,22 @@
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._
import munit.FunSuite

/** This class provides additional tests for the KeyMask that are not found in the FISHR codebase from which KeyMask and
* its primary test class were lifted. We will try not to modify the copied test classes to make subsequent updates
* from FISHR easier. Instead, new PoolQ-specific tests will live here.
*/
class KeyMaskTest2 extends AnyFlatSpec {
class KeyMaskTest2 extends FunSuite {

"KeyMask.apply" should "construct the correct key mask from a pattern" in {
test("construct the correct key mask from a pattern") {
// 0 1 2
// 12345678901234567890123456789
val km0 = KeyMask("caccgNNNNNnnnnnnnnnttacaNNNNN")

// parsing should work how we expect
km0.keyRanges should be(Seq(KeyRange(5, 9), KeyRange(24, 28)))
km0.keyLengthInBases should be(10)
assertEquals(km0.keyRanges, Seq(KeyRange(5, 9), KeyRange(24, 28)))
assertEquals(km0.keyLengthInBases, 10)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,32 @@ import org.scalatest.matchers.should.Matchers._
class KeyRangeTest extends AnyFlatSpec {

"KeyRange" should "enforce well-formedness" in {
noException should be thrownBy KeyRange(3, 4)
noException should be thrownBy KeyRange(3, 3)
an[IllegalArgumentException] should be thrownBy KeyRange(3, 2)
val _ = noException should be thrownBy KeyRange(3, 4)
val _ = noException should be thrownBy KeyRange(3, 3)
val _ = an[IllegalArgumentException] should be thrownBy KeyRange(3, 2)
an[IllegalArgumentException] should be thrownBy KeyRange(-2, 2)
}

it should "have working compare()" in {
val ord = implicitly[Ordering[KeyRange]]
ord.compare(KeyRange(2, 5), KeyRange(2, 5)) should be(0)
KeyRange(2, 5) should be <= KeyRange(2, 5)
KeyRange(2, 5) should be >= KeyRange(2, 5)
val _ = ord.compare(KeyRange(2, 5), KeyRange(2, 5)) should be(0)
val _ = KeyRange(2, 5) should be <= KeyRange(2, 5)
val _ = KeyRange(2, 5) should be >= KeyRange(2, 5)

KeyRange(2, 5) should be < KeyRange(3, 4)
KeyRange(2, 5) should be < KeyRange(2, 6)
KeyRange(2, 5) should be > KeyRange(2, 4)
val _ = KeyRange(2, 5) should be < KeyRange(3, 4)
val _ = KeyRange(2, 5) should be < KeyRange(2, 6)
val _ = KeyRange(2, 5) should be > KeyRange(2, 4)
KeyRange(2, 5) should be > KeyRange(1, 32)
}

it should "be creatable from a string" in {
KeyRange("1-1") should be(KeyRange(0, 0))
KeyRange("1..1") should be(KeyRange(0, 0))
KeyRange("1") should be(KeyRange(0, 0))
KeyRange("1-6") should be(KeyRange(0, 5))
KeyRange("1..6") should be(KeyRange(0, 5))
an[IllegalArgumentException] should be thrownBy KeyRange("0-5")
an[IllegalArgumentException] should be thrownBy KeyRange("-1-5")
val _ = KeyRange("1-1") should be(KeyRange(0, 0))
val _ = KeyRange("1..1") should be(KeyRange(0, 0))
val _ = KeyRange("1") should be(KeyRange(0, 0))
val _ = KeyRange("1-6") should be(KeyRange(0, 5))
val _ = KeyRange("1..6") should be(KeyRange(0, 5))
val _ = an[IllegalArgumentException] should be thrownBy KeyRange("0-5")
val _ = an[IllegalArgumentException] should be thrownBy KeyRange("-1-5")
an[IllegalArgumentException] should be thrownBy KeyRange("6-5")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,12 @@ class TemplatePolicyTest extends AnyFlatSpec {
val keymask = KeyMask(pattern)
val kmp = new GeneralTemplatePolicy(keymask, Some(0))

kmp.find(Read("", read1)) should be(Some(FoundBarcode("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5)))
kmp.find(Read("", read2)) should be(Some(FoundBarcode("NTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5)))
val _ = kmp.find(Read("", read1)) should be(
Some(FoundBarcode("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5))
)
val _ = kmp.find(Read("", read2)) should be(
Some(FoundBarcode("NTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5))
)
kmp.find(Read("", read3)) should be(None)
}

Expand All @@ -131,8 +135,8 @@ class TemplatePolicyTest extends AnyFlatSpec {
(variable: String, r1: String, ns: String, r2: String, rest: String) =>
val read = Read("id", variable + fixed + prefix1 + r1 + ns + prefix2 + r2 + rest)
// warm up phase
nanoTimed(100)(_ => kmp.find(read))
nanoTimed(100)(_ => kpp.find(read))
val _ = nanoTimed(100)(_ => kmp.find(read))
val _ = nanoTimed(100)(_ => kpp.find(read))

// go!
val (ret1, t1) = nanoTimed(10000)(_ => kmp.find(read))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ class OpenHashMapHistogramTest extends FunSuite with ScalaCheckSuite {
test("OpenHashMapHistogram should track frequencies") {
val h = new OpenHashMapHistogram[String]

h.increment("AAAA")
h.increment("AAAA")
h.increment("AAAA")
h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")

assertEquals(h.count("AAAA"), 4)
assertEquals(h.count("CCCC"), 0)
}

property("OpenHashMapHistogram should track frequencies for arbitrary data") {
forAll { data: List[Int] =>
forAll { (data: List[Int]) =>
val expectedCounts: Map[Int, Int] = data.groupBy(identity).view.mapValues(_.length).toMap

val hist = new OpenHashMapHistogram[Int]
Expand Down
Loading