Skip to content

Commit

Permalink
WX-1462 POSIX-flavored sub() (#7374)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgainerdewar authored Feb 27, 2024
1 parent 3713d16 commit a29fdf8
Show file tree
Hide file tree
Showing 37 changed files with 340 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ metadata {
"outputs.biscayne_new_engine_functions.minMaxIntFloatComposition": 1.0
"outputs.biscayne_new_engine_functions.maxIntVsMaxFloat": 1.79769313E+308

"outputs.biscayne_new_engine_functions.substituted": "WATtheWAT"

"outputs.biscayne_new_engine_functions.with_suffixes.0": "aaaS"
"outputs.biscayne_new_engine_functions.with_suffixes.1": "bbbS"
"outputs.biscayne_new_engine_functions.with_suffixes.2": "cccS"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ workflow biscayne_new_engine_functions {

meta {
description: "This test makes sure that these functions work in a real workflow"
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "suffix" ]
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "sub", "suffix" ]
}

Map[String, Int] x_map_in = {"a": 1, "b": 2, "c": 3}
Expand Down Expand Up @@ -51,6 +51,11 @@ workflow biscayne_new_engine_functions {
Float minMaxIntFloatComposition = min(max(biggestInt, smallFloat), smallestInt) # 1.0
Float maxIntVsMaxFloat = max(maxInt, maxFloat)

# sub():
# (Exists before Biscayne, but uses different regex flavor here)
# =================================================
String substituted = sub("AtheZ", "[[:upper:]]", "WAT")

# suffix():
# =================================================
Array[String] with_suffixes = suffix("S", some_strings)
Expand Down
4 changes: 3 additions & 1 deletion project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ object Dependencies {
private val postgresV = "42.4.4"
private val pprintV = "0.7.3"
private val rdf4jV = "3.7.1"
private val re2jV = "1.6"
private val refinedV = "0.10.1"
private val rhinoV = "1.7.14"

Expand Down Expand Up @@ -517,7 +518,8 @@ object Dependencies {
val wdlDependencies: List[ModuleID] = List(
"commons-io" % "commons-io" % commonsIoV,
"org.scala-graph" %% "graph-core" % scalaGraphV,
"com.chuusai" %% "shapeless" % shapelessV
"com.chuusai" %% "shapeless" % shapelessV,
"com.google.re2j" % "re2j" % re2jV,
) ++ betterFilesDependencies

val languageFactoryDependencies = List(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,23 @@ object ExpressionElement {
def arg2: ExpressionElement
def arg3: ExpressionElement
}

// Pre-1.1 WDL versions have undefined regex flavor. Cromwell uses the standard Java flavor.
final case class Sub(input: ExpressionElement, pattern: ExpressionElement, replace: ExpressionElement)
extends ThreeParamFunctionCallElement {
override def arg1: ExpressionElement = input
override def arg2: ExpressionElement = pattern
override def arg3: ExpressionElement = replace
}

// As of WDL 1.1, WDL regular expressions are expected to be POSIX ERE flavor.
final case class SubPosix(input: ExpressionElement, pattern: ExpressionElement, replace: ExpressionElement)
extends ThreeParamFunctionCallElement {
override def arg1: ExpressionElement = input
override def arg2: ExpressionElement = pattern
override def arg3: ExpressionElement = replace
}

/**
* A single identifier lookup expression, eg Int x = y
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package wdl.transforms.biscayne.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -15,6 +15,7 @@ object AstToNewExpressionElements {
"min" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Min, "min"),
"max" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Max, "max"),
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ object BiscayneExpressionValueConsumers {
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer)
}

implicit val subPosixExpressionValueConsumer: ExpressionValueConsumer[SubPosix] =
new ExpressionValueConsumer[SubPosix] {
override def expressionConsumedValueHooks(a: SubPosix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.arg1)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg3)(expressionValueConsumer)
}

implicit val suffixExpressionValueConsumer: ExpressionValueConsumer[Suffix] = new ExpressionValueConsumer[Suffix] {
override def expressionConsumedValueHooks(a: Suffix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ package object consumed {
case a: Zip => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Cross => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: Sub => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: SubPosix => a.expressionConsumedValueHooks(expressionValueConsumer)

// New WDL biscayne expressions:
case a: Keys => a.expressionConsumedValueHooks(expressionValueConsumer)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package wdl.transforms.biscayne.linking.expression.files

import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.model.draft3.graph.expression.FileEvaluator
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators.twoParameterFunctionPassthroughFileEvaluator
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators.{
threeParameterFunctionPassthroughFileEvaluator,
twoParameterFunctionPassthroughFileEvaluator
}

object BiscayneFileEvaluators {

Expand All @@ -16,6 +19,8 @@ object BiscayneFileEvaluators {
EngineFunctionEvaluators.singleParameterPassthroughFileEvaluator

implicit val sepFunctionEvaluator: FileEvaluator[Sep] = twoParameterFunctionPassthroughFileEvaluator[Sep]
implicit val subPosixFunctionEvaluator: FileEvaluator[SubPosix] =
threeParameterFunctionPassthroughFileEvaluator[SubPosix]
implicit val suffixFunctionEvaluator: FileEvaluator[Suffix] = twoParameterFunctionPassthroughFileEvaluator[Suffix]

implicit val minFunctionEvaluator: FileEvaluator[Min] = twoParameterFunctionPassthroughFileEvaluator[Min]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ package object files {
case a: Zip => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: Cross => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)

case a: Sub => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: SubPosix =>
a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)

case a: Keys => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: AsMap => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package wdl.transforms.biscayne.linking.expression.types

import cats.implicits.catsSyntaxTuple2Semigroupal
import cats.implicits.{catsSyntaxTuple2Semigroupal, catsSyntaxTuple3Semigroupal}
import cats.syntax.validated._
import common.validation.ErrorOr._
import wdl.model.draft3.elements.ExpressionElement
Expand Down Expand Up @@ -102,6 +102,16 @@ object BiscayneTypeEvaluators {
}
}

implicit val subPosixFunctionEvaluator: TypeEvaluator[SubPosix] = new TypeEvaluator[SubPosix] {
override def evaluateType(a: SubPosix, linkedValues: Map[UnlinkedConsumedValueHook, GeneratedValueHandle])(implicit
expressionTypeEvaluator: TypeEvaluator[ExpressionElement]
): ErrorOr[WomType] =
(validateParamType(a.input, linkedValues, WomSingleFileType),
validateParamType(a.pattern, linkedValues, WomSingleFileType),
validateParamType(a.replace, linkedValues, WomSingleFileType)
) mapN { (_, _, _) => WomStringType }
}

implicit val suffixFunctionEvaluator: TypeEvaluator[Suffix] = new TypeEvaluator[Suffix] {
override def evaluateType(a: Suffix, linkedValues: Map[UnlinkedConsumedValueHook, GeneratedValueHandle])(implicit
expressionTypeEvaluator: TypeEvaluator[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ package object types {
case a: Zip => a.evaluateType(linkedValues)(typeEvaluator)
case a: Cross => a.evaluateType(linkedValues)(typeEvaluator)

case a: Sub => a.evaluateType(linkedValues)(typeEvaluator)
case a: SubPosix => a.evaluateType(linkedValues)(typeEvaluator)

case a: StdoutElement.type => a.evaluateType(linkedValues)(typeEvaluator)
case a: StderrElement.type => a.evaluateType(linkedValues)(typeEvaluator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import cats.data.NonEmptyList
import cats.syntax.validated._
import cats.syntax.traverse._
import cats.instances.list._
import com.google.re2j.{Pattern => RE2JPattern}
import common.validation.ErrorOr._
import common.collections.EnhancedCollections._
import common.validation.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement._
import wdl.model.draft3.graph.expression.{EvaluatedValue, ForCommandInstantiationOptions, ValueEvaluator}
import wdl.transforms.base.linking.expression.values.EngineFunctionEvaluators.{
processThreeValidatedValues,
processTwoValidatedValues,
processValidatedSingleValue
}
Expand Down Expand Up @@ -218,6 +221,36 @@ object BiscayneValueEvaluators {
}
}

implicit val subPosixFunctionEvaluator: ValueEvaluator[SubPosix] = new ValueEvaluator[SubPosix] {
override def evaluateValue(a: SubPosix,
inputs: Map[String, WomValue],
ioFunctionSet: IoFunctionSet,
forCommandInstantiationOptions: Option[ForCommandInstantiationOptions]
)(implicit expressionValueEvaluator: ValueEvaluator[ExpressionElement]): ErrorOr[EvaluatedValue[WomString]] =
processThreeValidatedValues[WomString, WomString, WomString, WomString](
expressionValueEvaluator.evaluateValue(a.input, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
),
expressionValueEvaluator.evaluateValue(a.pattern, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
),
expressionValueEvaluator.evaluateValue(a.replace, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
)
) { (input, pattern, replace) =>
ErrorOr(
EvaluatedValue(WomString(
RE2JPattern
.compile(pattern.valueString)
.matcher(input.valueString)
.replaceAll(replace.valueString)
),
Seq.empty
)
)
}
}

implicit val suffixFunctionEvaluator: ValueEvaluator[Suffix] = new ValueEvaluator[Suffix] {
override def evaluateValue(a: Suffix,
inputs: Map[String, WomValue],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ package object values {
case a: Cross =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)

case a: Sub => a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: SubPosix =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)

case a: Keys => a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: AsMap =>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package wdl.transforms.biscayne

import java.util

import common.Checked
import common.assertion.ErrorOrAssertions._
import common.transforms.CheckedAtoB
Expand Down Expand Up @@ -98,6 +97,12 @@ class Ast2WdlomSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers {
expr shouldBeValid NoneLiteralElement
}

it should "get the posix version when parsing the sub function" in {
val str = """sub("my input", "[A-Za-z]", "repl")"""
val expr = fromString[ExpressionElement](str, parser.parse_e)
expr shouldBeValid (SubPosix(StringLiteral("my input"), StringLiteral("[A-Za-z]"), StringLiteral("repl")))
}

it should "parse the new suffix function" in {
val str = "suffix(some_str, some_arr)"
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ class BiscayneExpressionValueConsumersSpec extends AnyFlatSpec with CromwellTime
}
}

it should "discover the variable lookups within a sub() call" in {
val str = """ sub(my_input, "^[A-Z]$", "0") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.expressionConsumedValueHooks should be(Set(UnlinkedIdentifierHook("my_input")))
}
}

it should "discover the variable lookups within a suffix() call" in {
val str = """ suffix(my_suffix, ["a", "b", c]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class BiscayneFileEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
}
}

it should "discover the file which would be required to evaluate a sub() function" in {
val str = """ sub(read_string("my_nice_file.txt"), "foo", "NEW_VAL") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.predictFilesNeededToEvaluate(Map.empty, NoIoFunctionSet, WomStringType) shouldBeValid Set(
WomSingleFile("my_nice_file.txt")
)
}
}

it should "discover the file which would be required to evaluate a suffix() function" in {
val str = """ suffix(' # what a line', read_lines("foo.txt")) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ class BiscayneTypeEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
}
}

it should "evaluate the type of a sub() function as String" in {
val str = """ sub("input", "^pattern$", "s") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.evaluateType(Map.empty) shouldBeValid WomStringType
}
}

it should "evaluate the type of a suffix() function as Array[String]" in {
val str = """ suffix('S', ["a", "b", "c"]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,29 @@ class BiscayneValueEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wi
}
}

it should "evaluate a POSIX-flavor regex in a sub expression correctly" in {
val str = """ sub("aB", "[[:lower:]]", "9") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

val expectedString: WomString = WomString("9B")

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None) shouldBeValid EvaluatedValue(expectedString, Seq.empty)
}
}

it should "fail to evaluate a Java-flavor regex in a sub expression" in {
val str = """ sub("aB", "\\p{Lower}", "9") """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None)
.shouldBeInvalid(
"""error parsing regexp: invalid character class range: `\p{Lower}`"""
)
}
}

it should "evaluate a suffix expression correctly" in {
val str = """ suffix("S", ["a", "b", "c"]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package wdl.transforms.cascades.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -15,6 +15,7 @@ object AstToNewExpressionElements {
"min" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Min, "min"),
"max" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Max, "max"),
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ object cascadesExpressionValueConsumers {
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer)
}

implicit val subPosixExpressionValueConsumer: ExpressionValueConsumer[SubPosix] =
new ExpressionValueConsumer[SubPosix] {
override def expressionConsumedValueHooks(a: SubPosix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.arg1)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer) ++
expressionValueConsumer.expressionConsumedValueHooks(a.arg3)(expressionValueConsumer)
}

implicit val suffixExpressionValueConsumer: ExpressionValueConsumer[Suffix] = new ExpressionValueConsumer[Suffix] {
override def expressionConsumedValueHooks(a: Suffix)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ package object consumed {
case a: Zip => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Cross => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: Sub => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: SubPosix => a.expressionConsumedValueHooks(expressionValueConsumer)

// New WDL biscayne expressions:
case a: Keys => a.expressionConsumedValueHooks(expressionValueConsumer)
Expand Down
Loading

0 comments on commit a29fdf8

Please sign in to comment.