Skip to content

Commit

Permalink
[WX-963] Unzip Engine Function (#7368)
Browse files Browse the repository at this point in the history
  • Loading branch information
THWiseman authored Feb 27, 2024
1 parent a29fdf8 commit 5e59b02
Show file tree
Hide file tree
Showing 34 changed files with 391 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,19 @@ metadata {
"outputs.biscayne_new_engine_functions.with_suffixes.0": "aaaS"
"outputs.biscayne_new_engine_functions.with_suffixes.1": "bbbS"
"outputs.biscayne_new_engine_functions.with_suffixes.2": "cccS"

"outputs.biscayne_new_engine_functions.unzipped_a.left.0": "A"
"outputs.biscayne_new_engine_functions.unzipped_a.right.0": "a"

"outputs.biscayne_new_engine_functions.unzipped_b.left.0": "A"
"outputs.biscayne_new_engine_functions.unzipped_b.left.1": "B"
"outputs.biscayne_new_engine_functions.unzipped_b.right.0": "a"
"outputs.biscayne_new_engine_functions.unzipped_b.right.1": "b"

"outputs.biscayne_new_engine_functions.unzipped_c.left.0": "one"
"outputs.biscayne_new_engine_functions.unzipped_c.left.1": "two"
"outputs.biscayne_new_engine_functions.unzipped_c.left.2": "three"
"outputs.biscayne_new_engine_functions.unzipped_c.right.0": 1.0
"outputs.biscayne_new_engine_functions.unzipped_c.right.1": 2.0
"outputs.biscayne_new_engine_functions.unzipped_c.right.2": 3.0
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ workflow biscayne_new_engine_functions {

meta {
description: "This test makes sure that these functions work in a real workflow"
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "sub", "suffix" ]
functions_under_test: [ "keys", "as_map", "as_pairs", "collect_by_key", "suffix", "unzip" ]
}

Map[String, Int] x_map_in = {"a": 1, "b": 2, "c": 3}
Expand All @@ -26,6 +26,10 @@ workflow biscayne_new_engine_functions {
# max float... near enough:
Float maxFloat = 179769313000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0

Array[Pair[String,String]] zipped_a = [("A", "a")]
Array[Pair[String,String]] zipped_b = [("A", "a"),("B", "b")]
Array[Pair[String,Float]] zipped_c = [("one", 1.0),("two", 2.0),("three", 3.0)]

output {

# keys(), as_map(), as_pairs(), collect_by_key():
Expand Down Expand Up @@ -59,6 +63,12 @@ workflow biscayne_new_engine_functions {
# suffix():
# =================================================
Array[String] with_suffixes = suffix("S", some_strings)

# unzip():
# =================================================
Pair[Array[String], Array[String]] unzipped_a = unzip(zipped_a)
Pair[Array[String], Array[String]] unzipped_b = unzip(zipped_b)
Pair[Array[String], Array[String]] unzipped_c = unzip(zipped_c)
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ object ExpressionElement {
final case class Ceil(param: ExpressionElement) extends OneParamFunctionCallElement
final case class Round(param: ExpressionElement) extends OneParamFunctionCallElement
final case class Glob(param: ExpressionElement) extends OneParamFunctionCallElement
final case class Unzip(param: ExpressionElement) extends OneParamFunctionCallElement

// 1- or 2-param functions:
sealed trait OneOrTwoParamFunctionCallElement extends FunctionCallElement {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@ package wdl.transforms.biscayne.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{
AsMap,
AsPairs,
CollectByKey,
Keys,
Max,
Min,
Sep,
SubPosix,
Suffix,
Unzip
}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -17,6 +28,7 @@ object AstToNewExpressionElements {
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"unzip" -> AstNodeToExpressionElement.validateOneParamEngineFunction(Unzip, "unzip"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ object BiscayneExpressionValueConsumers {
expressionValueConsumer.expressionConsumedValueHooks(a.arg2)(expressionValueConsumer)
}

implicit val unzipExpressionValueConsumer: ExpressionValueConsumer[Unzip] = new ExpressionValueConsumer[Unzip] {
override def expressionConsumedValueHooks(a: Unzip)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.param)(expressionValueConsumer)
}

implicit val noneLiteralExpressionValueConsumer: ExpressionValueConsumer[NoneLiteralElement.type] =
new ExpressionValueConsumer[NoneLiteralElement.type] {
override def expressionConsumedValueHooks(a: NoneLiteralElement.type)(implicit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ package object consumed {
case a: AsPairs => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: CollectByKey => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Sep => sepExpressionValueConsumer.expressionConsumedValueHooks(a)(expressionValueConsumer)
case a: Unzip => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: Min => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Max => a.expressionConsumedValueHooks(expressionValueConsumer)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
package wdl.transforms.biscayne.linking.expression.files

import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{
AsMap,
AsPairs,
CollectByKey,
Keys,
Max,
Min,
Sep,
SubPosix,
Suffix,
Unzip
}
import wdl.model.draft3.graph.expression.FileEvaluator
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators
import wdl.transforms.base.linking.expression.files.EngineFunctionEvaluators.{
Expand All @@ -26,4 +37,6 @@ object BiscayneFileEvaluators {
implicit val minFunctionEvaluator: FileEvaluator[Min] = twoParameterFunctionPassthroughFileEvaluator[Min]
implicit val maxFunctionEvaluator: FileEvaluator[Max] = twoParameterFunctionPassthroughFileEvaluator[Max]

implicit val unzipFunctionEvaluator: FileEvaluator[Unzip] =
EngineFunctionEvaluators.singleParameterPassthroughFileEvaluator
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ package object files {

case a: Zip => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: Cross => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
case a: Unzip => a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)

case a: SubPosix =>
a.predictFilesNeededToEvaluate(inputs, ioFunctionSet, coerceTo)(fileEvaluator, valueEvaluator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,15 @@ object BiscayneTypeEvaluators {
validateParamType(a.array, linkedValues, WomArrayType(WomStringType))
) mapN { (_, _) => WomArrayType(WomStringType) }
}

implicit val unzipFunctionEvaluator: TypeEvaluator[Unzip] = new TypeEvaluator[Unzip] {
override def evaluateType(a: Unzip, linkedValues: Map[UnlinkedConsumedValueHook, GeneratedValueHandle])(implicit
expressionTypeEvaluator: TypeEvaluator[ExpressionElement]
): ErrorOr[WomType] =
validateParamType(a.param, linkedValues, WomArrayType(WomPairType(WomAnyType, WomAnyType))) flatMap {
case WomArrayType(WomNothingType) => WomPairType(WomArrayType(WomAnyType), WomArrayType(WomAnyType)).validNel
case WomArrayType(WomPairType(x, y)) => WomPairType(WomArrayType(x), WomArrayType(y)).validNel
case other => s"Cannot invoke 'unzip' on type '${other.stableName}'. Expected an array of pairs".invalidNel
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ package object types {

case a: Zip => a.evaluateType(linkedValues)(typeEvaluator)
case a: Cross => a.evaluateType(linkedValues)(typeEvaluator)
case a: Unzip => a.evaluateType(linkedValues)(typeEvaluator)

case a: SubPosix => a.evaluateType(linkedValues)(typeEvaluator)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,39 @@ object BiscayneValueEvaluators {
EvaluatedValue(WomArray(arr.value.map(v => WomString(v.valueString + suffix.value))), Seq.empty).validNel
}
}

/**
* Unzip: Creates a pair of arrays, the first containing the elements from the left members of an array of pairs,
* and the second containing the right members. This is the inverse of the zip function.
* https://github.com/openwdl/wdl/blob/main/versions/1.1/SPEC.md#-pairarrayx-arrayy-unziparraypairx-y
* input: Array[Pair[X,Y]]
* output: Pair[Array[X], Array[Y]]
*/
implicit val unzipFunctionEvaluator: ValueEvaluator[Unzip] = new ValueEvaluator[Unzip] {
override def evaluateValue(a: Unzip,
inputs: Map[String, WomValue],
ioFunctionSet: IoFunctionSet,
forCommandInstantiationOptions: Option[ForCommandInstantiationOptions]
)(implicit expressionValueEvaluator: ValueEvaluator[ExpressionElement]): ErrorOr[EvaluatedValue[WomPair]] =
processValidatedSingleValue[WomArray, WomPair](
expressionValueEvaluator.evaluateValue(a.param, inputs, ioFunctionSet, forCommandInstantiationOptions)(
expressionValueEvaluator
)
) {
case WomArray(WomArrayType(WomAnyType), Seq()) =>
EvaluatedValue(
WomPair(WomArray(WomArrayType(WomAnyType), Seq.empty), WomArray(WomArrayType(WomAnyType), Seq.empty)),
Seq.empty
).validNel
case WomArray(WomArrayType(WomPairType(_, _)), values) =>
val zippedArrayOfPairs: Seq[(WomValue, WomValue)] = values map { case pair: WomPair =>
Tuple2(pair.left, pair.right)
}
val (left, right) = zippedArrayOfPairs.unzip
val unzippedPairOfArrays: WomPair = WomPair(WomArray(left), WomArray(right))
EvaluatedValue(unzippedPairOfArrays, Seq.empty).validNel
case other =>
s"Invalid call of 'unzip' on parameter of type '${other.womType.stableName}' (expected Array[Pair[X, Y]])".invalidNel
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ package object values {
case a: Zip => a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: Cross =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
case a: Unzip =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)

case a: SubPosix =>
a.evaluateValue(inputs, ioFunctionSet, forCommandInstantiationOptions)(expressionValueEvaluator)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,10 @@ class Ast2WdlomSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers {
val expr = fromString[ExpressionElement](str, parser.parse_e)
expr shouldBeValid (Suffix(IdentifierLookup("some_str"), IdentifierLookup("some_arr")))
}

it should "parse the new unzip function" in {
val str = "unzip(some_array_of_pairs)"
val expr = fromString[ExpressionElement](str, parser.parse_e)
expr shouldBeValid (Unzip(IdentifierLookup("some_array_of_pairs")))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,13 @@ class BiscayneExpressionValueConsumersSpec extends AnyFlatSpec with CromwellTime
e.expressionConsumedValueHooks should be(Set(UnlinkedIdentifierHook("my_array")))
}
}

it should "discover an array variable lookup within a unzip() call" in {
val str = """ unzip(my_array_of_pairs) """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.expressionConsumedValueHooks should be(Set(UnlinkedIdentifierHook("my_array_of_pairs")))
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,15 @@ class BiscayneFileEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
)
}
}

it should "discover the file which would be required to evaluate a unzip() function" in {
val str = """ unzip(read_lines("foo.txt")) """
val expr = fromString[ExpressionElement](str, parser.parse_e)

expr.shouldBeValidPF { case e =>
e.predictFilesNeededToEvaluate(Map.empty, NoIoFunctionSet, WomStringType) shouldBeValid Set(
WomSingleFile("foo.txt")
)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,26 @@ class BiscayneTypeEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wit
e.evaluateType(Map.empty) shouldBeValid WomArrayType(WomStringType)
}
}

it should "evaluate the type of an unzip() function as Pair[Array[X], Array[Y]]" in {
val string_and_int = """ unzip([("one", 1),("two", 2),("three", 3)]) """
val string_and_int_expr = fromString[ExpressionElement](string_and_int, parser.parse_e)
string_and_int_expr.shouldBeValidPF { case e =>
e.evaluateType(Map.empty) shouldBeValid WomPairType(WomArrayType(WomStringType), WomArrayType(WomIntegerType))
}

val int_and_int = """ unzip([(1,2),(3,4),(5,6)]) """
val int_and_int_expr = fromString[ExpressionElement](int_and_int, parser.parse_e)
int_and_int_expr.shouldBeValidPF { case e =>
e.evaluateType(Map.empty) shouldBeValid WomPairType(WomArrayType(WomIntegerType), WomArrayType(WomIntegerType))
}
}

it should "evaluate the type of an unzip() function on an empty collection as Pair[Array[Any], Array[Any]]" in {
val empty = """ unzip([]) """
val empty_unzip_expr = fromString[ExpressionElement](empty, parser.parse_e)
empty_unzip_expr.shouldBeValidPF { case e =>
e.evaluateType(Map.empty) shouldBeValid WomPairType(WomArrayType(WomAnyType), WomArrayType(WomAnyType))
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import wdl.model.draft3.graph.expression.ValueEvaluator.ops._
import wdl.transforms.biscayne.Ast2WdlomSpec.{fromString, parser}
import wdl.transforms.biscayne.ast2wdlom._
import wom.expression.NoIoFunctionSet
import wom.types.{WomIntegerType, WomMapType, WomOptionalType, WomStringType}
import wom.types.{WomAnyType, WomArrayType, WomIntegerType, WomMapType, WomOptionalType, WomStringType}
import wom.values.{WomArray, WomInteger, WomMap, WomOptionalValue, WomPair, WomString}

class BiscayneValueEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matchers {
Expand Down Expand Up @@ -240,4 +240,48 @@ class BiscayneValueEvaluatorSpec extends AnyFlatSpec with CromwellTimeoutSpec wi
e.evaluateValue(Map.empty, NoIoFunctionSet, None) shouldBeValid EvaluatedValue(expectedArray, Seq.empty)
}
}

it should "evaluate an unzip expression correctly" in {
val str = """ unzip([("one", 1),("two", 2),("three", 3)]) """
val expr = fromString[ExpressionElement](str, parser.parse_e)

val left: WomArray =
WomArray(WomArrayType(WomStringType), Seq(WomString("one"), WomString("two"), WomString("three")))
val right: WomArray = WomArray(WomArrayType(WomIntegerType), Seq(WomInteger(1), WomInteger(2), WomInteger(3)))
val expectedPair: WomPair = WomPair(left, right)

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None) shouldBeValid EvaluatedValue(expectedPair, Seq.empty)
}
}

it should "evaluate an unzip on an empty collection correctly" in {
val str = """ unzip([])"""
val expr = fromString[ExpressionElement](str, parser.parse_e)

val left: WomArray = WomArray(WomArrayType(WomAnyType), Seq())
val right: WomArray = WomArray(WomArrayType(WomAnyType), Seq())
val expectedPair: WomPair = WomPair(left, right)

expr.shouldBeValidPF { case e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None) shouldBeValid EvaluatedValue(expectedPair, Seq.empty)
}
}

it should "fail to evaluate unzip on invalid pair" in {
val invalidPair = """ unzip([()])"""
val invalidPairExpr = fromString[ExpressionElement](invalidPair, parser.parse_e)
invalidPairExpr.shouldBeInvalid("Failed to parse expression (reason 1 of 1): No WDL support for 0-tuples")
}

it should "fail to evaluate unzip on heterogeneous pairs" in {
val invalidPair = """ unzip([ (1, 11.0), ([1,2,3], 2.0) ])"""
val invalidPairExpr = fromString[ExpressionElement](invalidPair, parser.parse_e)
invalidPairExpr.map(e =>
e.evaluateValue(Map.empty, NoIoFunctionSet, None)
.shouldBeInvalid(
"Could not construct array of type WomMaybeEmptyArrayType(WomPairType(WomIntegerType,WomFloatType)) with this value: List(WomPair(WomInteger(1),WomFloat(11.0)), WomPair([1, 2, 3],WomFloat(2.0)))"
)
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@ package wdl.transforms.cascades.ast2wdlom
import cats.syntax.validated._
import common.validation.ErrorOr.ErrorOr
import wdl.model.draft3.elements.ExpressionElement
import wdl.model.draft3.elements.ExpressionElement.{AsMap, AsPairs, CollectByKey, Keys, Max, Min, Sep, SubPosix, Suffix}
import wdl.model.draft3.elements.ExpressionElement.{
AsMap,
AsPairs,
CollectByKey,
Keys,
Max,
Min,
Sep,
SubPosix,
Suffix,
Unzip
}
import wdl.transforms.base.ast2wdlom.AstNodeToExpressionElement

object AstToNewExpressionElements {
Expand All @@ -17,6 +28,7 @@ object AstToNewExpressionElements {
"sep" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Sep, "sep"),
"sub" -> AstNodeToExpressionElement.validateThreeParamEngineFunction(SubPosix, "sub"),
"suffix" -> AstNodeToExpressionElement.validateTwoParamEngineFunction(Suffix, "suffix"),
"unzip" -> AstNodeToExpressionElement.validateOneParamEngineFunction(Unzip, "unzip"),
"read_object" -> (_ =>
"read_object is no longer available in this WDL version. Consider using read_json instead".invalidNel
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,11 @@ object cascadesExpressionValueConsumers {
// None literals consume no values:
Set.empty[UnlinkedConsumedValueHook]
}

implicit val unzipExpressionValueConsumer: ExpressionValueConsumer[Unzip] = new ExpressionValueConsumer[Unzip] {
override def expressionConsumedValueHooks(a: Unzip)(implicit
expressionValueConsumer: ExpressionValueConsumer[ExpressionElement]
): Set[UnlinkedConsumedValueHook] =
expressionValueConsumer.expressionConsumedValueHooks(a.param)(expressionValueConsumer)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ package object consumed {

case a: Zip => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Cross => a.expressionConsumedValueHooks(expressionValueConsumer)
case a: Unzip => a.expressionConsumedValueHooks(expressionValueConsumer)

case a: SubPosix => a.expressionConsumedValueHooks(expressionValueConsumer)

Expand Down
Loading

0 comments on commit 5e59b02

Please sign in to comment.