From 0da4aaacad2b1c8c6415a57962c613e942147d92 Mon Sep 17 00:00:00 2001 From: Ruchi Munshi Date: Fri, 2 Dec 2016 17:31:35 -0500 Subject: [PATCH 1/3] Update WdlTool version --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 2f1c44d..44e002d 100644 --- a/build.sbt +++ b/build.sbt @@ -10,7 +10,7 @@ scalaVersion := "2.11.8" lazy val versionSettings = Seq( // Upcoming release, or current if we're on the master branch - git.baseVersion := "0.7", + git.baseVersion := "0.8", // Shorten the git commit hash git.gitHeadCommit := git.gitHeadCommit.value map { _.take(7) }, From 58ab47922c03f61a184092d6461ce2a27ff95d9c Mon Sep 17 00:00:00 2001 From: Chris Llanwarne Date: Tue, 6 Dec 2016 11:22:42 -0500 Subject: [PATCH 2/3] Added a graph print feature --- README.md | 103 ++++++++++++++++++++++++ build.sbt | 2 +- src/main/scala/wdltool/GraphPrint.scala | 77 ++++++++++++++++++ src/main/scala/wdltool/Main.scala | 32 +++++++- 4 files changed, 210 insertions(+), 4 deletions(-) create mode 100644 src/main/scala/wdltool/GraphPrint.scala diff --git a/README.md b/README.md index 55a2b6d..6ff656f 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,109 @@ $ java -jar wdltool.jar highlight test.wdl html } ``` +## graph + +The syntax of the graph command is: +``` +wdltool graph [--all] wdlFile.wdl +``` + +Given a WDL file as input, this command generates the data-flow graph through the system in `.dot` format. 
+ +For example the fork-join WDL: +``` +task mkFile { + command { + for i in `seq 1 1000` + do + echo $i + done + } + output { + File numbers = stdout() + } + runtime {docker: "ubuntu:latest"} +} + +task grep { + String pattern + File in_file + command { + grep '${pattern}' ${in_file} | wc -l + } + output { + Int count = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +task wc { + File in_file + command { + cat ${in_file} | wc -l + } + output { + Int count = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +task join { + Int grepCount + Int wcCount + command { + expr ${wcCount} / ${grepCount} + } + output { + Int proportion = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +workflow forkjoin { + call mkFile + call grep { input: in_file = mkFile.numbers } + call wc { input: in_file=mkFile.numbers } + call join { input: wcCount = wc.count, grepCount = grep.count } + output { + join.proportion + } +} +``` + +Produces the DAG: +``` +digraph forkjoin { + "call forkjoin.mkFile" -> "call forkjoin.wc" + "call forkjoin.mkFile" -> "call forkjoin.grep" + "call forkjoin.wc" -> "call forkjoin.join" + "call forkjoin.grep" -> "call forkjoin.join" +} +``` + +### The --all flag + +If this flag is set, all WDL graph nodes become nodes in the generated DAG, even if they are not "executed". Typically this will mean task declarations and call outputs. 
+With `--all`, the example above would instead produce: + +``` +digraph forkjoin { + "call forkjoin.grep" -> "String forkjoin.grep.pattern" + "call forkjoin.grep" -> "output { forkjoin.grep.count = read_int(stdout()) }" + "call forkjoin.grep" -> "File forkjoin.grep.in_file" + "call forkjoin.wc" -> "output { forkjoin.wc.count = read_int(stdout()) }" + "call forkjoin.grep" -> "call forkjoin.join" + "call forkjoin.wc" -> "File forkjoin.wc.in_file" + "call forkjoin.mkFile" -> "call forkjoin.grep" + "call forkjoin.join" -> "output { forkjoin.join.proportion = read_int(stdout()) }" + "call forkjoin.join" -> "Int forkjoin.join.wcCount" + "call forkjoin.wc" -> "call forkjoin.join" + "call forkjoin.mkFile" -> "output { forkjoin.mkFile.numbers = stdout() }" + "call forkjoin.mkFile" -> "call forkjoin.wc" + "call forkjoin.join" -> "Int forkjoin.join.grepCount" +} +``` + # Getting Started with WDL For documentation and many examples on how to use WDL see [the WDL website](https://software.broadinstitute.org/wdl/). 
diff --git a/build.sbt b/build.sbt index 44e002d..a081bde 100644 --- a/build.sbt +++ b/build.sbt @@ -34,7 +34,7 @@ resolvers ++= Seq( ) libraryDependencies ++= Seq( - "org.broadinstitute" %% "wdl4s" % "0.7-799567f-SNAP", + "org.broadinstitute" %% "wdl4s" % "0.8-020c10c-SNAP", //---------- Test libraries -------------------// "org.scalatest" %% "scalatest" % "2.2.5" % Test ) diff --git a/src/main/scala/wdltool/GraphPrint.scala b/src/main/scala/wdltool/GraphPrint.scala new file mode 100644 index 0000000..9587468 --- /dev/null +++ b/src/main/scala/wdltool/GraphPrint.scala @@ -0,0 +1,77 @@ +package wdltool + +import java.nio.file.{Files, Paths} + +import wdl4s.{CallOutput, Declaration, If, Scatter, _} +import scala.collection.JavaConverters._ + +/** Generates the DOT statements (quoted nodes and upstream -> node edges) that describe a WDL workflow graph. */ object GraphPrint { + + /** Pairs a workflow's unqualified name with the set of DOT statements generated for it. */ case class WorkflowDigraph(workflowName: String, digraph: Set[String]) + + /** Loads the WDL namespace from the file at the given path and builds its digraph. With allNodesMode every GraphNode descendant of the namespace is considered; otherwise only the nodes returned by listExecutableGraphNodes for the workflow are kept. */ def generateWorkflowDigraph(file: String, allNodesMode: Boolean): WorkflowDigraph = { + /* Read the whole file into one string, joining its lines with the platform line separator. */ val namespace = WdlNamespaceWithWorkflow.load(Files.readAllLines(Paths.get(file)).asScala.mkString(System.lineSeparator()), Seq(WdlNamespace.fileResolver _)) + + val digraph = if (allNodesMode) { + listAllGraphNodes(namespace) + } else { + val executables = GraphPrint.listExecutableGraphNodes(namespace.workflow) + listAllGraphNodes(namespace, graphNode => executables.contains(graphNode)) + } + + WorkflowDigraph(namespace.workflow.unqualifiedName, digraph) + } + + /** Default filter: accepts every graph node. */ private def defaultFilter: GraphNode => Boolean = _ => true + + /** Builds the DOT statements for the namespace's GraphNode descendants that pass the filter. Each Call contributes a standalone quoted node; every node contributes one edge per upstream node that also passes the filter. */ private def listAllGraphNodes(namespace: WdlNamespaceWithWorkflow, filter: GraphNode => Boolean = defaultFilter): Set[String] = { + + val graphNodes = namespace.descendants collect { + case g: GraphNode if filter(g) => g + } + + graphNodes flatMap { graphNode => + val name = graphName(graphNode) + val initialSet: Set[String] = graphNode match { + case c: Call => Set(s""""${dotSafe(name)}"""") + case _ => Set.empty + } + val upstreamLinks = graphNode.upstream collect { + case upstream if filter(upstream) => + val 
upstreamName = graphName(upstream) + s""""${dotSafe(upstreamName)}" -> "${dotSafe(name)}"""" + } + + initialSet ++ upstreamLinks + } + } + + /** Recursively collects the Call, Scatter, If and Declaration children of the given scope, descending into Scatter and If scopes; any other child type contributes nothing. */ private def listExecutableGraphNodes(s: Scope): Set[GraphNode] = { + s.children.toSet flatMap { child: Scope => child match { + case call: Call => Set[GraphNode](call) + case scatter: Scatter => Set[GraphNode](scatter) ++ listExecutableGraphNodes(scatter) + case i: If => Set[GraphNode](i) ++ listExecutableGraphNodes(i) + case declaration: Declaration => Set[GraphNode](declaration) + case _ => Set.empty[GraphNode] + }} + } + + + /** Backslash-escapes double quotes so the text can sit inside a quoted DOT identifier. */ private def dotSafe(s: String) = s.replaceAllLiterally("\"", "\\\"") + + /** Label used for a graph node in the output, chosen by node type; unmatched types fall back to the simple class name followed by the fully qualified name. */ private def graphName(g: GraphNode): String = g match { + case d: Declaration => + val exprString = d.expression.map(e => " = " + e.toWdlString).getOrElse("") + s"${d.wdlType.toWdlString} ${d.fullyQualifiedName}$exprString" + case c: Call => + s"call ${c.fullyQualifiedName}" + case i: If => + s"if (${i.condition.toWdlString})" + case s: Scatter => + s"scatter (${s.item} in ${s.collection.toWdlString})" + case c: CallOutput => + val exprString = c.expression.map(e => " = " + e.toWdlString).getOrElse("") + s"output { ${c.fullyQualifiedName}$exprString }" + case other => s"${other.getClass.getSimpleName}: ${other.fullyQualifiedName}" + } +} diff --git a/src/main/scala/wdltool/Main.scala b/src/main/scala/wdltool/Main.scala index 7f39d9a..04b9854 100644 --- a/src/main/scala/wdltool/Main.scala +++ b/src/main/scala/wdltool/Main.scala @@ -3,9 +3,10 @@ package wdltool import java.nio.file.Paths import wdl4s.formatter.{AnsiSyntaxHighlighter, HtmlSyntaxHighlighter, SyntaxFormatter} -import wdl4s.{AstTools, WdlNamespace, WdlNamespaceWithWorkflow} +import wdl4s._ import spray.json._ + import scala.util.{Failure, Success, Try} object Main extends App { @@ -33,6 +34,7 @@ object Main extends App { case Some(x) if x == Actions.Highlight => highlight(args.tail) case Some(x) if x == Actions.Inputs => inputs(args.tail) case Some(x) if x == 
Actions.Parse => parse(args.tail) + case Some(x) if x == Actions.Graph => graph(args.tail) case _ => BadUsageTermination } } @@ -72,6 +74,23 @@ object Main extends App { } } + /** Handles the graph sub-command. Accepts either a single WDL file path, or --all followed by a WDL file path; any other argument shape is rejected by continueIf with BadUsageTermination. On success the digraph statements are wrapped in a digraph block named after the workflow and returned as a SuccessfulTermination. */ def graph(args: Seq[String]): Termination = { + continueIf(args.length == 1 || (args.length == 2 && args.head.equals("--all"))) { + + /* One argument means the file alone; two means --all precedes the file. */ val (file, allNodesMode) = + if (args.size == 1) (args.head, false) + else (args(1), true) + + /* NOTE(review): nothing here catches exceptions from generateWorkflowDigraph (e.g. an unreadable file); confirm that is intended. */ val workflowDigraph = GraphPrint.generateWorkflowDigraph(file, allNodesMode) + + /* Statements are joined one per line; stripMargin is applied below, after interpolation. */ val result = s"""|digraph ${workflowDigraph.workflowName} { + | ${workflowDigraph.digraph.mkString(System.lineSeparator + " ")} + |} + |""" + SuccessfulTermination(result.stripMargin) + } + } + private[this] def continueIf(valid: => Boolean)(block: => Termination): Termination = if (valid) block else BadUsageTermination private[this] def loadWdl(path: String)(f: WdlNamespace => Termination): Termination = { @@ -88,7 +107,7 @@ object Main extends App { } yield action object Actions extends Enumeration { - val Parse, Validate, Highlight, Inputs = Value + val Parse, Validate, Highlight, Inputs, Graph = Value } val UsageMessage = """ @@ -119,7 +138,14 @@ object Main extends App { | abstract syntax tree if it is valid, and a syntax error | otherwise. Note that higher-level AST checks are not done | via this sub-command and the 'validate' subcommand should - | be used for full validation + | be used for full validation. + |graph [--all] + | + | Reads a WDL file against the grammar and prints out a + | .dot of the DAG if it is valid, and a syntax error + | otherwise. + | Use [--all] to show all graph nodes in the WDL spec, + | even the non-executable nodes. """.stripMargin val termination = dispatchCommand(args) From 99c7d2208f22c7b6b76eedba6f79149b495f0754 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Thu, 5 Jan 2017 13:36:29 -0500 Subject: [PATCH 3/3] Updated wdl4s to a release version. 
--- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index a081bde..a65c713 100644 --- a/build.sbt +++ b/build.sbt @@ -34,7 +34,7 @@ resolvers ++= Seq( ) libraryDependencies ++= Seq( - "org.broadinstitute" %% "wdl4s" % "0.8-020c10c-SNAP", + "org.broadinstitute" %% "wdl4s" % "0.8", //---------- Test libraries -------------------// "org.scalatest" %% "scalatest" % "2.2.5" % Test )