diff --git a/examples/wdlviz.py b/examples/wdlviz.py index 2591da2e..9ee507dc 100755 --- a/examples/wdlviz.py +++ b/examples/wdlviz.py @@ -88,55 +88,63 @@ def wdlviz( Project the workflow's built-in dependency graph onto a graphviz representation """ # References: - # 1. WDL object model -- https://miniwdl.readthedocs.io/en/latest/WDL.html#module-WDL.Tree + # 1. WDL AST object model -- https://miniwdl.readthedocs.io/en/latest/WDL.html#module-WDL.Tree # 2. graphviz API -- https://graphviz.readthedocs.io/en/stable/manual.html # initialiaze Digraph fontname = "Roboto" top = graphviz.Digraph() top.attr( - label=workflow.name, + label=f"<{workflow.name}>", labelloc="t", fontname=fontname, compound="true", rankdir=rankdir, - concentrate="true", splines=splines, ) top.attr("node", fontname=fontname) top.attr("edge", color="#00000080") - # recursively add graphviz nodes for each workflow node. + # Recursively process the workflow AST into the Digraph. + # We'll identify nodes by their python id() instead of the workflow_node_id strings exposed + # in the miniwdl AST. The latter are only unique within a given workflow, while for graphviz + # we need globally unique id's (even when we have nested subworkflows). nodes_visited = set() - subworkflows_visited = set() - def add_node(graph: graphviz.Digraph, node: WDL.WorkflowNode): - nonlocal nodes_visited, subworkflows_visited + def add_node(graph, workflow, node): + nonlocal nodes_visited if isinstance(node, WDL.WorkflowSection): # scatter/conditional section: add a cluster subgraph to contain its body with graph.subgraph(name=f"cluster-{id(node)}") as sg: label = "scatter" if isinstance(node, WDL.Scatter) else "if" sg.attr(label=f"{label}({node.expr})", fontname=fontname, rank="same") for child in node.body: - add_node(sg, child) + add_node(sg, workflow, child) # Add an invisible node inside the subgraph, which provides a sink for dependencies # of the scatter/conditional expression itself sg.node(str(id(node)), "", style="invis", height="0", width="0", margin="0") - nodes_visited.add(node.workflow_node_id) - nodes_visited |= set(g.workflow_node_id for g in node.gathers.values()) + graph.edge(str(id(workflow)), str(id(node)), style="invis") # helps layout + nodes_visited.add(id(node)) + nodes_visited |= set(id(g) for g in node.gathers.values()) elif isinstance(node, WDL.Call) or ( isinstance(node, WDL.Decl) - and (inputs or nodes_visited.intersection(node.workflow_node_dependencies)) + and ( + inputs + or nodes_visited.intersection( + id(workflow.get_node(it)) for it in node.workflow_node_dependencies + ) + ) ): name = node.name if isinstance(node, WDL.Call) and isinstance(node.callee, WDL.Workflow): # subworkflow call: add a cluster subgraph for the called workflow; only once, if # the subworkflow is called in multiple places. - if id(node.callee) not in subworkflows_visited: - subworkflows_visited.add(id(node.callee)) + if id(node.callee) not in nodes_visited: + nodes_visited.add(id(node.callee)) with top.subgraph(name=f"cluster-{id(node.callee)}") as sg: - sg.attr(label=node.callee.name, fontname=fontname, rank="max") + sg.attr(label=f"<{node.callee.name}>", fontname=fontname, rank="max") add_workflow(sg, node.callee) + graph.edge(str(id(workflow)), str(id(node.callee)), style="invis") # helps layout # dotted edge from call to subworkflow graph.edge( f"{id(node)}:s", @@ -146,25 +154,17 @@ def add_node(graph: graphviz.Digraph, node: WDL.WorkflowNode): arrowhead="none", constraint="false", ) - # invisible edge for subworkflow hierarchy - top.edge( - f"{id(workflow)}", - f"{id(node.callee)}", - style="invis", - height="0", - width="0", - margin="0", - ) - name = f"{node.callee.name} as {name}" + name = f"<{node.callee.name} as {name}>" # node for call or decl graph.node( str(id(node)), name, shape=("cds" if isinstance(node, WDL.Call) else "plaintext"), ) - nodes_visited.add(node.workflow_node_id) + graph.edge(str(id(workflow)), str(id(node)), style="invis") # helps layout + nodes_visited.add(id(node)) - # add edge for each dependency between workflow nodes + # add edge for each dependency between (visited) workflow nodes def add_edges(graph, workflow, node): for dep_id in node.workflow_node_dependencies: dep = workflow.get_node(dep_id) @@ -172,7 +172,7 @@ def add_edges(graph, workflow, node): # final_referee if isinstance(dep, WDL.Tree.Gather): dep = dep.final_referee - if dep.workflow_node_id in nodes_visited and node.workflow_node_id in nodes_visited: + if id(dep) in nodes_visited and id(node) in nodes_visited: lhead = None if isinstance(node, WDL.WorkflowSection): lhead = f"cluster-{id(node)}" @@ -182,8 +182,19 @@ def add_edges(graph, workflow, node): add_edges(graph, workflow, child) def add_workflow(graph, workflow): + # invisible source/sink node for the workflow subgraph + graph.node( + str(id(workflow)), + "", + style="invis", + height="0", + width="0", + margin="0", + ) + + # workflow body nodes for node in workflow.body: - add_node(graph, node) + add_node(graph, workflow, node) # cluster of the input decls if inputs: @@ -191,8 +202,8 @@ def add_workflow(graph, workflow): for inp in workflow.inputs or []: assert inp.workflow_node_id.startswith("decl-") sg.node(str(id(inp)), inp.workflow_node_id[5:], shape="plaintext") - nodes_visited.add(inp.workflow_node_id) - sg.attr(label="inputs", fontname=fontname) + nodes_visited.add(id(inp)) + sg.attr(label="<inputs>", fontname=fontname) # cluster of the output decls if outputs: @@ -200,18 +211,10 @@ def add_workflow(graph, workflow): for outp in workflow.outputs or []: assert outp.workflow_node_id.startswith("output-") sg.node(str(id(outp)), outp.workflow_node_id[7:], shape="plaintext") - nodes_visited.add(outp.workflow_node_id) - sg.attr(label="outputs", fontname=fontname) - - graph.node( # sink - str(id(workflow)), - "", - style="invis", - height="0", - width="0", - margin="0", - ) + nodes_visited.add(id(outp)) + sg.attr(label="<outputs>", fontname=fontname) + # edges for node in (workflow.inputs or []) + workflow.body + (workflow.outputs or []): add_edges(graph, workflow, node)