Loops and SDFG Scope Blocks #1356

Closed
wants to merge 33 commits into from

Commits (showing changes from all 33 commits)
b1877c7
Sync
phschaad Aug 25, 2023
26e5a89
Initial loop generation
phschaad Aug 28, 2023
13b7a1b
Fix missing edge after loop blocks
phschaad Aug 28, 2023
71a3e06
Get state fusion to work
phschaad Aug 29, 2023
da44c98
Provide iterator over all states of CFG blocks
phschaad Aug 29, 2023
dc9510e
Refactor
phschaad Aug 29, 2023
5e712c6
Remove redundant methods
phschaad Aug 29, 2023
4a93300
Refactor
phschaad Aug 30, 2023
f98006e
Improve class structure
phschaad Aug 31, 2023
125c1b6
Class structure improvement
phschaad Sep 5, 2023
a320d09
Refactor
phschaad Sep 5, 2023
75195fa
Re-work multistate inline
phschaad Sep 5, 2023
0b0a3c8
Adds legacy compatibility to loopscopeblocks
phschaad Sep 7, 2023
b29a3f5
Fix conditions
phschaad Sep 7, 2023
8086bb1
Remove stale lines
phschaad Sep 7, 2023
ffa60dc
Bugfix
phschaad Sep 18, 2023
ad14839
Merge remote-tracking branch 'origin/master' into loop_proposal_integral
phschaad Sep 18, 2023
4744d08
Sync
phschaad Sep 19, 2023
41a0abf
Add fallback to legacy state machines to compilation
phschaad Sep 19, 2023
89fe7b1
Bugfixes
phschaad Sep 20, 2023
2067554
Bugfixes
phschaad Sep 20, 2023
d543b2a
Adapt dead code elimination
phschaad Sep 21, 2023
5675d2d
Bugfix state elimination
phschaad Sep 22, 2023
53c026b
Merge branch 'master' into loop_proposal_integral
phschaad Sep 22, 2023
606a1e3
Merge branch 'master' into loop_proposal_integral
alexnick83 Oct 9, 2023
4a24872
Removed subscript.
alexnick83 Oct 9, 2023
774760d
Updated webclient.
alexnick83 Oct 9, 2023
38bf9a2
Use SDFG.nodes, which returns a list.
alexnick83 Oct 9, 2023
1610503
Fix setting of loop variable. Use state.sdfg.
alexnick83 Oct 9, 2023
12576da
Used state.sdfg.
alexnick83 Oct 9, 2023
7235e6d
Sync
phschaad Oct 16, 2023
95c2efc
Merge branch 'loop_proposal_integral' of github.com:spcl/dace into lo…
phschaad Oct 16, 2023
f7c7011
Add legacy state reach pass
phschaad Oct 18, 2023
12 changes: 6 additions & 6 deletions dace/codegen/codegen.py
@@ -16,7 +16,7 @@
from dace.codegen.targets import cpp, cpu

from dace.codegen.instrumentation import InstrumentationProvider
from dace.sdfg.state import SDFGState
from dace.sdfg.state import SDFGState, ScopeBlock


def generate_headers(sdfg: SDFG, frame: framecode.DaCeCodeGenerator) -> str:
@@ -100,13 +100,13 @@ def _get_codegen_targets(sdfg: SDFG, frame: framecode.DaCeCodeGenerator):
frame.targets.add(disp.get_scope_dispatcher(node.schedule))
elif isinstance(node, dace.nodes.Node):
state: SDFGState = parent
nsdfg = state.parent
nsdfg = state.sdfg
frame.targets.add(disp.get_node_dispatcher(nsdfg, state, node))

# Array allocation
if isinstance(node, dace.nodes.AccessNode):
state: SDFGState = parent
nsdfg = state.parent
nsdfg = state.sdfg
desc = node.desc(nsdfg)
frame.targets.add(disp.get_array_dispatcher(desc.storage))

@@ -124,13 +124,13 @@ def _get_codegen_targets(sdfg: SDFG, frame: framecode.DaCeCodeGenerator):
dst_node = leaf_e.dst
if leaf_e.data.is_empty():
continue
tgt = disp.get_copy_dispatcher(node, dst_node, leaf_e, state.parent, state)
tgt = disp.get_copy_dispatcher(node, dst_node, leaf_e, state.sdfg, state)
if tgt is not None:
frame.targets.add(tgt)
else:
# Rooted at dst_node
dst_node = mtree.root().edge.dst
tgt = disp.get_copy_dispatcher(node, dst_node, e, state.parent, state)
tgt = disp.get_copy_dispatcher(node, dst_node, e, state.sdfg, state)
if tgt is not None:
frame.targets.add(tgt)

@@ -149,7 +149,7 @@ def _get_codegen_targets(sdfg: SDFG, frame: framecode.DaCeCodeGenerator):
disp.instrumentation[sdfg.instrument] = provider_mapping[sdfg.instrument]


def generate_code(sdfg, validate=True) -> List[CodeObject]:
def generate_code(sdfg: SDFG, validate=True) -> List[CodeObject]:
"""
Generates code as a list of code objects for a given SDFG.

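The change repeated throughout this file replaces `state.parent` with `state.sdfg` when retrieving the SDFG that owns a state. A minimal sketch of the new access pattern, assuming the `SDFGState.sdfg` property used on this branch (the SDFG and state names below are illustrative):

```python
import dace

# Build a trivial SDFG with a single state (illustrative names).
sdfg = dace.SDFG('example')
state = sdfg.add_state('main')

# Previously: parent_sdfg = state.parent
# With this PR, the owning SDFG is obtained via the .sdfg property:
parent_sdfg = state.sdfg
assert parent_sdfg is sdfg
```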
8 changes: 4 additions & 4 deletions dace/codegen/control_flow.py
@@ -124,7 +124,7 @@ class SingleState(ControlFlow):
last_state: bool = False

def as_cpp(self, codegen, symbols) -> str:
sdfg = self.state.parent
sdfg = self.state.sdfg

expr = '__state_{}_{}:;\n'.format(sdfg.sdfg_id, self.state.label)
if self.state.number_of_nodes() > 0:
@@ -221,7 +221,7 @@ def as_cpp(self, codegen, symbols) -> str:
# In a general block, emit transitions and assignments after each
# individual state
if isinstance(elem, SingleState):
sdfg = elem.state.parent
sdfg = elem.state.sdfg
out_edges = sdfg.out_edges(elem.state)
for j, e in enumerate(out_edges):
if e not in self.gotos_to_ignore:
@@ -361,7 +361,7 @@ class ForScope(ControlFlow):

def as_cpp(self, codegen, symbols) -> str:

sdfg = self.guard.parent
sdfg = self.guard.sdfg

# Initialize to either "int i = 0" or "i = 0" depending on whether
# the type has been defined
@@ -415,7 +415,7 @@ class WhileScope(ControlFlow):

def as_cpp(self, codegen, symbols) -> str:
if self.test is not None:
sdfg = self.guard.parent
sdfg = self.guard.sdfg
test = unparse_interstate_edge(self.test.code[0], sdfg, codegen=codegen)
else:
test = 'true'
10 changes: 5 additions & 5 deletions dace/codegen/instrumentation/papi.py
@@ -12,7 +12,7 @@
from dace.sdfg.graph import SubgraphView
from dace.memlet import Memlet
from dace.sdfg import scope_contains_scope
from dace.sdfg.state import StateGraphView
from dace.sdfg.state import DataflowGraphView

import sympy as sp
import os
@@ -392,7 +392,7 @@ def should_instrument_entry(map_entry: EntryNode) -> bool:
return cond

@staticmethod
def has_surrounding_perfcounters(node, dfg: StateGraphView):
def has_surrounding_perfcounters(node, dfg: DataflowGraphView):
""" Returns true if there is a possibility that this node is part of a
section that is profiled. """
parent = dfg.entry_node(node)
@@ -605,7 +605,7 @@ def get_memlet_byte_size(sdfg: dace.SDFG, memlet: Memlet):
return memlet.volume * memdata.dtype.bytes

@staticmethod
def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: StateGraphView):
def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: DataflowGraphView):
scope_dict = sdfg.node(state_id).scope_dict()

out_costs = 0
@@ -636,7 +636,7 @@ def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg:
return out_costs

@staticmethod
def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, dfg: StateGraphView, sdfg: dace.SDFG, state_id: int) -> str:
def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, dfg: DataflowGraphView, sdfg: dace.SDFG, state_id: int) -> str:
""" Get the amount of bytes processed by `tasklet`. The formula is
sum(inedges * size) + sum(outedges * size) """
in_accum = []
@@ -693,7 +693,7 @@ def get_memory_input_size(node, sdfg, state_id) -> str:
return sym2cpp(input_size)

@staticmethod
def accumulate_byte_movement(outermost_node, node, dfg: StateGraphView, sdfg, state_id):
def accumulate_byte_movement(outermost_node, node, dfg: DataflowGraphView, sdfg, state_id):

itvars = dict() # initialize an empty dict

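The edits in this file only touch type annotations: the dataflow-graph view class is now imported from `dace.sdfg.state` as `DataflowGraphView` rather than `StateGraphView`. A hedged sketch of an annotated helper in the same style (the `count_access_nodes` function is hypothetical and not part of the PR):

```python
from dace.sdfg import nodes
from dace.sdfg.state import DataflowGraphView


def count_access_nodes(dfg: DataflowGraphView) -> int:
    # Hypothetical helper: counts access nodes in a dataflow graph view,
    # annotated with the renamed view class used on this branch.
    return sum(1 for n in dfg.nodes() if isinstance(n, nodes.AccessNode))
```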
10 changes: 5 additions & 5 deletions dace/codegen/targets/cuda.py
@@ -204,9 +204,9 @@ def preprocess(self, sdfg: SDFG) -> None:
for state, node, defined_syms in sdutil.traverse_sdfg_with_defined_symbols(sdfg, recursive=True):
if (isinstance(node, nodes.MapEntry)
and node.map.schedule in (dtypes.ScheduleType.GPU_Device, dtypes.ScheduleType.GPU_Persistent)):
if state.parent not in shared_transients:
shared_transients[state.parent] = state.parent.shared_transients()
self._arglists[node] = state.scope_subgraph(node).arglist(defined_syms, shared_transients[state.parent])
if state.sdfg not in shared_transients:
shared_transients[state.sdfg] = state.sdfg.shared_transients()
self._arglists[node] = state.scope_subgraph(node).arglist(defined_syms, shared_transients[state.sdfg])

def _compute_pool_release(self, top_sdfg: SDFG):
"""
@@ -831,7 +831,7 @@ def increment(streams):
# Remove CUDA streams from paths of non-gpu copies and CPU tasklets
for node, graph in sdfg.all_nodes_recursive():
if isinstance(graph, SDFGState):
cur_sdfg = graph.parent
cur_sdfg = graph.sdfg

if (isinstance(node, (nodes.EntryNode, nodes.ExitNode)) and node.schedule in dtypes.GPU_SCHEDULES):
# Node must have GPU stream, remove childpath and continue
@@ -1421,7 +1421,7 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
visited = set()
for node, parent in dfg_scope.all_nodes_recursive():
if isinstance(node, nodes.AccessNode):
nsdfg: SDFG = parent.parent
nsdfg: SDFG = parent.sdfg
desc = node.desc(nsdfg)
if (nsdfg, node.data) in visited:
continue
2 changes: 1 addition & 1 deletion dace/codegen/targets/fpga.py
@@ -1332,7 +1332,7 @@ def partition_kernels(self, state: dace.SDFGState, default_kernel: int = 0):
"""

concurrent_kernels = 0 # Max number of kernels
sdfg = state.parent
sdfg = state.sdfg

def increment(kernel_id):
if concurrent_kernels > 0:
24 changes: 14 additions & 10 deletions dace/codegen/targets/framecode.py
@@ -83,7 +83,8 @@ def free_symbols(self, obj: Any):
if k in self.fsyms:
return self.fsyms[k]
if hasattr(obj, 'used_symbols'):
result = obj.used_symbols(all_symbols=False)
intermediate = obj.used_symbols(all_symbols=False)
result = intermediate[0] if type(intermediate) is tuple else intermediate
else:
result = obj.free_symbols
self.fsyms[k] = result
@@ -395,9 +396,14 @@ def generate_external_memory_management(self, sdfg: SDFG, callsite_stream: CodeI
# Footer
callsite_stream.write('}', sdfg)

def generate_state(self, sdfg, state, global_stream, callsite_stream, generate_state_footer=True):
def generate_state(self,
sdfg: SDFG,
state: SDFGState,
global_stream: CodeIOStream,
callsite_stream: CodeIOStream,
generate_state_footer=True) -> None:

sid = sdfg.node_id(state)
sid = state.parent.node_id(state)

# Emit internal transient array allocation
self.allocate_arrays_in_scope(sdfg, state, global_stream, callsite_stream)
@@ -444,7 +450,7 @@ def generate_state(self, sdfg, state, global_stream, callsite_stream, generate_s
if instr is not None:
instr.on_state_end(sdfg, state, callsite_stream, global_stream)

def generate_states(self, sdfg, global_stream, callsite_stream):
def generate_states(self, sdfg: SDFG, global_stream: CodeIOStream, callsite_stream: CodeIOStream):
states_generated = set()

opbar = progress.OptionalProgressBar(sdfg.number_of_nodes(), title=f'Generating code (SDFG {sdfg.sdfg_id})')
@@ -491,7 +497,7 @@ def _get_schedule(self, scope: Union[nodes.EntryNode, SDFGState, SDFG]) -> dtype
elif isinstance(scope, nodes.EntryNode):
return scope.schedule
elif isinstance(scope, (SDFGState, SDFG)):
sdfg: SDFG = (scope if isinstance(scope, SDFG) else scope.parent)
sdfg: SDFG = (scope if isinstance(scope, SDFG) else scope.sdfg)
if sdfg.parent_nsdfg_node is None:
return TOP_SCHEDULE

@@ -526,8 +532,7 @@ def _can_allocate(self, sdfg: SDFG, state: SDFGState, desc: data.Data, scope: Un

def determine_allocation_lifetime(self, top_sdfg: SDFG):
"""
Determines where (at which scope/state/SDFG) each data descriptor
will be allocated/deallocated.
Determines where (at which scope/state/SDFG) each data descriptor will be allocated/deallocated.

:param top_sdfg: The top-level SDFG to determine for.
"""
@@ -543,8 +548,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG):
#############################################
# Look for all states in which a scope-allocated array is used in
instances: Dict[str, List[Tuple[SDFGState, nodes.AccessNode]]] = collections.defaultdict(list)
array_names = sdfg.arrays.keys(
) #set(k for k, v in sdfg.arrays.items() if v.lifetime == dtypes.AllocationLifetime.Scope)
array_names = sdfg.arrays.keys()
# Iterate topologically to get state-order
for state in sdfg.topological_sort():
for node in state.data_nodes():
@@ -721,7 +725,7 @@ def determine_allocation_lifetime(self, top_sdfg: SDFG):
if curscope is None:
curscope = curstate
elif isinstance(curscope, (SDFGState, SDFG)):
cursdfg: SDFG = (curscope if isinstance(curscope, SDFG) else curscope.parent)
cursdfg: SDFG = (curscope if isinstance(curscope, SDFG) else curscope.sdfg)
# Go one SDFG up
if cursdfg.parent_nsdfg_node is None:
curscope = None
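A behavioral detail in the `free_symbols` cache above: on this branch, `used_symbols(all_symbols=False)` may return a tuple instead of a plain set (presumably the symbol set plus additional information for the new scope blocks), so the caller unwraps the first element. A minimal, DaCe-independent sketch of that defensive pattern:

```python
from typing import Set, Tuple, Union


def normalize_used_symbols(result: Union[Set[str], Tuple[Set[str], Set[str]]]) -> Set[str]:
    # Return just the symbol set, whether or not extra data is attached.
    return result[0] if isinstance(result, tuple) else result


# Example usage:
assert normalize_used_symbols({'i', 'N'}) == {'i', 'N'}
assert normalize_used_symbols(({'i', 'N'}, {'j'})) == {'i', 'N'}
```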
21 changes: 0 additions & 21 deletions dace/data.py
@@ -152,27 +152,6 @@ def _prod(sequence):
return functools.reduce(lambda a, b: a * b, sequence, 1)


def find_new_name(name: str, existing_names: Sequence[str]) -> str:
"""
Returns a name that matches the given ``name`` as a prefix, but does not
already exist in the given existing name set. The behavior is typically
to append an underscore followed by a unique (increasing) number. If the
name does not already exist in the set, it is returned as-is.

:param name: The given name to find.
:param existing_names: The set of existing names.
:return: A new name that is not in existing_names.
"""
if name not in existing_names:
return name
cur_offset = 0
new_name = name + '_' + str(cur_offset)
while new_name in existing_names:
cur_offset += 1
new_name = name + '_' + str(cur_offset)
return new_name


@make_properties
class Data:
""" Data type descriptors that can be used as references to memory.