Skip to content

Commit

Permalink
Merge branch 'master' into used-symbol-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tbennun authored Sep 29, 2023
2 parents f7efe03 + 3e73304 commit 3c2fa3f
Show file tree
Hide file tree
Showing 73 changed files with 4,589 additions and 665 deletions.
1 change: 1 addition & 0 deletions dace/codegen/compiled_sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ def get_workspace_sizes(self) -> Dict[dtypes.StorageType, int]:
result: Dict[dtypes.StorageType, int] = {}
for storage in self.external_memory_types:
func = self._lib.get_symbol(f'__dace_get_external_memory_size_{storage.name}')
func.restype = ctypes.c_size_t
result[storage] = func(self._libhandle, *self._lastargs[1])

return result
Expand Down
85 changes: 63 additions & 22 deletions dace/codegen/control_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ class ControlFlow:
# a string with its generated code.
dispatch_state: Callable[[SDFGState], str]

# The parent control flow block of this one, used to avoid generating extraneous ``goto``s
parent: Optional['ControlFlow']

@property
def first_state(self) -> SDFGState:
"""
Expand Down Expand Up @@ -222,11 +225,18 @@ def as_cpp(self, codegen, symbols) -> str:
out_edges = sdfg.out_edges(elem.state)
for j, e in enumerate(out_edges):
if e not in self.gotos_to_ignore:
# If this is the last generated edge and it leads
# to the next state, skip emitting goto
# Skip gotos to immediate successors
successor = None
if (j == (len(out_edges) - 1) and (i + 1) < len(self.elements)):
successor = self.elements[i + 1].first_state
# If this is the last generated edge
if j == (len(out_edges) - 1):
if (i + 1) < len(self.elements):
# If last edge leads to next state in block
successor = self.elements[i + 1].first_state
elif i == len(self.elements) - 1:
# If last edge leads to first state in next block
next_block = _find_next_block(self)
if next_block is not None:
successor = next_block.first_state

expr += elem.generate_transition(sdfg, e, successor)
else:
Expand Down Expand Up @@ -350,6 +360,9 @@ class ForScope(ControlFlow):
init_edges: List[InterstateEdge] #: All initialization edges

def as_cpp(self, codegen, symbols) -> str:

sdfg = self.guard.parent

# Initialize to either "int i = 0" or "i = 0" depending on whether
# the type has been defined
defined_vars = codegen.dispatcher.defined_vars
Expand All @@ -359,9 +372,8 @@ def as_cpp(self, codegen, symbols) -> str:
init = self.itervar
else:
init = f'{symbols[self.itervar]} {self.itervar}'
init += ' = ' + self.init

sdfg = self.guard.parent
init += ' = ' + unparse_interstate_edge(self.init_edges[0].data.assignments[self.itervar],
sdfg, codegen=codegen)

preinit = ''
if self.init_edges:
Expand Down Expand Up @@ -478,13 +490,14 @@ def children(self) -> List[ControlFlow]:

def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[InterstateEdge],
leave_edge: Edge[InterstateEdge], back_edges: List[Edge[InterstateEdge]],
dispatch_state: Callable[[SDFGState], str]) -> Union[ForScope, WhileScope]:
dispatch_state: Callable[[SDFGState],
str], parent_block: GeneralBlock) -> Union[ForScope, WhileScope]:
"""
Helper method that constructs the correct structured loop construct from a
set of states. Can construct for or while loops.
"""

body = GeneralBlock(dispatch_state, [], [], [], [], [], True)
body = GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True)

guard_inedges = sdfg.in_edges(guard)
increment_edges = [e for e in guard_inedges if e in back_edges]
Expand Down Expand Up @@ -535,10 +548,10 @@ def _loop_from_structure(sdfg: SDFG, guard: SDFGState, enter_edge: Edge[Intersta
# Also ignore assignments in increment edge (handled in for stmt)
body.assignments_to_ignore.append(increment_edge)

return ForScope(dispatch_state, itvar, guard, init, condition, update, body, init_edges)
return ForScope(dispatch_state, parent_block, itvar, guard, init, condition, update, body, init_edges)

# Otherwise, it is a while loop
return WhileScope(dispatch_state, guard, condition, body)
return WhileScope(dispatch_state, parent_block, guard, condition, body)


def _cases_from_branches(
Expand Down Expand Up @@ -617,6 +630,31 @@ def _child_of(node: SDFGState, parent: SDFGState, ptree: Dict[SDFGState, SDFGSta
return False


def _find_next_block(block: ControlFlow) -> Optional[ControlFlow]:
    """
    Locates the control flow block that directly follows ``block``.

    The siblings of ``block`` inside its parent are inspected first. If ``block`` is the
    last child, or the parent is a branching construct, the search continues from the
    parent instead.

    :param block: The control flow block whose successor is requested.
    :return: The following control flow block, or ``None`` if a block without a parent
             is reached before a successor is found.
    """
    enclosing = block.parent
    if enclosing is None:
        # No parent to search in, hence no successor
        return None

    # Position of this block among its siblings (matched by identity)
    siblings = enclosing.children
    position = next(idx for idx, sibling in enumerate(siblings) if sibling is block)

    # A following sibling only counts as the successor when the parent is not a branching
    # construct: in branches, the other children are not reachable from the current block
    has_following_sibling = position + 1 < len(siblings)
    if has_following_sibling and not isinstance(enclosing, (IfScope, IfElseChain, SwitchCaseScope)):
        return siblings[position + 1]

    # Otherwise, recursively continue upwards
    return _find_next_block(enclosing)


def _reset_block_parents(block: ControlFlow):
    """
    Re-links the ``parent`` field of every block nested under ``block``.

    Each descendant ends up pointing at the block whose ``children`` list contains it.
    The ``parent`` field of ``block`` itself is left untouched.

    :param block: The control flow block whose descendants are re-linked.
    """
    # Explicit stack of (node, owner) pairs. Children are pushed in reverse so that they
    # are popped (and their parents assigned) in depth-first, left-to-right order.
    pending = [(child, block) for child in reversed(block.children)]
    while pending:
        node, owner = pending.pop()
        node.parent = owner
        pending.extend((grandchild, node) for grandchild in reversed(node.children))


def _structured_control_flow_traversal(sdfg: SDFG,
start: SDFGState,
ptree: Dict[SDFGState, SDFGState],
Expand Down Expand Up @@ -645,7 +683,7 @@ def _structured_control_flow_traversal(sdfg: SDFG,
"""

def make_empty_block():
return GeneralBlock(dispatch_state, [], [], [], [], [], True)
return GeneralBlock(dispatch_state, parent_block, [], [], [], [], [], True)

# Traverse states in custom order
visited = set() if visited is None else visited
Expand All @@ -657,7 +695,7 @@ def make_empty_block():
if node in visited or node is stop:
continue
visited.add(node)
stateblock = SingleState(dispatch_state, node)
stateblock = SingleState(dispatch_state, parent_block, node)

oe = sdfg.out_edges(node)
if len(oe) == 0: # End state
Expand Down Expand Up @@ -708,23 +746,25 @@ def make_empty_block():
if (len(oe) == 2 and oe[0].data.condition_sympy() == sp.Not(oe[1].data.condition_sympy())):
# If without else
if oe[0].dst is mergestate:
branch_block = IfScope(dispatch_state, sdfg, node, oe[1].data.condition, cblocks[oe[1]])
branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[1].data.condition,
cblocks[oe[1]])
elif oe[1].dst is mergestate:
branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]])
branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition,
cblocks[oe[0]])
else:
branch_block = IfScope(dispatch_state, sdfg, node, oe[0].data.condition, cblocks[oe[0]],
cblocks[oe[1]])
branch_block = IfScope(dispatch_state, parent_block, sdfg, node, oe[0].data.condition,
cblocks[oe[0]], cblocks[oe[1]])
else:
# If there are 2 or more edges (one is not the negation of the
# other):
switch = _cases_from_branches(oe, cblocks)
if switch:
# If all edges are of form "x == y" for a single x and
# integer y, it is a switch/case
branch_block = SwitchCaseScope(dispatch_state, sdfg, node, switch[0], switch[1])
branch_block = SwitchCaseScope(dispatch_state, parent_block, sdfg, node, switch[0], switch[1])
else:
# Otherwise, create if/else if/.../else goto exit chain
branch_block = IfElseChain(dispatch_state, sdfg, node,
branch_block = IfElseChain(dispatch_state, parent_block, sdfg, node,
[(e.data.condition, cblocks[e] if e in cblocks else make_empty_block())
for e in oe])
# End of branch classification
Expand All @@ -739,11 +779,11 @@ def make_empty_block():
loop_exit = None
scope = None
if ptree[oe[0].dst] == node and ptree[oe[1].dst] != node:
scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state)
scope = _loop_from_structure(sdfg, node, oe[0], oe[1], back_edges, dispatch_state, parent_block)
body_start = oe[0].dst
loop_exit = oe[1].dst
elif ptree[oe[1].dst] == node and ptree[oe[0].dst] != node:
scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state)
scope = _loop_from_structure(sdfg, node, oe[1], oe[0], back_edges, dispatch_state, parent_block)
body_start = oe[1].dst
loop_exit = oe[0].dst

Expand Down Expand Up @@ -836,7 +876,8 @@ def structured_control_flow_tree(sdfg: SDFG, dispatch_state: Callable[[SDFGState
if len(common_frontier) == 1:
branch_merges[state] = next(iter(common_frontier))

root_block = GeneralBlock(dispatch_state, [], [], [], [], [], True)
root_block = GeneralBlock(dispatch_state, None, [], [], [], [], [], True)
_structured_control_flow_traversal(sdfg, sdfg.start_state, ptree, branch_merges, back_edges, dispatch_state,
root_block)
_reset_block_parents(root_block)
return root_block
31 changes: 31 additions & 0 deletions dace/codegen/cppunparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import numpy as np
import os
import tokenize
import warnings

import sympy
import dace
Expand Down Expand Up @@ -733,6 +734,21 @@ def _Num(self, t):
if isinstance(t.n, complex):
dtype = dtypes.DTYPE_TO_TYPECLASS[complex]

# Handle large integer values
if isinstance(t.n, int):
bits = t.n.bit_length()
if bits == 32: # Integer, potentially unsigned
if t.n >= 0: # unsigned
repr_n += 'U'
else: # signed, 64-bit
repr_n += 'LL'
elif 32 < bits <= 63:
repr_n += 'LL'
elif bits == 64 and t.n >= 0:
repr_n += 'ULL'
elif bits >= 64:
warnings.warn(f'Value wider than 64 bits encountered in expression ({t.n}), emitting as-is')

if repr_n.endswith("j"):
self.write("%s(0, %s)" % (dtype, repr_n.replace("inf", INFSTR)[:-1]))
else:
Expand Down Expand Up @@ -831,8 +847,23 @@ def _Tuple(
self.write(")")

unop = {"Invert": "~", "Not": "!", "UAdd": "+", "USub": "-"}
unop_lambda = {'Invert': (lambda x: ~x), 'Not': (lambda x: not x), 'UAdd': (lambda x: +x), 'USub': (lambda x: -x)}

def _UnaryOp(self, t):
# Dispatch constants after applying the operation
if sys.version_info[:2] < (3, 8):
if isinstance(t.operand, ast.Num):
newval = self.unop_lambda[t.op.__class__.__name__](t.operand.n)
newnode = ast.Num(n=newval)
self.dispatch(newnode)
return
else:
if isinstance(t.operand, ast.Constant):
newval = self.unop_lambda[t.op.__class__.__name__](t.operand.value)
newnode = ast.Constant(value=newval)
self.dispatch(newnode)
return

self.write("(")
self.write(self.unop[t.op.__class__.__name__])
self.write(" ")
Expand Down
8 changes: 6 additions & 2 deletions dace/codegen/targets/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ def ptr(name: str, desc: data.Data, sdfg: SDFG = None, framecode=None) -> str:
from dace.codegen.targets.framecode import DaCeCodeGenerator # Avoid import loop
framecode: DaCeCodeGenerator = framecode

if '.' in name:
root = name.split('.')[0]
if root in sdfg.arrays and isinstance(sdfg.arrays[root], data.Structure):
name = name.replace('.', '->')

# Special case: If memory is persistent and defined in this SDFG, add state
# struct to name
if (desc.transient and desc.lifetime in (dtypes.AllocationLifetime.Persistent, dtypes.AllocationLifetime.External)):
Expand Down Expand Up @@ -992,8 +997,7 @@ def _Name(self, t: ast.Name):
if t.id not in self.sdfg.arrays:
return super()._Name(t)

# Replace values with their code-generated names (for example,
# persistent arrays)
# Replace values with their code-generated names (for example, persistent arrays)
desc = self.sdfg.arrays[t.id]
self.write(ptr(t.id, desc, self.sdfg, self.codegen))

Expand Down
18 changes: 6 additions & 12 deletions dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ def __init__(self, frame_codegen, sdfg):
def _visit_structure(struct: data.Structure, args: dict, prefix: str = ''):
for k, v in struct.members.items():
if isinstance(v, data.Structure):
_visit_structure(v, args, f'{prefix}.{k}')
_visit_structure(v, args, f'{prefix}->{k}')
elif isinstance(v, data.StructArray):
_visit_structure(v.stype, args, f'{prefix}.{k}')
_visit_structure(v.stype, args, f'{prefix}->{k}')
elif isinstance(v, data.Data):
args[f'{prefix}.{k}'] = v
args[f'{prefix}->{k}'] = v

# Keeps track of generated connectors, so we know how to access them in nested scopes
arglist = dict(self._frame.arglist)
Expand Down Expand Up @@ -221,8 +221,8 @@ def allocate_view(self, sdfg: SDFG, dfg: SDFGState, state_id: int, node: nodes.A
if isinstance(v, data.Data):
ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype
defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer
self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef)
self._dispatcher.defined_vars.add(f"{name}.{k}", defined_type, ctypedef)
self._dispatcher.declared_arrays.add(f"{name}->{k}", defined_type, ctypedef)
self._dispatcher.defined_vars.add(f"{name}->{k}", defined_type, ctypedef)
# TODO: Find a better way to do this (the issue is with pointers of pointers)
if atype.endswith('*'):
atype = atype[:-1]
Expand Down Expand Up @@ -299,9 +299,6 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d
name = node.data
alloc_name = cpp.ptr(name, nodedesc, sdfg, self._frame)
name = alloc_name
# NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and
# NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows.
alloc_name = alloc_name.replace('.', '->')

if nodedesc.transient is False:
return
Expand Down Expand Up @@ -331,7 +328,7 @@ def allocate_array(self, sdfg, dfg, state_id, node, nodedesc, function_stream, d
if isinstance(v, data.Data):
ctypedef = dtypes.pointer(v.dtype).ctype if isinstance(v, data.Array) else v.dtype.ctype
defined_type = DefinedType.Scalar if isinstance(v, data.Scalar) else DefinedType.Pointer
self._dispatcher.declared_arrays.add(f"{name}.{k}", defined_type, ctypedef)
self._dispatcher.declared_arrays.add(f"{name}->{k}", defined_type, ctypedef)
self.allocate_array(sdfg, dfg, state_id, nodes.AccessNode(f"{name}.{k}"), v, function_stream,
declaration_stream, allocation_stream)
return
Expand Down Expand Up @@ -1184,9 +1181,6 @@ def memlet_definition(self,
if not types:
types = self._dispatcher.defined_vars.get(ptr, is_global=True)
var_type, ctypedef = types
# NOTE: `expr` may only be a name or a sequence of names and dots. The latter indicates nested data and
# NOTE: structures. Since structures are implemented as pointers, we replace dots with arrows.
ptr = ptr.replace('.', '->')

if fpga.is_fpga_array(desc):
decouple_array_interfaces = Config.get_bool("compiler", "xilinx", "decouple_array_interfaces")
Expand Down
15 changes: 11 additions & 4 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -1939,6 +1939,13 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_
kernel_params: list, function_stream: CodeIOStream, kernel_stream: CodeIOStream):
node = dfg_scope.source_nodes()[0]

# Get the thread/block index type
ttype = Config.get('compiler', 'cuda', 'thread_id_type')
tidtype = getattr(dtypes, ttype, False)
if not isinstance(tidtype, dtypes.typeclass):
raise ValueError(f'Configured type "{ttype}" for ``thread_id_type`` does not match any DaCe data type. '
'See ``dace.dtypes`` for available types (for example ``int32``).')

# allocating shared memory for dynamic threadblock maps
if has_dtbmap:
kernel_stream.write(
Expand Down Expand Up @@ -1990,8 +1997,8 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_

expr = _topy(bidx[i]).replace('__DAPB%d' % i, block_expr)

kernel_stream.write('int %s = %s;' % (varname, expr), sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, 'int')
kernel_stream.write(f'{tidtype.ctype} {varname} = {expr};', sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, tidtype.ctype)

# Delinearize beyond the third dimension
if len(krange) > 3:
Expand All @@ -2010,8 +2017,8 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_
)

expr = _topy(bidx[i]).replace('__DAPB%d' % i, block_expr)
kernel_stream.write('int %s = %s;' % (varname, expr), sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, 'int')
kernel_stream.write(f'{tidtype.ctype} {varname} = {expr};', sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, tidtype.ctype)

# Dispatch internal code
assert CUDACodeGen._in_device_code is False
Expand Down
6 changes: 3 additions & 3 deletions dace/codegen/targets/framecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def dispatch_state(state: SDFGState) -> str:
# If disabled, generate entire graph as general control flow block
states_topological = list(sdfg.topological_sort(sdfg.start_state))
last = states_topological[-1]
cft = cflow.GeneralBlock(dispatch_state,
cft = cflow.GeneralBlock(dispatch_state, None,
[cflow.SingleState(dispatch_state, s, s is last) for s in states_topological], [],
[], [], [], False)

Expand Down Expand Up @@ -886,8 +886,8 @@ def generate_code(self,

# NOTE: NestedSDFGs frequently contain tautologies in their symbol mapping, e.g., `'i': i`. Do not
# redefine the symbols in such cases.
if (not is_top_level and isvarName in sdfg.parent_nsdfg_node.symbol_mapping.keys()
and str(sdfg.parent_nsdfg_node.symbol_mapping[isvarName] == isvarName)):
if (not is_top_level and isvarName in sdfg.parent_nsdfg_node.symbol_mapping
and str(sdfg.parent_nsdfg_node.symbol_mapping[isvarName]) == str(isvarName)):
continue
isvar = data.Scalar(isvarType)
callsite_stream.write('%s;\n' % (isvar.as_arg(with_types=True, name=isvarName)), sdfg)
Expand Down
Loading

0 comments on commit 3c2fa3f

Please sign in to comment.