Skip to content

Commit

Permalink
Merge branch 'master' into various-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tbennun authored Nov 15, 2023
2 parents 9cf66f2 + 43ca982 commit 0b286a0
Show file tree
Hide file tree
Showing 76 changed files with 7,471 additions and 1,094 deletions.
9 changes: 7 additions & 2 deletions dace/cli/sdfv.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,15 @@ def view(sdfg: dace.SDFG, filename: Optional[Union[str, int]] = None):
"""
# If vscode is open, try to open it inside vscode
if filename is None:
if 'VSCODE_IPC_HOOK_CLI' in os.environ or 'VSCODE_GIT_IPC_HANDLE' in os.environ:
filename = tempfile.mktemp(suffix='.sdfg')
if (
'VSCODE_IPC_HOOK' in os.environ
or 'VSCODE_IPC_HOOK_CLI' in os.environ
or 'VSCODE_GIT_IPC_HANDLE' in os.environ
):
fd, filename = tempfile.mkstemp(suffix='.sdfg')
sdfg.save(filename)
os.system(f'code {filename}')
os.close(fd)
return

if type(sdfg) is dace.SDFG:
Expand Down
3 changes: 2 additions & 1 deletion dace/codegen/compiled_sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def get_state_struct(self) -> ctypes.Structure:
return ctypes.cast(self._libhandle, ctypes.POINTER(self._try_parse_state_struct())).contents

def _try_parse_state_struct(self) -> Optional[Type[ctypes.Structure]]:
from dace.codegen.targets.cpp import mangle_dace_state_struct_name # Avoid import cycle
# the path of the main sdfg file containing the state struct
main_src_path = os.path.join(os.path.dirname(os.path.dirname(self._lib._library_filename)), "src", "cpu",
self._sdfg.name + ".cpp")
Expand All @@ -247,7 +248,7 @@ def _try_parse_state_struct(self) -> Optional[Type[ctypes.Structure]]:
code_flat = code.replace("\n", " ")

# try to find the first struct definition that matches the name we are looking for in the sdfg file
match = re.search(f"struct {self._sdfg.name}_t {{(.*?)}};", code_flat)
match = re.search(f"struct {mangle_dace_state_struct_name(self._sdfg)} {{(.*?)}};", code_flat)
if match is None or len(match.groups()) != 1:
return None

Expand Down
2 changes: 1 addition & 1 deletion dace/codegen/instrumentation/data/data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def __init__(self):

def _generate_report_setter(self, sdfg: SDFG) -> str:
return f'''
DACE_EXPORTED void __dace_set_instrumented_data_report({sdfg.name}_t *__state, const char *dirpath) {{
DACE_EXPORTED void __dace_set_instrumented_data_report({cpp.mangle_dace_state_struct_name(sdfg)} *__state, const char *dirpath) {{
__state->serializer->set_folder(dirpath);
}}
'''
Expand Down
13 changes: 8 additions & 5 deletions dace/codegen/instrumentation/papi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dace.sdfg.graph import SubgraphView
from dace.memlet import Memlet
from dace.sdfg import scope_contains_scope
from dace.sdfg.state import StateGraphView
from dace.sdfg.state import DataflowGraphView

import sympy as sp
import os
Expand Down Expand Up @@ -392,7 +392,7 @@ def should_instrument_entry(map_entry: EntryNode) -> bool:
return cond

@staticmethod
def has_surrounding_perfcounters(node, dfg: StateGraphView):
def has_surrounding_perfcounters(node, dfg: DataflowGraphView):
""" Returns true if there is a possibility that this node is part of a
section that is profiled. """
parent = dfg.entry_node(node)
Expand Down Expand Up @@ -605,7 +605,7 @@ def get_memlet_byte_size(sdfg: dace.SDFG, memlet: Memlet):
return memlet.volume * memdata.dtype.bytes

@staticmethod
def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: StateGraphView):
def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg: DataflowGraphView):
scope_dict = sdfg.node(state_id).scope_dict()

out_costs = 0
Expand Down Expand Up @@ -636,7 +636,10 @@ def get_out_memlet_costs(sdfg: dace.SDFG, state_id: int, node: nodes.Node, dfg:
return out_costs

@staticmethod
def get_tasklet_byte_accesses(tasklet: nodes.CodeNode, dfg: StateGraphView, sdfg: dace.SDFG, state_id: int) -> str:
def get_tasklet_byte_accesses(tasklet: nodes.CodeNode,
dfg: DataflowGraphView,
sdfg: dace.SDFG,
state_id: int) -> str:
""" Get the amount of bytes processed by `tasklet`. The formula is
sum(inedges * size) + sum(outedges * size) """
in_accum = []
Expand Down Expand Up @@ -693,7 +696,7 @@ def get_memory_input_size(node, sdfg, state_id) -> str:
return sym2cpp(input_size)

@staticmethod
def accumulate_byte_movement(outermost_node, node, dfg: StateGraphView, sdfg, state_id):
def accumulate_byte_movement(outermost_node, node, dfg: DataflowGraphView, sdfg, state_id):

itvars = dict() # initialize an empty dict

Expand Down
16 changes: 16 additions & 0 deletions dace/codegen/targets/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,22 @@
from dace.codegen.dispatcher import TargetDispatcher


def mangle_dace_state_struct_name(sdfg: Union[SDFG, str]) -> str:
"""This function creates a unique type name for the `SDFG`'s state `struct`.
The function uses the `compiler.codegen_state_struct_suffix`
configuration entry for deriving the type name of the state `struct`.
:param sdfg: The SDFG for which the name should be generated.
"""
name = sdfg if isinstance(sdfg, str) else sdfg.name
state_suffix = Config.get("compiler", "codegen_state_struct_suffix")
type_name = f"{name}{state_suffix}"
if not dtypes.validate_name(type_name):
raise ValueError(f"The mangled type name `{type_name}` of the state struct of SDFG '{name}' is invalid.")
return type_name


def copy_expr(
dispatcher,
sdfg,
Expand Down
2 changes: 1 addition & 1 deletion dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1532,7 +1532,7 @@ def generate_nsdfg_header(self, sdfg, state, state_id, node, memlet_references,

if state_struct:
toplevel_sdfg: SDFG = sdfg.sdfg_list[0]
arguments.append(f'{toplevel_sdfg.name}_t *__state')
arguments.append(f'{cpp.mangle_dace_state_struct_name(toplevel_sdfg)} *__state')

# Add "__restrict__" keywords to arguments that do not alias with others in the context of this SDFG
restrict_args = []
Expand Down
113 changes: 92 additions & 21 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
import ast
import copy
import ctypes
import functools
import os
import warnings
from typing import Any, Dict, List, Set, Tuple, Union
from typing import Dict, List, Set, Tuple, Union

import networkx as nx
import sympy
Expand All @@ -14,7 +11,6 @@
import dace
from dace import data as dt
from dace import dtypes, registry
from dace import sdfg as sd
from dace import subsets, symbolic
from dace.codegen import common, cppunparse
from dace.codegen.codeobject import CodeObject
Expand All @@ -23,7 +19,7 @@
from dace.codegen.targets import cpp
from dace.codegen.common import update_persistent_desc
from dace.codegen.targets.cpp import (codeblock_to_cpp, cpp_array_expr, memlet_copy_to_absolute_strides, sym2cpp,
synchronize_streams, unparse_cr, unparse_cr_split)
synchronize_streams, unparse_cr, mangle_dace_state_struct_name)
from dace.codegen.targets.target import IllegalCopy, TargetCodeGenerator, make_absolute
from dace.config import Config
from dace.frontend import operations
Expand Down Expand Up @@ -345,12 +341,12 @@ def get_generated_codeobjects(self):
{file_header}
DACE_EXPORTED int __dace_init_cuda({sdfg.name}_t *__state{params});
DACE_EXPORTED int __dace_exit_cuda({sdfg.name}_t *__state);
DACE_EXPORTED int __dace_init_cuda({sdfg_state_name} *__state{params});
DACE_EXPORTED int __dace_exit_cuda({sdfg_state_name} *__state);
{other_globalcode}
int __dace_init_cuda({sdfg.name}_t *__state{params}) {{
int __dace_init_cuda({sdfg_state_name} *__state{params}) {{
int count;
// Check that we are able to run {backend} code
Expand Down Expand Up @@ -389,7 +385,7 @@ def get_generated_codeobjects(self):
return 0;
}}
int __dace_exit_cuda({sdfg.name}_t *__state) {{
int __dace_exit_cuda({sdfg_state_name} *__state) {{
{exitcode}
// Synchronize and check for CUDA errors
Expand All @@ -409,7 +405,7 @@ def get_generated_codeobjects(self):
return __err;
}}
DACE_EXPORTED bool __dace_gpu_set_stream({sdfg.name}_t *__state, int streamid, gpuStream_t stream)
DACE_EXPORTED bool __dace_gpu_set_stream({sdfg_state_name} *__state, int streamid, gpuStream_t stream)
{{
if (streamid < 0 || streamid >= {nstreams})
return false;
Expand All @@ -419,14 +415,15 @@ def get_generated_codeobjects(self):
return true;
}}
DACE_EXPORTED void __dace_gpu_set_all_streams({sdfg.name}_t *__state, gpuStream_t stream)
DACE_EXPORTED void __dace_gpu_set_all_streams({sdfg_state_name} *__state, gpuStream_t stream)
{{
for (int i = 0; i < {nstreams}; ++i)
__state->gpu_context->streams[i] = stream;
}}
{localcode}
""".format(params=params_comma,
sdfg_state_name=mangle_dace_state_struct_name(self._global_sdfg),
initcode=initcode.getvalue(),
exitcode=exitcode.getvalue(),
other_globalcode=self._globalcode.getvalue(),
Expand All @@ -445,7 +442,7 @@ def node_dispatch_predicate(self, sdfg, state, node):
if hasattr(node, 'schedule'): # NOTE: Works on nodes and scopes
if node.schedule in dtypes.GPU_SCHEDULES:
return True
if isinstance(node, nodes.NestedSDFG) and CUDACodeGen._in_device_code:
if CUDACodeGen._in_device_code:
return True
return False

Expand Down Expand Up @@ -1324,11 +1321,11 @@ def generate_devicelevel_state(self, sdfg, state, function_stream, callsite_stre

if write_scope == 'grid':
callsite_stream.write("if (blockIdx.x == 0 "
"&& threadIdx.x == 0) "
"{ // sub-graph begin", sdfg, state.node_id)
"&& threadIdx.x == 0) "
"{ // sub-graph begin", sdfg, state.node_id)
elif write_scope == 'block':
callsite_stream.write("if (threadIdx.x == 0) "
"{ // sub-graph begin", sdfg, state.node_id)
"{ // sub-graph begin", sdfg, state.node_id)
else:
callsite_stream.write("{ // subgraph begin", sdfg, state.node_id)
else:
Expand Down Expand Up @@ -1567,7 +1564,7 @@ def generate_scope(self, sdfg, dfg_scope, state_id, function_stream, callsite_st
self.scope_entry_stream = old_entry_stream
self.scope_exit_stream = old_exit_stream

state_param = [f'{self._global_sdfg.name}_t *__state']
state_param = [f'{mangle_dace_state_struct_name(self._global_sdfg)} *__state']

# Write callback function definition
self._localcode.write(
Expand Down Expand Up @@ -2519,15 +2516,17 @@ def generate_devicelevel_scope(self, sdfg, dfg_scope, state_id, function_stream,
def generate_node(self, sdfg, dfg, state_id, node, function_stream, callsite_stream):
if self.node_dispatch_predicate(sdfg, dfg, node):
# Dynamically obtain node generator according to class name
gen = getattr(self, '_generate_' + type(node).__name__)
gen(sdfg, dfg, state_id, node, function_stream, callsite_stream)
return
gen = getattr(self, '_generate_' + type(node).__name__, False)
if gen is not False: # Not every node type has a code generator here
gen(sdfg, dfg, state_id, node, function_stream, callsite_stream)
return

if not CUDACodeGen._in_device_code:
self._cpu_codegen.generate_node(sdfg, dfg, state_id, node, function_stream, callsite_stream)
return

self._locals.clear_scope(self._code_state.indentation + 1)
if isinstance(node, nodes.ExitNode):
self._locals.clear_scope(self._code_state.indentation + 1)

if CUDACodeGen._in_device_code and isinstance(node, nodes.MapExit):
return # skip
Expand Down Expand Up @@ -2591,6 +2590,78 @@ def _generate_MapExit(self, sdfg, dfg, state_id, node, function_stream, callsite

self._cpu_codegen._generate_MapExit(sdfg, dfg, state_id, node, function_stream, callsite_stream)

def _get_thread_id(self) -> str:
result = 'threadIdx.x'
if self._block_dims[1] != 1:
result += f' + ({sym2cpp(self._block_dims[0])}) * threadIdx.y'
if self._block_dims[2] != 1:
result += f' + ({sym2cpp(self._block_dims[0] * self._block_dims[1])}) * threadIdx.z'
return result

def _get_warp_id(self) -> str:
return f'(({self._get_thread_id()}) / warpSize)'

def _get_block_id(self) -> str:
result = 'blockIdx.x'
if self._block_dims[1] != 1:
result += f' + gridDim.x * blockIdx.y'
if self._block_dims[2] != 1:
result += f' + gridDim.x * gridDim.y * blockIdx.z'
return result

def _generate_condition_from_location(self, name: str, index_expr: str, node: nodes.Tasklet,
callsite_stream: CodeIOStream) -> str:
if name not in node.location:
return 0

location: Union[int, str, subsets.Range] = node.location[name]
if isinstance(location, str) and ':' in location:
location = subsets.Range.from_string(location)
elif symbolic.issymbolic(location):
location = sym2cpp(location)

if isinstance(location, subsets.Range):
# Range of indices
if len(location) != 1:
raise ValueError(f'Only one-dimensional ranges are allowed for {name} specialization, {location} given')
begin, end, stride = location[0]
rb, re, rs = sym2cpp(begin), sym2cpp(end), sym2cpp(stride)
cond = ''
cond += f'(({index_expr}) >= {rb}) && (({index_expr}) <= {re})'
if stride != 1:
cond += f' && ((({index_expr}) - {rb}) % {rs} == 0)'

callsite_stream.write(f'if ({cond}) {{')
else:
# Single-element
callsite_stream.write(f'if (({index_expr}) == {location}) {{')

return 1

def _generate_Tasklet(self, sdfg: SDFG, dfg, state_id: int, node: nodes.Tasklet, function_stream: CodeIOStream,
callsite_stream: CodeIOStream):
generated_preamble_scopes = 0
if self._in_device_code:
# If location dictionary prescribes that the code should run on a certain group of threads/blocks,
# add condition
generated_preamble_scopes += self._generate_condition_from_location('gpu_thread', self._get_thread_id(),
node, callsite_stream)
generated_preamble_scopes += self._generate_condition_from_location('gpu_warp', self._get_warp_id(), node,
callsite_stream)
generated_preamble_scopes += self._generate_condition_from_location('gpu_block', self._get_block_id(), node,
callsite_stream)

# Call standard tasklet generation
old_codegen = self._cpu_codegen.calling_codegen
self._cpu_codegen.calling_codegen = self
self._cpu_codegen._generate_Tasklet(sdfg, dfg, state_id, node, function_stream, callsite_stream)
self._cpu_codegen.calling_codegen = old_codegen

if generated_preamble_scopes > 0:
# Generate appropriate postamble
for i in range(generated_preamble_scopes):
callsite_stream.write('}', sdfg, state_id, node)

def make_ptr_vector_cast(self, *args, **kwargs):
return cpp.make_ptr_vector_cast(*args, **kwargs)

Expand Down
2 changes: 1 addition & 1 deletion dace/codegen/targets/fpga.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ def generate_state(self, sdfg: dace.SDFG, state: dace.SDFGState, function_stream
kernel_args_opencl = []

# Include state in args
kernel_args_opencl.append(f"{self._global_sdfg.name}_t *__state")
kernel_args_opencl.append(f"{cpp.mangle_dace_state_struct_name(self._global_sdfg)} *__state")
kernel_args_call_host.append(f"__state")

for is_output, arg_name, arg, interface_id in state_parameters:
Expand Down
Loading

0 comments on commit 0b286a0

Please sign in to comment.