Skip to content

Commit

Permalink
Merge branch 'master' into struct_access_interstate_edge_bug
Browse files Browse the repository at this point in the history
  • Loading branch information
phschaad authored Feb 25, 2024
2 parents ee3ce8c + 213e3ce commit 202070d
Show file tree
Hide file tree
Showing 94 changed files with 2,137 additions and 853 deletions.
14 changes: 9 additions & 5 deletions dace/codegen/compiled_sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ def _array_interface_ptr(array: Any, storage: dtypes.StorageType) -> int:
"""
if hasattr(array, 'data_ptr'):
return array.data_ptr()
if isinstance(array, ctypes.Array):
return ctypes.addressof(array)

if storage == dtypes.StorageType.GPU_Global:
try:
Expand Down Expand Up @@ -508,13 +510,15 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
if atype.optional is False: # If array cannot be None
raise TypeError(f'Passing a None value to a non-optional array in argument "{a}"')
# Otherwise, None values are passed as null pointers below
elif isinstance(arg, ctypes._Pointer):
pass
else:
raise TypeError(f'Passing an object (type {type(arg).__name__}) to an array in argument "{a}"')
elif is_array and not is_dtArray:
# GPU scalars and return values are pointers, so this is fine
if atype.storage != dtypes.StorageType.GPU_Global and not a.startswith('__return'):
raise TypeError(f'Passing an array to a scalar (type {atype.dtype.ctype}) in argument "{a}"')
elif (is_dtArray and is_ndarray and not isinstance(atype, dt.StructArray)
elif (is_dtArray and is_ndarray and not isinstance(atype, dt.ContainerArray)
and atype.dtype.as_numpy_dtype() != arg.dtype):
# Make exception for vector types
if (isinstance(atype.dtype, dtypes.vector) and atype.dtype.vtype.as_numpy_dtype() == arg.dtype):
Expand Down Expand Up @@ -565,14 +569,14 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
arg_ctypes = tuple(at.dtype.as_ctypes() for at in argtypes)

constants = self.sdfg.constants
callparams = tuple((arg, actype, atype, aname)
callparams = tuple((actype(arg.get()) if isinstance(arg, symbolic.symbol) else arg, actype, atype, aname)
for arg, actype, atype, aname in zip(arglist, arg_ctypes, argtypes, argnames)
if not (symbolic.issymbolic(arg) and (hasattr(arg, 'name') and arg.name in constants)))

symbols = self._free_symbols
initargs = tuple(
actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg for arg, actype, atype, aname in callparams
if aname in symbols)
actype(arg) if not isinstance(arg, (ctypes._SimpleCData, ctypes._Pointer)) else arg
for arg, actype, atype, aname in callparams if aname in symbols)

try:
# Replace arrays with their base host/device pointers
Expand All @@ -581,7 +585,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]:
if dtypes.is_array(arg):
newargs[i] = ctypes.c_void_p(_array_interface_ptr(
arg, atype.storage)) # `c_void_p` is subclass of `ctypes._SimpleCData`.
elif not isinstance(arg, (ctypes._SimpleCData)):
elif not isinstance(arg, (ctypes._SimpleCData, ctypes._Pointer)):
newargs[i] = actype(arg)
else:
newargs[i] = arg
Expand Down
12 changes: 6 additions & 6 deletions dace/codegen/control_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class SingleState(ControlFlow):
def as_cpp(self, codegen, symbols) -> str:
sdfg = self.state.parent

expr = '__state_{}_{}:;\n'.format(sdfg.sdfg_id, self.state.label)
expr = '__state_{}_{}:;\n'.format(sdfg.cfg_id, self.state.label)
if self.state.number_of_nodes() > 0:
expr += '{\n'
expr += self.dispatch_state(self.state)
Expand All @@ -138,7 +138,7 @@ def as_cpp(self, codegen, symbols) -> str:

# If any state has no children, it should jump to the end of the SDFG
if not self.last_state and sdfg.out_degree(self.state) == 0:
expr += 'goto __state_exit_{};\n'.format(sdfg.sdfg_id)
expr += 'goto __state_exit_{};\n'.format(sdfg.cfg_id)
return expr

def generate_transition(self,
Expand Down Expand Up @@ -175,7 +175,7 @@ def generate_transition(self,

if (not edge.data.is_unconditional()
or ((successor is None or edge.dst is not successor) and not assignments_only)):
expr += 'goto __state_{}_{};\n'.format(sdfg.sdfg_id, edge.dst.label)
expr += 'goto __state_{}_{};\n'.format(sdfg.cfg_id, edge.dst.label)

if not edge.data.is_unconditional() and not assignments_only:
expr += '}\n'
Expand Down Expand Up @@ -257,7 +257,7 @@ def as_cpp(self, codegen, symbols) -> str:
# One unconditional edge
if (len(out_edges) == 1 and out_edges[0].data.is_unconditional()):
continue
expr += f'goto __state_exit_{sdfg.sdfg_id};\n'
expr += f'goto __state_exit_{sdfg.cfg_id};\n'

return expr

Expand Down Expand Up @@ -326,7 +326,7 @@ def as_cpp(self, codegen, symbols) -> str:
# execution should end, so we emit an "else goto exit" here.
if len(self.body) > 0:
expr += ' else {\n'
expr += 'goto __state_exit_{};\n'.format(self.sdfg.sdfg_id)
expr += 'goto __state_exit_{};\n'.format(self.sdfg.cfg_id)
if len(self.body) > 0:
expr += '\n}'
return expr
Expand Down Expand Up @@ -475,7 +475,7 @@ def as_cpp(self, codegen, symbols) -> str:
expr += f'case {case}: {{\n'
expr += body.as_cpp(codegen, symbols)
expr += 'break;\n}\n'
expr += f'default: goto __state_exit_{self.sdfg.sdfg_id};'
expr += f'default: goto __state_exit_{self.sdfg.cfg_id};'
expr += '\n}\n'
return expr

Expand Down
4 changes: 2 additions & 2 deletions dace/codegen/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,11 +505,11 @@ def get_copy_dispatcher(self, src_node, dst_node, edge, sdfg, state):
dst_is_data = True

# Skip copies to/from views where edge matches
if src_is_data and isinstance(src_node.desc(sdfg), (dt.StructureView, dt.View)):
if src_is_data and isinstance(src_node.desc(sdfg), dt.View):
e = sdutil.get_view_edge(state, src_node)
if e is edge:
return None
if dst_is_data and isinstance(dst_node.desc(sdfg), (dt.StructureView, dt.View)):
if dst_is_data and isinstance(dst_node.desc(sdfg), dt.View):
e = sdutil.get_view_edge(state, dst_node)
if e is edge:
return None
Expand Down
4 changes: 2 additions & 2 deletions dace/codegen/instrumentation/data/data_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def on_node_end(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode, oute
# Create UUID
state_id = sdfg.node_id(state)
node_id = state.node_id(node)
uuid = f'{sdfg.sdfg_id}_{state_id}_{node_id}'
uuid = f'{sdfg.cfg_id}_{state_id}_{node_id}'

# Get optional pre/postamble for instrumenting device data
preamble, postamble = '', ''
Expand Down Expand Up @@ -277,7 +277,7 @@ def on_node_begin(self, sdfg: SDFG, state: SDFGState, node: nodes.AccessNode, ou
# Create UUID
state_id = sdfg.node_id(state)
node_id = state.node_id(node)
uuid = f'{sdfg.sdfg_id}_{state_id}_{node_id}'
uuid = f'{sdfg.cfg_id}_{state_id}_{node_id}'

# Get optional pre/postamble for instrumenting device data
preamble, postamble = '', ''
Expand Down
4 changes: 2 additions & 2 deletions dace/codegen/instrumentation/gpu_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def _report(self, timer_name: str, sdfg=None, state=None, node=None):
int __dace_micros_{id} = (int) (__dace_ms_{id} * 1000.0);
unsigned long int __dace_ts_end_{id} = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count();
unsigned long int __dace_ts_start_{id} = __dace_ts_end_{id} - __dace_micros_{id};
__state->report.add_completion("{timer_name}", "GPU", __dace_ts_start_{id}, __dace_ts_end_{id}, {sdfg_id}, {state_id}, {node_id});'''.format(
__state->report.add_completion("{timer_name}", "GPU", __dace_ts_start_{id}, __dace_ts_end_{id}, {cfg_id}, {state_id}, {node_id});'''.format(
id=idstr,
timer_name=timer_name,
backend=self.backend,
sdfg_id=sdfg.sdfg_id,
cfg_id=sdfg.cfg_id,
state_id=state_id,
node_id=node_id)

Expand Down
52 changes: 26 additions & 26 deletions dace/codegen/instrumentation/likwid.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def on_sdfg_end(self, sdfg, local_stream, global_stream):
'''
local_stream.write(outer_code, sdfg)

for region, sdfg_id, state_id, node_id in self._regions:
for region, cfg_id, state_id, node_id in self._regions:
report_code = f'''
#pragma omp parallel
{{
Expand All @@ -187,7 +187,7 @@ def on_sdfg_end(self, sdfg, local_stream, global_stream):
for (int t = 0; t < num_threads; t++)
{{
__state->report.add_completion("Timer", "likwid", 0, time[t] * 1000 * 1000, t, {sdfg_id}, {state_id}, {node_id});
__state->report.add_completion("Timer", "likwid", 0, time[t] * 1000 * 1000, t, {cfg_id}, {state_id}, {node_id});
}}
for (int i = 0; i < nevents; i++)
Expand All @@ -196,7 +196,7 @@ def on_sdfg_end(self, sdfg, local_stream, global_stream):
for (int t = 0; t < num_threads; t++)
{{
__state->report.add_counter("{region}", "likwid", event_name, events[t][i], t, {sdfg_id}, {state_id}, {node_id});
__state->report.add_counter("{region}", "likwid", event_name, events[t][i], t, {cfg_id}, {state_id}, {node_id});
}}
}}
}}
Expand All @@ -214,11 +214,11 @@ def on_state_begin(self, sdfg, state, local_stream, global_stream):
return

if state.instrument == dace.InstrumentationType.LIKWID_CPU:
sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = -1
region = f"state_{sdfg_id}_{state_id}_{node_id}"
self._regions.append((region, sdfg_id, state_id, node_id))
region = f"state_{cfg_id}_{state_id}_{node_id}"
self._regions.append((region, cfg_id, state_id, node_id))

marker_code = f'''
#pragma omp parallel
Expand Down Expand Up @@ -250,10 +250,10 @@ def on_state_end(self, sdfg, state, local_stream, global_stream):
return

if state.instrument == dace.InstrumentationType.LIKWID_CPU:
sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = -1
region = f"state_{sdfg_id}_{state_id}_{node_id}"
region = f"state_{cfg_id}_{state_id}_{node_id}"

marker_code = f'''
#pragma omp parallel
Expand All @@ -272,12 +272,12 @@ def on_scope_entry(self, sdfg, state, node, outer_stream, inner_stream, global_s
elif node.schedule not in LIKWIDInstrumentationCPU.perf_whitelist_schedules:
raise TypeError("Unsupported schedule on scope")

sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = state.node_id(node)
region = f"scope_{sdfg_id}_{state_id}_{node_id}"
region = f"scope_{cfg_id}_{state_id}_{node_id}"

self._regions.append((region, sdfg_id, state_id, node_id))
self._regions.append((region, cfg_id, state_id, node_id))
marker_code = f'''
#pragma omp parallel
{{
Expand All @@ -294,10 +294,10 @@ def on_scope_exit(self, sdfg, state, node, outer_stream, inner_stream, global_st
if not self._likwid_used or entry_node.instrument != dace.InstrumentationType.LIKWID_CPU:
return

sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = state.node_id(entry_node)
region = f"scope_{sdfg_id}_{state_id}_{node_id}"
region = f"scope_{cfg_id}_{state_id}_{node_id}"

marker_code = f'''
#pragma omp parallel
Expand Down Expand Up @@ -366,7 +366,7 @@ def on_sdfg_end(self, sdfg, local_stream, global_stream):
if not self._likwid_used or sdfg.parent is not None:
return

for region, sdfg_id, state_id, node_id in self._regions:
for region, cfg_id, state_id, node_id in self._regions:
report_code = f'''
{{
double *events = (double*) malloc(MAX_NUM_EVENTS * sizeof(double));
Expand All @@ -377,14 +377,14 @@ def on_sdfg_end(self, sdfg, local_stream, global_stream):
LIKWID_NVMARKER_GET("{region}", &ngpus, &nevents, &events, &time, &count);
__state->report.add_completion("Timer", "likwid_gpu", 0, time * 1000 * 1000, 0, {sdfg_id}, {state_id}, {node_id});
__state->report.add_completion("Timer", "likwid_gpu", 0, time * 1000 * 1000, 0, {cfg_id}, {state_id}, {node_id});
int gid = nvmon_getIdOfActiveGroup();
for (int i = 0; i < nevents; i++)
{{
char* event_name = nvmon_getEventName(gid, i);
__state->report.add_counter("{region}", "likwid_gpu", event_name, events[i], 0, {sdfg_id}, {state_id}, {node_id});
__state->report.add_counter("{region}", "likwid_gpu", event_name, events[i], 0, {cfg_id}, {state_id}, {node_id});
}}
free(events);
Expand All @@ -402,11 +402,11 @@ def on_state_begin(self, sdfg, state, local_stream, global_stream):
return

if state.instrument == dace.InstrumentationType.LIKWID_GPU:
sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = -1
region = f"state_{sdfg_id}_{state_id}_{node_id}"
self._regions.append((region, sdfg_id, state_id, node_id))
region = f"state_{cfg_id}_{state_id}_{node_id}"
self._regions.append((region, cfg_id, state_id, node_id))

marker_code = f'''
LIKWID_NVMARKER_REGISTER("{region}");
Expand All @@ -424,10 +424,10 @@ def on_state_end(self, sdfg, state, local_stream, global_stream):
return

if state.instrument == dace.InstrumentationType.LIKWID_GPU:
sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = -1
region = f"state_{sdfg_id}_{state_id}_{node_id}"
region = f"state_{cfg_id}_{state_id}_{node_id}"

marker_code = f'''
LIKWID_NVMARKER_STOP("{region}");
Expand All @@ -443,12 +443,12 @@ def on_scope_entry(self, sdfg, state, node, outer_stream, inner_stream, global_s
elif node.schedule not in LIKWIDInstrumentationGPU.perf_whitelist_schedules:
raise TypeError("Unsupported schedule on scope")

sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = state.node_id(node)
region = f"scope_{sdfg_id}_{state_id}_{node_id}"
region = f"scope_{cfg_id}_{state_id}_{node_id}"

self._regions.append((region, sdfg_id, state_id, node_id))
self._regions.append((region, cfg_id, state_id, node_id))
marker_code = f'''
LIKWID_NVMARKER_REGISTER("{region}");
Expand All @@ -465,10 +465,10 @@ def on_scope_exit(self, sdfg, state, node, outer_stream, inner_stream, global_st
if not self._likwid_used or entry_node.instrument != dace.InstrumentationType.LIKWID_GPU:
return

sdfg_id = sdfg.sdfg_id
cfg_id = sdfg.cfg_id
state_id = sdfg.node_id(state)
node_id = state.node_id(entry_node)
region = f"scope_{sdfg_id}_{state_id}_{node_id}"
region = f"scope_{cfg_id}_{state_id}_{node_id}"

marker_code = f'''
LIKWID_NVMARKER_STOP("{region}");
Expand Down
2 changes: 1 addition & 1 deletion dace/codegen/instrumentation/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class types, given the currently-registered extensions of this class.

def _idstr(self, sdfg, state, node):
""" Returns a unique identifier string from a node or state. """
result = str(sdfg.sdfg_id)
result = str(sdfg.cfg_id)
if state is not None:
result += '_' + str(sdfg.node_id(state))
if node is not None:
Expand Down
8 changes: 4 additions & 4 deletions dace/codegen/instrumentation/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
def _uuid_to_dict(uuid: UUIDType) -> Dict[str, int]:
result = {}
if uuid[0] != -1:
result['sdfg_id'] = uuid[0]
result['cfg_id'] = uuid[0]
if uuid[1] != -1:
result['state_id'] = uuid[1]
if uuid[2] != -1:
Expand Down Expand Up @@ -83,13 +83,13 @@ def get_event_uuid_and_other_info(event) -> Tuple[UUIDType, Dict[str, Any]]:
other_info = {}
if 'args' in event:
args = event['args']
if 'sdfg_id' in args and args['sdfg_id'] is not None:
uuid = (args['sdfg_id'], -1, -1)
if 'cfg_id' in args and args['cfg_id'] is not None:
uuid = (args['cfg_id'], -1, -1)
if 'state_id' in args and args['state_id'] is not None:
uuid = (uuid[0], args['state_id'], -1)
if 'id' in args and args['id'] is not None:
uuid = (uuid[0], uuid[1], args['id'])
other_info = {k: v for k, v in args.items() if k not in ('sdfg_id', 'state_id', 'id')}
other_info = {k: v for k, v in args.items() if k not in ('cfg_id', 'state_id', 'id')}
return uuid, other_info

def __init__(self, filename: str):
Expand Down
4 changes: 2 additions & 2 deletions dace/codegen/instrumentation/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def on_tend(self, timer_name: str, stream: CodeIOStream, sdfg=None, state=None,
stream.write('''auto __dace_tend_{id} = std::chrono::high_resolution_clock::now();
unsigned long int __dace_ts_start_{id} = std::chrono::duration_cast<std::chrono::microseconds>(__dace_tbegin_{id}.time_since_epoch()).count();
unsigned long int __dace_ts_end_{id} = std::chrono::duration_cast<std::chrono::microseconds>(__dace_tend_{id}.time_since_epoch()).count();
__state->report.add_completion("{timer_name}", "Timer", __dace_ts_start_{id}, __dace_ts_end_{id}, {sdfg_id}, {state_id}, {node_id});'''
.format(timer_name=timer_name, id=idstr, sdfg_id=sdfg.sdfg_id, state_id=state_id, node_id=node_id))
__state->report.add_completion("{timer_name}", "Timer", __dace_ts_start_{id}, __dace_ts_end_{id}, {cfg_id}, {state_id}, {node_id});'''
.format(timer_name=timer_name, id=idstr, cfg_id=sdfg.cfg_id, state_id=state_id, node_id=node_id))

# Code generation hooks
def on_state_begin(self, sdfg, state, local_stream, global_stream):
Expand Down
2 changes: 1 addition & 1 deletion dace/codegen/prettycode.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def write(self, contents, sdfg=None, state_id=None, node_id=None):

# If SDFG/state/node location is given, annotate this line
if sdfg is not None:
location_identifier = ' ////__DACE:%d' % sdfg.sdfg_id
location_identifier = ' ////__DACE:%d' % sdfg.cfg_id
if state_id is not None:
location_identifier += ':' + str(state_id)
if node_id is not None:
Expand Down
Loading

0 comments on commit 202070d

Please sign in to comment.