Skip to content

Commit

Permalink
Merge branch 'master' into mpi4py_dev
Browse files Browse the repository at this point in the history
  • Loading branch information
alexnick83 committed Sep 15, 2023
2 parents 69213b3 + 680a956 commit 215eadf
Show file tree
Hide file tree
Showing 24 changed files with 933 additions and 105 deletions.
1 change: 1 addition & 0 deletions dace/codegen/compiled_sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ def get_workspace_sizes(self) -> Dict[dtypes.StorageType, int]:
result: Dict[dtypes.StorageType, int] = {}
for storage in self.external_memory_types:
func = self._lib.get_symbol(f'__dace_get_external_memory_size_{storage.name}')
func.restype = ctypes.c_size_t
result[storage] = func(self._libhandle, *self._lastargs[1])

return result
Expand Down
31 changes: 31 additions & 0 deletions dace/codegen/cppunparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import numpy as np
import os
import tokenize
import warnings

import sympy
import dace
Expand Down Expand Up @@ -733,6 +734,21 @@ def _Num(self, t):
if isinstance(t.n, complex):
dtype = dtypes.DTYPE_TO_TYPECLASS[complex]

# Handle large integer values
if isinstance(t.n, int):
bits = t.n.bit_length()
if bits == 32: # Integer, potentially unsigned
if t.n >= 0: # unsigned
repr_n += 'U'
else: # signed, 64-bit
repr_n += 'LL'
elif 32 < bits <= 63:
repr_n += 'LL'
elif bits == 64 and t.n >= 0:
repr_n += 'ULL'
elif bits >= 64:
warnings.warn(f'Value wider than 64 bits encountered in expression ({t.n}), emitting as-is')

if repr_n.endswith("j"):
self.write("%s(0, %s)" % (dtype, repr_n.replace("inf", INFSTR)[:-1]))
else:
Expand Down Expand Up @@ -831,8 +847,23 @@ def _Tuple(
self.write(")")

unop = {"Invert": "~", "Not": "!", "UAdd": "+", "USub": "-"}
unop_lambda = {'Invert': (lambda x: ~x), 'Not': (lambda x: not x), 'UAdd': (lambda x: +x), 'USub': (lambda x: -x)}

def _UnaryOp(self, t):
# Dispatch constants after applying the operation
if sys.version_info[:2] < (3, 8):
if isinstance(t.operand, ast.Num):
newval = self.unop_lambda[t.op.__class__.__name__](t.operand.n)
newnode = ast.Num(n=newval)
self.dispatch(newnode)
return
else:
if isinstance(t.operand, ast.Constant):
newval = self.unop_lambda[t.op.__class__.__name__](t.operand.value)
newnode = ast.Constant(value=newval)
self.dispatch(newnode)
return

self.write("(")
self.write(self.unop[t.op.__class__.__name__])
self.write(" ")
Expand Down
15 changes: 11 additions & 4 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -1939,6 +1939,13 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_
kernel_params: list, function_stream: CodeIOStream, kernel_stream: CodeIOStream):
node = dfg_scope.source_nodes()[0]

# Get the thread/block index type
ttype = Config.get('compiler', 'cuda', 'thread_id_type')
tidtype = getattr(dtypes, ttype, False)
if not isinstance(tidtype, dtypes.typeclass):
raise ValueError(f'Configured type "{ttype}" for ``thread_id_type`` does not match any DaCe data type. '
'See ``dace.dtypes`` for available types (for example ``int32``).')

# allocating shared memory for dynamic threadblock maps
if has_dtbmap:
kernel_stream.write(
Expand Down Expand Up @@ -1990,8 +1997,8 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_

expr = _topy(bidx[i]).replace('__DAPB%d' % i, block_expr)

kernel_stream.write('int %s = %s;' % (varname, expr), sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, 'int')
kernel_stream.write(f'{tidtype.ctype} {varname} = {expr};', sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, tidtype.ctype)

# Delinearize beyond the third dimension
if len(krange) > 3:
Expand All @@ -2010,8 +2017,8 @@ def generate_kernel_scope(self, sdfg: SDFG, dfg_scope: ScopeSubgraphView, state_
)

expr = _topy(bidx[i]).replace('__DAPB%d' % i, block_expr)
kernel_stream.write('int %s = %s;' % (varname, expr), sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, 'int')
kernel_stream.write(f'{tidtype.ctype} {varname} = {expr};', sdfg, state_id, node)
self._dispatcher.defined_vars.add(varname, DefinedType.Scalar, tidtype.ctype)

# Dispatch internal code
assert CUDACodeGen._in_device_code is False
Expand Down
11 changes: 11 additions & 0 deletions dace/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,17 @@ required:
a specified larger block size in the third dimension. Default value is
derived from hardware limits on common GPUs.
thread_id_type:
type: str
title: Thread/block index data type
default: int32
description: >
Defines the data type for a thread and block index in the generated code.
The type is based on the type-classes in ``dace.dtypes``. For example,
``uint64`` is equivalent to ``dace.uint64``. Change this setting when large
index types are needed to address memory offsets that are beyond the 32-bit
range, or to reduce memory usage.
#############################################
# General FPGA flags
Expand Down
90 changes: 74 additions & 16 deletions dace/frontend/fortran/ast_components.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
from fparser.two.Fortran2008 import Fortran2008 as f08
from fparser.two import Fortran2008
from fparser.two import Fortran2003 as f03
from fparser.two import symbol_table

Expand Down Expand Up @@ -523,6 +524,31 @@ def declaration_type_spec(self, node: FASTNode):
def assumed_shape_spec_list(self, node: FASTNode):
return node

def parse_shape_specification(self, dim: f03.Explicit_Shape_Spec, size: List[FASTNode], offset: List[int]):
    """Append one array dimension's extent and lower bound to `size`/`offset`.

    A shape specification is either a single extent expression (the offset
    then defaults to 1) or a lower:upper bound pair, in which case both
    bounds must be integer literal constants.
    """
    bounds = [child for child in dim.children if child is not None]

    if len(bounds) == 1:
        # Plain extent expression: process it and record the default offset.
        size.append(self.create_ast(bounds[0]))
        offset.append(1)
        return

    if len(bounds) == 2:
        # Explicit lower/upper bound pair — both must be constant literals.
        for bound in bounds:
            if not isinstance(bound, f03.Int_Literal_Constant):
                raise TypeError("Array offsets must be constant expressions!")
        lower = int(bounds[0].tostr())
        upper = int(bounds[1].tostr())
        offset.append(lower)

        # Fortran bounds are inclusive, hence the +1 when computing the extent.
        extent_literal = f03.Int_Literal_Constant(str(upper - lower + 1))
        size.append(self.create_ast(extent_literal))
        return

    raise TypeError("Array dimension must be at most two expressions")

def type_declaration_stmt(self, node: FASTNode):

#decide if its a intrinsic variable type or a derived type
Expand Down Expand Up @@ -574,33 +600,44 @@ def type_declaration_stmt(self, node: FASTNode):

alloc = False
symbol = False
attr_size = None
attr_offset = None
for i in attributes:
if i.string.lower() == "allocatable":
alloc = True
if i.string.lower() == "parameter":
symbol = True

if isinstance(i, Fortran2008.Attr_Spec_List):

dimension_spec = get_children(i, "Dimension_Attr_Spec")
if len(dimension_spec) == 0:
continue

attr_size = []
attr_offset = []
sizes = get_child(dimension_spec[0], ["Explicit_Shape_Spec_List"])

for shape_spec in get_children(sizes, [f03.Explicit_Shape_Spec]):
self.parse_shape_specification(shape_spec, attr_size, attr_offset)

vardecls = []

for var in names:
#first handle dimensions
size = None
offset = None
var_components = self.create_children(var)
array_sizes = get_children(var, "Explicit_Shape_Spec_List")
actual_name = get_child(var_components, ast_internal_classes.Name_Node)
if len(array_sizes) == 1:
array_sizes = array_sizes[0]
size = []
offset = []
for dim in array_sizes.children:
#sanity check
if isinstance(dim, f03.Explicit_Shape_Spec):
dim_expr = [i for i in dim.children if i is not None]
if len(dim_expr) == 1:
dim_expr = dim_expr[0]
#now to add the dimension to the size list after processing it if necessary
size.append(self.create_ast(dim_expr))
else:
raise TypeError("Array dimension must be a single expression")
self.parse_shape_specification(dim, size, offset)
#handle initializiation
init = None

Expand All @@ -615,32 +652,53 @@ def type_declaration_stmt(self, node: FASTNode):

if symbol == False:

vardecls.append(
ast_internal_classes.Var_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
sizes=size,
kind=kind,
line_number=node.item.span))
if attr_size is None:
vardecls.append(
ast_internal_classes.Var_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
sizes=size,
offsets=offset,
kind=kind,
line_number=node.item.span))
else:
vardecls.append(
ast_internal_classes.Var_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
sizes=attr_size,
offsets=attr_offset,
kind=kind,
line_number=node.item.span))
else:
if size is None:
if size is None and attr_size is None:
self.symbols[actual_name.name] = init
vardecls.append(
ast_internal_classes.Symbol_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
init=init,
line_number=node.item.span))
elif attr_size is not None:
vardecls.append(
ast_internal_classes.Symbol_Array_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
sizes=attr_size,
offsets=attr_offset,
kind=kind,
init=init,
line_number=node.item.span))
else:
vardecls.append(
ast_internal_classes.Symbol_Array_Decl_Node(name=actual_name.name,
type=testtype,
alloc=alloc,
sizes=size,
offsets=offset,
kind=kind,
init=init,
line_number=node.item.span))

return ast_internal_classes.Decl_Stmt_Node(vardecl=vardecls, line_number=node.item.span)

def entity_decl(self, node: FASTNode):
Expand Down
12 changes: 11 additions & 1 deletion dace/frontend/fortran/ast_internal_classes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright 2019-2023 ETH Zurich and the DaCe authors. All rights reserved.
from typing import Any, List, Tuple, Type, TypeVar, Union, overload
from typing import Any, List, Optional, Tuple, Type, TypeVar, Union, overload

# The node class is the base class for all nodes in the AST. It provides attributes including the line number and fields.
# Attributes are not used when walking the tree, but are useful for debugging and for code generation.
Expand All @@ -11,6 +11,14 @@ def __init__(self, *args, **kwargs): # real signature unknown
self.integrity_exceptions = []
self.read_vars = []
self.written_vars = []
self.parent: Optional[
Union[
Subroutine_Subprogram_Node,
Function_Subprogram_Node,
Main_Program_Node,
Module_Node
]
] = None
for k, v in kwargs.items():
setattr(self, k, v)

Expand Down Expand Up @@ -199,6 +207,7 @@ class Symbol_Array_Decl_Node(Statement_Node):
)
_fields = (
'sizes',
'offsets'
'typeref',
'init',
)
Expand All @@ -213,6 +222,7 @@ class Var_Decl_Node(Statement_Node):
)
_fields = (
'sizes',
'offsets',
'typeref',
'init',
)
Expand Down
Loading

0 comments on commit 215eadf

Please sign in to comment.