diff --git a/dace/codegen/CMakeLists.txt b/dace/codegen/CMakeLists.txt index 223db74104..5482d4d30d 100644 --- a/dace/codegen/CMakeLists.txt +++ b/dace/codegen/CMakeLists.txt @@ -265,6 +265,8 @@ endif() # Create HIP object files if(DACE_ENABLE_HIP) + enable_language(HIP) + # Get local AMD architectures if (NOT DEFINED LOCAL_HIP_ARCHITECTURES) # Compile and run a test program @@ -304,8 +306,8 @@ if(DACE_ENABLE_HIP) set(DACE_LIBS ${DACE_LIBS} hip::host) set_source_files_properties(${DACE_HIP_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_prepare_target_commands(${DACE_PROGRAM_NAME} OBJ DACE_HIP_OBJECTS DACE_HIP_SOURCES ${DACE_HIP_FILES}) - set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_HIP_OBJECTS}) + set_source_files_properties(${DACE_HIP_FILES} PROPERTIES LANGUAGE HIP) + set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_HIP_FILES}) endif() # DACE_ENABLE_HIP # create verilator RTL simulation objects diff --git a/dace/codegen/compiled_sdfg.py b/dace/codegen/compiled_sdfg.py index ff7bc6084e..c2ca3316d7 100644 --- a/dace/codegen/compiled_sdfg.py +++ b/dace/codegen/compiled_sdfg.py @@ -31,9 +31,8 @@ def __init__(self, library_filename, program_name): :param program_name: Name of the DaCe program (for use in finding the stub library loader). """ - self._stub_filename = os.path.join( - os.path.dirname(os.path.realpath(library_filename)), - f'libdacestub_{program_name}.{Config.get("compiler", "library_extension")}') + self._stub_filename = os.path.join(os.path.dirname(os.path.realpath(library_filename)), + f'libdacestub_{program_name}.{Config.get("compiler", "library_extension")}') self._library_filename = os.path.realpath(library_filename) self._stub = None self._lib = None @@ -219,7 +218,6 @@ def __init__(self, sdfg, lib: ReloadableDLL, argnames: List[str] = None): self.has_gpu_code = True break - def get_exported_function(self, name: str, restype=None) -> Optional[Callable[..., Any]]: """ Tries to find a symbol by name in the compiled SDFG, and convert it to a callable function @@ -233,7 +231,6 @@ def get_exported_function(self, name: str, restype=None) -> Optional[Callable[.. except KeyError: # Function not found return None - def get_state_struct(self) -> ctypes.Structure: """ Attempt to parse the SDFG source code and extract the state struct. This method will parse the first consecutive entries in the struct that are pointers. As soon as a non-pointer or other unparseable field is @@ -247,7 +244,6 @@ def get_state_struct(self) -> ctypes.Structure: return ctypes.cast(self._libhandle, ctypes.POINTER(self._try_parse_state_struct())).contents - def _try_parse_state_struct(self) -> Optional[Type[ctypes.Structure]]: from dace.codegen.targets.cpp import mangle_dace_state_struct_name # Avoid import cycle # the path of the main sdfg file containing the state struct @@ -375,7 +371,6 @@ def _get_error_text(self, result: Union[str, int]) -> str: else: return result - def __call__(self, *args, **kwargs): """ Forwards the Python call to the compiled ``SDFG``. @@ -400,13 +395,12 @@ def __call__(self, *args, **kwargs): elif len(args) > 0 and self.argnames is not None: kwargs.update( # `_construct_args` will handle all of its arguments as kwargs. - {aname: arg for aname, arg in zip(self.argnames, args)} - ) - argtuple, initargtuple = self._construct_args(kwargs) # Missing arguments will be detected here. - # Return values are cached in `self._lastargs`. + {aname: arg + for aname, arg in zip(self.argnames, args)}) + argtuple, initargtuple = self._construct_args(kwargs) # Missing arguments will be detected here. 
+ # Return values are cached in `self._lastargs`. return self.fast_call(argtuple, initargtuple, do_gpu_check=True) - def fast_call( self, callargs: Tuple[Any, ...], @@ -455,7 +449,6 @@ def fast_call( self._lib.unload() raise - def __del__(self): if self._initialized is True: self.finalize() @@ -463,7 +456,6 @@ def __del__(self): self._libhandle = ctypes.c_void_p(0) self._lib.unload() - def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: """ Main function that controls argument construction for calling @@ -486,7 +478,7 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: typedict = self._typedict if len(kwargs) > 0: # Construct mapping from arguments to signature - arglist = [] + arglist = [] argtypes = [] argnames = [] for a in sig: @@ -536,10 +528,9 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: 'you are doing, you can override this error in the ' 'configuration by setting compiler.allow_view_arguments ' 'to True.') - elif (not isinstance(atype, (dt.Array, dt.Structure)) and - not isinstance(atype.dtype, dtypes.callback) and - not isinstance(arg, (atype.dtype.type, sp.Basic)) and - not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype)): + elif (not isinstance(atype, (dt.Array, dt.Structure)) and not isinstance(atype.dtype, dtypes.callback) + and not isinstance(arg, (atype.dtype.type, sp.Basic)) + and not (isinstance(arg, symbolic.symbol) and arg.dtype == atype.dtype)): is_int = isinstance(arg, int) if is_int and atype.dtype.type == np.int64: pass @@ -573,29 +564,23 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: # Retain only the element datatype for upcoming checks and casts arg_ctypes = tuple(at.dtype.as_ctypes() for at in argtypes) - constants = self.sdfg.constants - callparams = tuple( - (actype(arg.get()) - if isinstance(arg, symbolic.symbol) - else arg, actype, atype, aname - ) - for arg, actype, atype, aname in zip(arglist, arg_ctypes, argtypes, argnames) - if not (symbolic.issymbolic(arg) and (hasattr(arg, 'name') and arg.name in constants)) - ) + constants = self.sdfg.constants + callparams = tuple((arg, actype, atype, aname) + for arg, actype, atype, aname in zip(arglist, arg_ctypes, argtypes, argnames) + if not (symbolic.issymbolic(arg) and (hasattr(arg, 'name') and arg.name in constants))) symbols = self._free_symbols initargs = tuple( - actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg - for arg, actype, atype, aname in callparams - if aname in symbols - ) + actype(arg) if not isinstance(arg, ctypes._SimpleCData) else arg for arg, actype, atype, aname in callparams + if aname in symbols) try: # Replace arrays with their base host/device pointers newargs = [None] * len(callparams) for i, (arg, actype, atype, _) in enumerate(callparams): if dtypes.is_array(arg): - newargs[i] = ctypes.c_void_p(_array_interface_ptr(arg, atype.storage)) # `c_void_p` is subclass of `ctypes._SimpleCData`. + newargs[i] = ctypes.c_void_p(_array_interface_ptr( + arg, atype.storage)) # `c_void_p` is subclass of `ctypes._SimpleCData`. 
elif not isinstance(arg, (ctypes._SimpleCData)): newargs[i] = actype(arg) else: @@ -607,11 +592,9 @@ def _construct_args(self, kwargs) -> Tuple[Tuple[Any], Tuple[Any]]: self._lastargs = newargs, initargs return self._lastargs - def clear_return_values(self): self._create_new_arrays = True - def _create_array(self, _: str, dtype: np.dtype, storage: dtypes.StorageType, shape: Tuple[int], strides: Tuple[int], total_size: int): ndarray = np.ndarray @@ -636,7 +619,6 @@ def ndarray(*args, buffer=None, **kwargs): # Create an array with the properties of the SDFG array return ndarray(shape, dtype, buffer=zeros(total_size, dtype), strides=strides) - def _initialize_return_values(self, kwargs): # Obtain symbol values from arguments and constants syms = dict() @@ -687,7 +669,6 @@ def _initialize_return_values(self, kwargs): arr = self._create_array(*shape_desc) self._return_arrays.append(arr) - def _convert_return_values(self): # Return the values as they would be from a Python function if self._return_arrays is None or len(self._return_arrays) == 0: diff --git a/dace/data.py b/dace/data.py index cceaa4139c..c8b7225b34 100644 --- a/dace/data.py +++ b/dace/data.py @@ -37,6 +37,38 @@ def create_datadescriptor(obj, no_custom_desc=False): return obj.__descriptor__() elif not no_custom_desc and hasattr(obj, 'descriptor'): return obj.descriptor + elif type(obj).__module__ == "torch" and type(obj).__name__ == "Tensor": + # special case for torch tensors. Maybe __array__ could be used here for a more + # general solution, but torch doesn't support __array__ for cuda tensors. + try: + # If torch is importable, define translations between typeclasses and torch types. These are reused by daceml. + # conversion happens here in pytorch: + # https://github.com/pytorch/pytorch/blob/143ef016ee1b6a39cf69140230d7c371de421186/torch/csrc/utils/tensor_numpy.cpp#L237 + import torch + TYPECLASS_TO_TORCH_DTYPE = { + dtypes.bool_: torch.bool, + dtypes.int8: torch.int8, + dtypes.int16: torch.int16, + dtypes.int32: torch.int32, + dtypes.int64: torch.int64, + dtypes.uint8: torch.uint8, + dtypes.float16: torch.float16, + dtypes.float32: torch.float32, + dtypes.float64: torch.float64, + dtypes.complex64: torch.complex64, + dtypes.complex128: torch.complex128, + } + + TORCH_DTYPE_TO_TYPECLASS = {v: k for k, v in TYPECLASS_TO_TORCH_DTYPE.items()} + + storage = dtypes.StorageType.GPU_Global if obj.device.type == 'cuda' else dtypes.StorageType.Default + + return Array(dtype=TORCH_DTYPE_TO_TYPECLASS[obj.dtype], + strides=obj.stride(), + shape=tuple(obj.shape), + storage=storage) + except ImportError: + raise ValueError("Attempted to convert a torch.Tensor, but torch could not be imported") elif dtypes.is_array(obj) and (hasattr(obj, '__array_interface__') or hasattr(obj, '__cuda_array_interface__')): if dtypes.is_gpu_array(obj): interface = obj.__cuda_array_interface__ @@ -79,38 +111,6 @@ def create_datadescriptor(obj, no_custom_desc=False): dtype = dtypes.typeclass(obj.dtype.type) itemsize = obj.itemsize return Array(dtype=dtype, shape=obj.shape, strides=tuple(s // itemsize for s in obj.strides), storage=storage) - elif type(obj).__module__ == "torch" and type(obj).__name__ == "Tensor": - # special case for torch tensors. Maybe __array__ could be used here for a more - # general solution, but torch doesn't support __array__ for cuda tensors. - try: - # If torch is importable, define translations between typeclasses and torch types. These are reused by daceml. 
- # conversion happens here in pytorch: - # https://github.com/pytorch/pytorch/blob/143ef016ee1b6a39cf69140230d7c371de421186/torch/csrc/utils/tensor_numpy.cpp#L237 - import torch - TYPECLASS_TO_TORCH_DTYPE = { - dtypes.bool_: torch.bool, - dtypes.int8: torch.int8, - dtypes.int16: torch.int16, - dtypes.int32: torch.int32, - dtypes.int64: torch.int64, - dtypes.uint8: torch.uint8, - dtypes.float16: torch.float16, - dtypes.float32: torch.float32, - dtypes.float64: torch.float64, - dtypes.complex64: torch.complex64, - dtypes.complex128: torch.complex128, - } - - TORCH_DTYPE_TO_TYPECLASS = {v: k for k, v in TYPECLASS_TO_TORCH_DTYPE.items()} - - storage = dtypes.StorageType.GPU_Global if obj.device.type == 'cuda' else dtypes.StorageType.Default - - return Array(dtype=TORCH_DTYPE_TO_TYPECLASS[obj.dtype], - strides=obj.stride(), - shape=tuple(obj.shape), - storage=storage) - except ImportError: - raise ValueError("Attempted to convert a torch.Tensor, but torch could not be imported") elif symbolic.issymbolic(obj): return Scalar(symbolic.symtype(obj)) elif isinstance(obj, dtypes.typeclass): diff --git a/dace/frontend/python/wrappers.py b/dace/frontend/python/wrappers.py index dc05289536..509ca5612d 100644 --- a/dace/frontend/python/wrappers.py +++ b/dace/frontend/python/wrappers.py @@ -12,12 +12,9 @@ def ndarray(shape, dtype=numpy.float64, *args, **kwargs): - """ Returns a numpy ndarray where all symbols have been evaluated to - numbers and types are converted to numpy types. """ - repldict = {sym: sym.get() for sym in symbolic.symlist(shape).values()} - new_shape = [int(s.subs(repldict) if symbolic.issymbolic(s) else s) for s in shape] + """ Returns a numpy ndarray where all types are converted to numpy types. """ new_dtype = dtype.type if isinstance(dtype, dtypes.typeclass) else dtype - return numpy.ndarray(shape=new_shape, dtype=new_dtype, *args, **kwargs) + return numpy.ndarray(shape=shape, dtype=new_dtype, *args, **kwargs) stream: Type[Deque[T]] = deque diff --git a/dace/jupyter.py b/dace/jupyter.py index 0f338908cd..c7ac85d6ee 100755 --- a/dace/jupyter.py +++ b/dace/jupyter.py @@ -8,7 +8,7 @@ def _connected(): try: - urllib.request.urlopen('https://spcl.github.io/dace/webclient2/dist/sdfv.js', timeout=1) + urllib.request.urlopen('https://spcl.github.io/dace-webclient/dist/sdfv.js', timeout=1) return True except urllib.error.URLError: return False @@ -31,31 +31,22 @@ def isnotebook(): def preamble(): # Emit javascript headers for SDFG renderer sdfv_js_deps = ['sdfv.js'] - sdfv_css_deps = ['sdfv.css'] + offline_sdfv_js_deps = ['sdfv_jupyter.js'] result = '' - # Rely on internet connection for Material icons - result += '' - # Try to load dependencies from online sources if _connected(): for dep in sdfv_js_deps: - result += '\n' % dep - for dep in sdfv_css_deps: - result += '\n' % dep + result += '\n' % dep return result # Load local dependencies - root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'dace', 'viewer', 'webclient') - for dep in sdfv_js_deps: + root_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'viewer', 'webclient') + for dep in offline_sdfv_js_deps: file = os.path.join(root_path, 'dist', dep) - with open(file, 'r') as fp: + with open(file) as fp: result += '\n' % fp.read() - for dep in sdfv_css_deps: - file = os.path.join(root_path, dep) - with open(file, 'r') as fp: - result += '\n' % fp.read() # Run this code once return result diff --git a/dace/runtime/include/dace/cuda/halfvec.cuh b/dace/runtime/include/dace/cuda/halfvec.cuh index 
6be6c64e6a..4283c2aab0 100644 --- a/dace/runtime/include/dace/cuda/halfvec.cuh +++ b/dace/runtime/include/dace/cuda/halfvec.cuh @@ -530,12 +530,13 @@ namespace dace { namespace math { HALF_VEC_UFUNC(exp) HALF_VEC_UFUNC(tanh) } } -#endif // Vector comparison functions DACE_DFI half2 max(half2 a, half2 b) { return make_half2(max(a.x, b.x), max(a.y, b.y)); } +#endif + DACE_DFI half4 max(half4 a, half b) { half2 bvec = __half2half2(b); diff --git a/dace/sdfg/sdfg.py b/dace/sdfg/sdfg.py index 0f55817e23..a95393b992 100644 --- a/dace/sdfg/sdfg.py +++ b/dace/sdfg/sdfg.py @@ -1427,9 +1427,13 @@ def _repr_html_(self): # Create renderer canvas and load SDFG result += """ +
+[SDFG renderer HTML template stripped]
+\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -834,81 +818,6 @@ "446 ms ± 41.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - ":4: NumbaWarning: \u001b[1m\n", - "Compilation is falling back to object mode WITH looplifting enabled because Function \"someforloop\" failed type inference due to: \u001b[1mUntyped global name 'element_update':\u001b[0m \u001b[1m\u001b[1mCannot determine Numba type of \u001b[0m\n", - "\u001b[1m\n", - "File \"\", line 7:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - " \n", - " for j in range(A.shape[1]):\n", - "\u001b[1m A[i, j] = element_update(A[i, j])\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\n", - " def someforloop(A):\n", - ":4: NumbaWarning: \u001b[1m\n", - "Compilation is falling back to object mode WITHOUT looplifting enabled because Function \"someforloop\" failed type inference due to: \u001b[1m\u001b[1mCannot determine Numba type of \u001b[0m\n", - "\u001b[1m\n", - "File \"\", line 5:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - "\u001b[1m for i in range(A.shape[0]):\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\n", - " def someforloop(A):\n", - "/home/user/anaconda3/envs/py38/lib/python3.8/site-packages/numba/core/object_mode_passes.py:151: NumbaWarning: \u001b[1mFunction \"someforloop\" was compiled in object mode without forceobj=True, but has lifted loops.\n", - "\u001b[1m\n", - "File \"\", line 5:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - "\u001b[1m for i in range(A.shape[0]):\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\n", - " warnings.warn(errors.NumbaWarning(warn_msg,\n", - "/home/user/anaconda3/envs/py38/lib/python3.8/site-packages/numba/core/object_mode_passes.py:161: NumbaDeprecationWarning: \u001b[1m\n", - "Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n", - "\n", - "For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n", - "\u001b[1m\n", - "File \"\", line 5:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - "\u001b[1m for i in range(A.shape[0]):\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\n", - " warnings.warn(errors.NumbaDeprecationWarning(msg,\n", - ":4: NumbaWarning: \u001b[1m\n", - "Compilation is falling back to object mode WITHOUT looplifting enabled because Function \"someforloop\" failed type inference due to: \u001b[1mUntyped global name 'element_update':\u001b[0m \u001b[1m\u001b[1mCannot determine Numba type of \u001b[0m\n", - "\u001b[1m\n", - "File \"\", line 7:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - " \n", - " for j in range(A.shape[1]):\n", - "\u001b[1m A[i, j] = element_update(A[i, j])\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\u001b[0m\n", - " def someforloop(A):\n", - "/home/user/anaconda3/envs/py38/lib/python3.8/site-packages/numba/core/object_mode_passes.py:151: NumbaWarning: \u001b[1mFunction \"someforloop\" was compiled in object mode without forceobj=True.\n", - "\u001b[1m\n", - "File \"\", line 5:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - "\u001b[1m for i in range(A.shape[0]):\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\n", - " warnings.warn(errors.NumbaWarning(warn_msg,\n", - 
"/home/user/anaconda3/envs/py38/lib/python3.8/site-packages/numba/core/object_mode_passes.py:161: NumbaDeprecationWarning: \u001b[1m\n", - "Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n", - "\n", - "For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit\n", - "\u001b[1m\n", - "File \"\", line 5:\u001b[0m\n", - "\u001b[1mdef someforloop(A):\n", - "\u001b[1m for i in range(A.shape[0]):\n", - "\u001b[0m \u001b[1m^\u001b[0m\u001b[0m\n", - "\u001b[0m\n", - " warnings.warn(errors.NumbaDeprecationWarning(msg,\n" - ] - }, { "name": "stdout", "output_type": "stream", diff --git a/tutorials/codegen.ipynb b/tutorials/codegen.ipynb index 0de9fab2d9..a6effd7996 100644 --- a/tutorials/codegen.ipynb +++ b/tutorials/codegen.ipynb @@ -32,22 +32,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -83,15 +68,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (simple)" ] }, "execution_count": 2, @@ -822,7 +812,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.12.1" } }, "nbformat": 4, diff --git a/tutorials/explicit.ipynb b/tutorials/explicit.ipynb index 5ecd3fe590..45d172cf35 100644 --- a/tutorials/explicit.ipynb +++ b/tutorials/explicit.ipynb @@ -17,22 +17,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -137,15 +122,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (transpose)" ] }, "execution_count": 5, @@ -216,12 +206,12 @@ "outputs": [], "source": [ "N = dace.symbol('N')\n", - "N.set(255)\n", + "n = 255\n", "\n", - "storage = dace.ndarray(shape=[N], dtype=dace.int32)\n", + "storage = dace.ndarray(shape=[n], dtype=dace.int32)\n", "# The size of \"output\" will actually be lesser or equal to N, but we need to \n", "# statically allocate the memory.\n", - "output = dace.ndarray(shape=[N], dtype=dace.int32)\n", + "output = dace.ndarray(shape=[n], dtype=dace.int32)\n", "# The size is a scalar\n", "output_size = dace.scalar(dtype=dace.uint32)" ] @@ -296,9 +286,9 @@ "source": [ "# Define some random integers and zero outputs\n", "import numpy as np\n", - "storage[:] = np.random.randint(0, 100, size=N.get())\n", + "storage[:] = np.random.randint(0, 100, size=n)\n", "output_size[0] = 0\n", - "output[:] = np.zeros(N.get()).astype(np.int32)\n", + "output[:] = np.zeros(n).astype(np.int32)\n", "\n", "# Compute expected output using numpy\n", "expected = storage[np.where(storage > thres)]" @@ -335,7 +325,7 @@ } ], "source": [ - "qfunc(data=storage, output=output, outsz=output_size, threshold=thres, N=N)\n", + "qfunc(data=storage, output=output, outsz=output_size, threshold=thres, N=n)\n", "output_size" ] }, @@ -375,9 +365,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.12.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorials/getting_started.ipynb b/tutorials/getting_started.ipynb index 31f3c13b11..4405c28d56 100644 --- a/tutorials/getting_started.ipynb +++ b/tutorials/getting_started.ipynb @@ -19,22 +19,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -81,8 +66,8 @@ { "data": { "text/plain": [ - "array([[0.19830156, 0.1679383 , 0.19932212],\n", - " [0.59336771, 0.20975676, 0.05706468]])" + "array([[0.74867876, 0.85403223, 0.16573784],\n", + " [0.71994615, 0.29855314, 0.21483992]])" ] }, "execution_count": 3, @@ -104,8 +89,8 @@ { "data": { "text/plain": [ - "array([[0.39660312, 0.33587661, 0.39864423],\n", - " [1.18673541, 0.41951352, 0.11412936]])" + "array([[1.49735752, 1.70806445, 0.33147568],\n", + " [1.4398923 , 0.59710627, 0.42967985]])" ] }, "execution_count": 4, @@ -135,15 +120,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (getstarted)" ] }, "execution_count": 5, @@ -218,15 +208,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (getstarted_sym)" ] }, "execution_count": 8, @@ -253,19 +248,19 @@ { "data": { "text/plain": [ - "array([[0.22917578, 0.2371489 , 1.41801157, ..., 0.65790524, 0.95110319,\n", - " 0.17970065],\n", - " [0.38880002, 1.76962412, 0.10092406, ..., 0.01563938, 1.59546665,\n", - " 0.91307168],\n", - " [0.65812086, 0.41412414, 1.53311494, ..., 0.34473083, 1.71308857,\n", - " 0.46120345],\n", + "array([[1.63216549, 1.26522381, 0.21606686, ..., 0.56988572, 1.12572538,\n", + " 1.72701877],\n", + " [0.3829452 , 1.52386969, 0.82165197, ..., 1.3105662 , 1.19336786,\n", + " 1.43671993],\n", + " [1.55277426, 1.50918516, 1.30665626, ..., 1.06562809, 1.53069088,\n", + " 1.10071159],\n", " ...,\n", - " [1.65819208, 0.38832393, 0.539072 , ..., 1.00576714, 0.84533283,\n", - " 0.93827821],\n", - " [1.39887643, 1.50380279, 1.85950996, ..., 0.48506318, 1.51421076,\n", - " 1.84443427],\n", - " [1.81472469, 1.50510688, 1.69746795, ..., 0.34524469, 0.63041897,\n", - " 0.76394633]])" + " [0.60629736, 1.73240929, 1.26797782, ..., 1.72034476, 1.56691557,\n", + " 0.22283613],\n", + " [1.96245486, 1.60559508, 0.02009914, ..., 1.40944583, 1.44560312,\n", + " 0.37804927],\n", + " [1.17875002, 0.96963921, 0.28278902, ..., 1.56747976, 0.4616313 ,\n", + " 0.94999278]])" ] }, "execution_count": 9, @@ -288,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -304,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -320,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -329,14 +324,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "14.2 ms ± 465 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "12 ms ± 143 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -346,14 +341,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "8.6 ms ± 44.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + "3.86 ms ± 271 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" ] } ], @@ -372,25 +367,30 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (sse_sigma)" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -436,7 +436,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.12.1" } }, "nbformat": 4, diff --git a/tutorials/numpy_frontend.ipynb b/tutorials/numpy_frontend.ipynb index 8e83f0a204..fafda2f1b1 100644 --- a/tutorials/numpy_frontend.ipynb +++ b/tutorials/numpy_frontend.ipynb @@ -24,22 +24,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -121,15 +106,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (gemm)" ] }, "execution_count": 5, @@ -276,15 +266,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (forloop)" ] }, "execution_count": 10, @@ -332,15 +327,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (maptest)" ] }, "execution_count": 11, @@ -475,9 +475,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.12.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorials/sdfg_api.ipynb b/tutorials/sdfg_api.ipynb index 586115d85c..645158ce88 100644 --- a/tutorials/sdfg_api.ipynb +++ b/tutorials/sdfg_api.ipynb @@ -19,22 +19,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -126,15 +111,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 5, @@ -209,15 +199,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 7, @@ -244,15 +239,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 8, @@ -286,15 +286,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 9, @@ -341,15 +346,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 11, @@ -384,15 +394,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (jacobi2d)" ] }, "execution_count": 12, @@ -464,16 +479,7 @@ "cell_type": "code", "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING: Casting scalar argument \"N\" from int to \n", - "WARNING: Casting scalar argument \"T\" from int to \n" - ] - } - ], + "outputs": [], "source": [ "sdfg(A=inp, N=N, T=T)" ] @@ -487,7 +493,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Difference: 6.037729e-07\n" + "Difference: 1.6358224e-06\n" ] } ], @@ -512,9 +518,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.12.1" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorials/transformations.ipynb b/tutorials/transformations.ipynb index 511e8b1be3..d54b294e6e 100644 --- a/tutorials/transformations.ipynb +++ b/tutorials/transformations.ipynb @@ -31,22 +31,7 @@ { "data": { "text/html": [ - "\n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" + "\n" ], "text/plain": [ "" @@ -84,15 +69,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 2, @@ -121,15 +111,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 3, @@ -159,15 +154,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 4, @@ -196,15 +196,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 5, @@ -237,8 +242,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Match: MapTiling in _Mult__map: ['__i0', '__i1']\n", - "Match: MapTiling in _Add__map: ['__i0', '__i1']\n" + "Match: MapTiling in [MapEntry (_Mult__map[__i0=0:1000, __i1=0:1000])]\n", + "Match: MapTiling in [MapEntry (_Add__map[__i0=0:1000, __i1=0:1000])]\n" ] } ], @@ -275,7 +280,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Match found in state assign_4_4 . Nodes: [MapExit (_Mult__map[__i0=0:1000, __i1=0:1000]), AccessNode (__tmp1), MapEntry (_Add__map[__i0=0:1000, __i1=0:1000])]\n" + "Match found in state BinOp_5 . Nodes: [MapExit (_Mult__map[__i0=0:1000, __i1=0:1000]), AccessNode (__tmp0), MapEntry (_Add__map[__i0=0:1000, __i1=0:1000])]\n" ] } ], @@ -310,7 +315,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "_Mult__map[__i0=0:1000, __i1=0:1000] -> __tmp1 -> _Add__map[__i0=0:1000, __i1=0:1000]\n" + "_Mult__map[__i0=0:1000, __i1=0:1000] -> __tmp0 -> _Add__map[__i0=0:1000, __i1=0:1000]\n" ] } ], @@ -342,15 +347,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", "\n", + "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 9, @@ -388,15 +398,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 10, @@ -434,59 +449,132 @@ "name": "stdout", "output_type": "stream", "text": [ - "0. Transformation FPGATransformSDFG in dbladd\n", - "1. Transformation FPGATransformState in assign_4_4\n", - "2. Transformation GPUTransformLocalStorage in outer_fused[__i0=0:1000, __i1=0:1000]\n", - "3. Transformation GPUTransformMap in outer_fused[__i0=0:1000, __i1=0:1000]\n", - "4. Transformation GPUTransformSDFG in dbladd\n", - "5. Transformation MapExpansion in outer_fused: ['__i0', '__i1']\n", - "6. Transformation MapFission in outer_fused\n", - "7. Transformation MapTiling in outer_fused: ['__i0', '__i1']\n", - "8. Transformation NestSDFG in dbladd\n", - "9. Transformation StripMining in outer_fused: ['__i0', '__i1']\n", - "10. Transformation Vectorization in 6 -> 3 -> 7\n", - "Select the pattern to apply (0 - 10 or name$id): 7(tile_sizes=(128,))\n", - "You selected (7) pattern MapTiling in outer_fused: ['__i0', '__i1'] with parameters {'tile_sizes': (128,)}\n", - "0. Transformation FPGATransformSDFG in dbladd\n", - "1. Transformation FPGATransformState in assign_4_4\n", - "2. Transformation GPUTransformLocalStorage in outer_fused[__i0=128*tile___i0:Min(1000, 128*tile___i0 + 128), __i1=128*tile___i1:Min(1000, 128*tile___i1 + 128)]\n", - "3. Transformation GPUTransformLocalStorage in merged_tile___i0_outer_fused[tile___i0=0:int_ceil(1000, 128), tile___i1=0:int_ceil(1000, 128)]\n", - "4. Transformation GPUTransformMap in outer_fused[__i0=128*tile___i0:Min(1000, 128*tile___i0 + 128), __i1=128*tile___i1:Min(1000, 128*tile___i1 + 128)]\n", - "5. Transformation GPUTransformMap in merged_tile___i0_outer_fused[tile___i0=0:int_ceil(1000, 128), tile___i1=0:int_ceil(1000, 128)]\n", - "6. Transformation GPUTransformSDFG in dbladd\n", - "7. Transformation InLocalStorage in 8 -> 6\n", - "8. Transformation MapExpansion in outer_fused: ['__i0', '__i1']\n", - "9. Transformation MapExpansion in merged_tile___i0_outer_fused: ['tile___i0', 'tile___i1']\n", - "10. Transformation MapFission in outer_fused\n", - "11. Transformation MapTiling in outer_fused: ['__i0', '__i1']\n", - "12. Transformation MapTiling in merged_tile___i0_outer_fused: ['tile___i0', 'tile___i1']\n", - "13. Transformation NestSDFG in dbladd\n", - "14. Transformation OutLocalStorage in 7 -> 9\n", + "0. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "1. Transformation ElementWiseArrayOperation2D in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "2. Transformation FPGATransformSDFG in []\n", + "3. Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", + "4. Transformation GPUTransformLocalStorage in outer_fused[__i0=0:1000, __i1=0:1000]\n", + "5. Transformation GPUTransformMap in outer_fused[__i0=0:1000, __i1=0:1000]\n", + "6. Transformation GPUTransformSDFG in []\n", + "7. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "8. Transformation MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "9. Transformation MapFission in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "10. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "11. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "12. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", + "13. Transformation NestSDFG in []\n", + "14. 
Transformation ReductionNOperation in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])]\n", "15. Transformation StripMining in outer_fused: ['__i0', '__i1']\n", - "16. Transformation StripMining in merged_tile___i0_outer_fused: ['tile___i0', 'tile___i1']\n", - "17. Transformation Vectorization in 6 -> 3 -> 7\n", - "Select the pattern to apply (0 - 17 or name$id): \n", - "You did not select a valid option. Quitting optimization ...\n" + "16. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" ] }, { - "data": { - "text/html": [ - "\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" + "name": "stdin", + "output_type": "stream", + "text": [ + "Select the pattern to apply (0 - 16 or name$id): MapExpansion$0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You selected (MapExpansion$0) pattern MapExpansion in [MapEntry (outer_fused[__i0=0:1000, __i1=0:1000])] with parameters {}\n", + "0. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "1. Transformation FPGATransformSDFG in []\n", + "2. Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", + "3. Transformation GPUGridStridedTiling in [MapEntry (outer_fused[__i0=0:1000]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "4. Transformation GPUTransformLocalStorage in outer_fused[__i0=0:1000]\n", + "5. Transformation GPUTransformMap in outer_fused[__i0=0:1000]\n", + "6. Transformation GPUTransformMap in outer_fused___i1[__i1=0:1000]\n", + "7. Transformation GPUTransformSDFG in []\n", + "8. Transformation InLocalStorage in outer_fused[__i0=0:1000] -> outer_fused___i1[__i1=0:1000]\n", + "9. Transformation MPITransformMap in [MapEntry (outer_fused[__i0=0:1000])]\n", + "10. Transformation MPITransformMap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "11. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=0:1000])]\n", + "12. Transformation MapDimShuffle in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "13. Transformation MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "14. Transformation MapInterchange in [MapEntry (outer_fused[__i0=0:1000]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "15. Transformation MapTiling in [MapEntry (outer_fused[__i0=0:1000])]\n", + "16. Transformation MapTiling in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "17. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=0:1000])]\n", + "18. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "19. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=0:1000])]\n", + "20. Transformation MapToForLoop in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "21. Transformation MapUnroll in [MapEntry (outer_fused[__i0=0:1000])]\n", + "22. Transformation NestSDFG in []\n", + "23. Transformation OutLocalStorage in outer_fused___i1[__i1=0:1000] -> outer_fused[__i0=0:1000]\n", + "24. Transformation ReductionNOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "25. Transformation StripMining in outer_fused: ['__i0']\n", + "26. Transformation StripMining in outer_fused___i1: ['__i1']\n", + "27. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Select the pattern to apply (0 - 27 or name$id): MapTiling$0(tile_sizes=(128,))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You selected (MapTiling$0) pattern MapTiling in [MapEntry (outer_fused[__i0=0:1000])] with parameters {'tile_sizes': (128,)}\n", + "0. Transformation ElementWiseArrayOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "1. Transformation FPGATransformSDFG in []\n", + "2. Transformation FPGATransformState in [SDFGState (BinOp_5)]\n", + "3. Transformation GPUGridStridedTiling in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "4. 
Transformation GPUGridStridedTiling in [MapEntry (outer_fused[tile___i0=0:1000:128]), MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "5. Transformation GPUTransformLocalStorage in outer_fused[tile___i0=0:1000:128]\n", + "6. Transformation GPUTransformMap in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", + "7. Transformation GPUTransformMap in outer_fused___i1[__i1=0:1000]\n", + "8. Transformation GPUTransformMap in outer_fused[tile___i0=0:1000:128]\n", + "9. Transformation GPUTransformSDFG in []\n", + "10. Transformation InLocalStorage in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1] -> outer_fused___i1[__i1=0:1000]\n", + "11. Transformation InLocalStorage in outer_fused[tile___i0=0:1000:128] -> outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", + "12. Transformation MPITransformMap in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "13. Transformation MPITransformMap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "14. Transformation MPITransformMap in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "15. Transformation MapDimShuffle in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "16. Transformation MapDimShuffle in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "17. Transformation MapDimShuffle in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "18. Transformation MapFission in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "19. Transformation MapInterchange in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]), MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "20. Transformation MapTiling in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "21. Transformation MapTiling in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "22. Transformation MapTiling in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "23. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "24. Transformation MapTilingWithOverlap in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "25. Transformation MapTilingWithOverlap in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "26. Transformation MapToForLoop in [MapEntry (outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1])]\n", + "27. Transformation MapToForLoop in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "28. Transformation MapToForLoop in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "29. Transformation MapUnroll in [MapEntry (outer_fused[tile___i0=0:1000:128])]\n", + "30. Transformation NestSDFG in []\n", + "31. Transformation OutLocalStorage in outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1] -> outer_fused[tile___i0=0:1000:128]\n", + "32. Transformation OutLocalStorage in outer_fused___i1[__i1=0:1000] -> outer_fused[__i0=tile___i0:Min(999, tile___i0 + 127) + 1]\n", + "33. Transformation ReductionNOperation in [MapEntry (outer_fused___i1[__i1=0:1000])]\n", + "34. Transformation StripMining in outer_fused: ['__i0']\n", + "35. Transformation StripMining in outer_fused___i1: ['__i1']\n", + "36. Transformation StripMining in outer_fused: ['tile___i0']\n", + "37. Transformation TaskletFusion in [Tasklet (_Mult_), AccessNode (__tmp0), Tasklet (_Add_)]\n" + ] + }, + { + "name": "stdin", + "output_type": "stream", + "text": [ + "Select the pattern to apply (0 - 37 or name$id): \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You did not select a valid option. 
Quitting optimization ...\n" + ] } ], "source": [ @@ -565,15 +653,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 12, @@ -721,15 +814,20 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", + "
\n", + "
\n", + "\n", "" ], "text/plain": [ - "" + "SDFG (dbladd)" ] }, "execution_count": 15, @@ -780,7 +878,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.12.1" } }, "nbformat": 4,