From 66913220ea600492db59cf8e536271b36c1554bd Mon Sep 17 00:00:00 2001 From: alexnick83 <31545860+alexnick83@users.noreply.github.com> Date: Sat, 21 Oct 2023 11:22:06 +0200 Subject: [PATCH] Option for utilizing GPU global memory (#1405) * Added option to change storage of non-transient data to GPU global memory. * Fixed typos. --- dace/transformation/auto/auto_optimize.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/dace/transformation/auto/auto_optimize.py b/dace/transformation/auto/auto_optimize.py index 54dbc8d4ac..644df59e5c 100644 --- a/dace/transformation/auto/auto_optimize.py +++ b/dace/transformation/auto/auto_optimize.py @@ -515,11 +515,29 @@ def make_transients_persistent(sdfg: SDFG, return result +def apply_gpu_storage(sdfg: SDFG) -> None: + """ Changes the storage of the SDFG's non-transient data that still uses default storage to GPU global memory. Non-transient scalars are changed only if some state writes to them; other scalars are left untouched. """ + + written_scalars = set() + for state in sdfg.nodes(): + for node in state.data_nodes(): + desc = node.desc(sdfg) + if isinstance(desc, dt.Scalar) and not desc.transient and state.in_degree(node) > 0: + written_scalars.add(node.data) + + for name, desc in sdfg.arrays.items(): + if not desc.transient and desc.storage == dtypes.StorageType.Default: + if isinstance(desc, dt.Scalar) and name not in written_scalars: + continue + desc.storage = dtypes.StorageType.GPU_Global + + def auto_optimize(sdfg: SDFG, device: dtypes.DeviceType, validate: bool = True, validate_all: bool = False, - symbols: Dict[str, int] = None) -> SDFG: + symbols: Dict[str, int] = None, + use_gpu_storage: bool = False) -> SDFG: """ Runs a basic sequence of transformations to optimize a given SDFG to decent performance. In particular, performs the following: @@ -539,6 +557,7 @@ def auto_optimize(sdfg: SDFG, have been applied. :param validate_all: If True, validates the SDFG after every step. 
:param symbols: Optional dict that maps symbols (str/symbolic) to int/float + :param use_gpu_storage: If True, changes the storage of non-transient data to GPU global memory. :return: The optimized SDFG. :note: Operates in-place on the given SDFG. :note: This function is still experimental and may harm correctness in @@ -565,6 +584,8 @@ def auto_optimize(sdfg: SDFG, # Apply GPU transformations and set library node implementations if device == dtypes.DeviceType.GPU: + if use_gpu_storage: + apply_gpu_storage(sdfg) sdfg.apply_gpu_transformations() sdfg.simplify()