diff --git a/loki/transformations/single_column/scc.py b/loki/transformations/single_column/scc.py index 2e06ee7a3..3b7f7d7dc 100644 --- a/loki/transformations/single_column/scc.py +++ b/loki/transformations/single_column/scc.py @@ -11,6 +11,7 @@ from loki.transformations.hoist_variables import HoistTemporaryArraysAnalysis from loki.transformations.pool_allocator import TemporariesPoolAllocatorTransformation +from loki.transformations.raw_stack_allocator import TemporariesRawStackTransformation from loki.transformations.single_column.base import SCCBaseTransformation from loki.transformations.single_column.annotate import SCCAnnotateTransformation @@ -20,7 +21,9 @@ ) -__all__ = ['SCCVectorPipeline', 'SCCHoistPipeline', 'SCCStackPipeline'] +__all__ = [ + 'SCCVectorPipeline', 'SCCHoistPipeline', 'SCCStackPipeline', 'SCCRawStackPipeline' +] """ @@ -171,3 +174,50 @@ TemporariesPoolAllocatorTransformation ) ) + +""" +SCC-style transformation that additionally pre-allocates a "stack" +pool allocator and replaces local temporaries with indexed sub-arrays +of this preallocated array. + +For details of the kernel and driver-side transformations, please +refer to :any:`SCCVectorPipeline` + +In addition, this pipeline will invoke +:any:`TemporariesRawStackTransformation` to back the remaining +locally allocated arrays from a "stack" pool allocator that is +pre-allocated in the driver routine and passed down via arguments. + +Parameters +---------- +horizontal : :any:`Dimension` + :any:`Dimension` object describing the variable conventions used in code + to define the horizontal data dimension and iteration space. +block_dim : :any:`Dimension` + Optional ``Dimension`` object to define the blocking dimension + to use for hoisted column arrays if hoisting is enabled. +directive : string or None + Directives flavour to use for parallelism annotations; either + ``'openacc'`` or ``None``. 
+trim_vector_sections : bool + Flag to trigger trimming of extracted vector sections to remove + nodes that are not assignments involving vector parallel arrays. +demote_local_arrays : bool + Flag to trigger local array demotion to scalar variables where possible +check_bounds : bool, optional + Insert bounds-checks in the kernel to make sure the allocated + stack size is not exceeded (default: ``True``) +driver_horizontal : str, optional + Override string if a separate variable name should be used for the + horizontal when allocating the stack in the driver. +""" +SCCRawStackPipeline = partial( + Pipeline, classes=( + SCCBaseTransformation, + SCCDevectorTransformation, + SCCDemoteTransformation, + SCCRevectorTransformation, + SCCAnnotateTransformation, + TemporariesRawStackTransformation + ) +) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index 088c62de9..c48274447 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -37,16 +37,12 @@ from loki.transformations.hoist_variables import HoistTemporaryArraysAnalysis from loki.transformations.inline import InlineTransformation from loki.transformations.pool_allocator import TemporariesPoolAllocatorTransformation -from loki.transformations.raw_stack_allocator import TemporariesRawStackTransformation from loki.transformations.remove_code import RemoveCodeTransformation from loki.transformations.sanitise import SanitiseTransformation from loki.transformations.single_column import ( - ExtractSCATransformation, CLAWTransformation, - SCCVectorPipeline, SCCHoistPipeline, SCCStackPipeline, + ExtractSCATransformation, CLAWTransformation, SCCVectorPipeline, + SCCHoistPipeline, SCCStackPipeline, SCCRawStackPipeline, HoistTemporaryArraysDeviceAllocatableTransformation, - SCCBaseTransformation, SCCDevectorTransformation, - SCCRevectorTransformation, SCCDemoteTransformation, - SCCAnnotateTransformation ) from loki.transformations.transpile import FortranCTransformation @@ -305,29 +301,19 @@ def 
transform_subroutine(self, routine, **kwargs):
             horizontal=horizontal,
             block_dim=block_dim, directive=directive,
             check_bounds=False,
-            trim_vector_sections=trim_vector_sections )
+            trim_vector_sections=trim_vector_sections
+        )
         scheduler.process( pipeline )
 
     if mode == 'scc-raw-stack':
-        # Apply the basic SCC transformation set
-        scheduler.process( SCCBaseTransformation(
-            horizontal=horizontal, directive=directive
-        ))
-        scheduler.process( SCCDevectorTransformation(
-            horizontal=horizontal, trim_vector_sections=trim_vector_sections
-        ))
-        scheduler.process( SCCDemoteTransformation(horizontal=horizontal))
-        scheduler.process( SCCRevectorTransformation(horizontal=horizontal))
-
-        scheduler.process( SCCAnnotateTransformation(
-            horizontal=horizontal, directive=directive, block_dim=block_dim
-        ))
-
-        transformation = TemporariesRawStackTransformation(
-            block_dim=block_dim, horizontal=horizontal,
-            directive=directive, driver_horizontal='NPROMA'
+        pipeline = SCCRawStackPipeline(
+            horizontal=horizontal,
+            block_dim=block_dim, directive=directive,
+            check_bounds=False,
+            trim_vector_sections=trim_vector_sections,
+            driver_horizontal='NPROMA'
         )
-        scheduler.process(transformation=transformation)
+        scheduler.process( pipeline )
 
     if mode in ['cuf-parametrise', 'cuf-hoist', 'cuf-dynamic']:
         # These transformations requires complex constructor arguments,