diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py index c77f550a8..65e017a2f 100644 --- a/loki/transformations/block_index_transformations.py +++ b/loki/transformations/block_index_transformations.py @@ -21,7 +21,7 @@ from loki.transformations.sanitise import resolve_associates from loki.transformations.utilities import ( recursive_expression_map_update, get_integer_variable, - get_loop_bounds, check_routine_pragmas + get_loop_bounds, check_routine_sequential ) from loki.transformations.single_column.base import SCCBaseTransformation @@ -246,8 +246,8 @@ def process_kernel(self, routine, item, successors, targets, exclude_arrays): v_index = get_integer_variable(routine, name=self.horizontal.index) SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) - # Bail if routine is marked as sequential or routine has already been processed - if check_routine_pragmas(routine, directive=None): + # Bail if routine is marked as sequential + if check_routine_sequential(routine): return bounds = get_loop_bounds(routine, self.horizontal) diff --git a/loki/transformations/single_column/annotate.py b/loki/transformations/single_column/annotate.py index b0fdea690..d3283a963 100644 --- a/loki/transformations/single_column/annotate.py +++ b/loki/transformations/single_column/annotate.py @@ -5,22 +5,20 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
-import re - from loki.batch import Transformation from loki.expression import ( symbols as sym, FindVariables, is_dimension_constant ) from loki.ir import ( - nodes as ir, FindNodes, Transformer, pragmas_attached, - pragma_regions_attached, is_loki_pragma + nodes as ir, FindNodes, pragmas_attached, is_loki_pragma, + get_pragma_parameters, Transformer ) from loki.logging import info from loki.tools import as_tuple, flatten from loki.types import DerivedType from loki.transformations.utilities import ( - find_driver_loops, get_local_arrays, check_routine_pragmas + find_driver_loops, get_local_arrays ) @@ -34,9 +32,6 @@ class SCCAnnotateTransformation(Transformation): Parameters ---------- - horizontal : :any:`Dimension` - :any:`Dimension` object describing the variable conventions used in code - to define the horizontal data dimension and iteration space. block_dim : :any:`Dimension` Optional ``Dimension`` object to define the blocking dimension to use for hoisted column arrays if hoisting is enabled. @@ -45,23 +40,19 @@ class SCCAnnotateTransformation(Transformation): ``'openacc'`` or ``None``. """ - def __init__(self, horizontal, directive, block_dim): - self.horizontal = horizontal + def __init__(self, directive, block_dim): self.directive = directive self.block_dim = block_dim - @classmethod - def kernel_annotate_vector_loops_openacc(cls, routine, horizontal): + def annotate_vector_loops(self, routine): """ - Insert ``!$acc loop vector`` annotations around horizontal vector - loops, including the necessary private variable declarations. + Insert ``!$acc loop vector`` for previously marked loops, + including addition of the necessary private variable declarations. Parameters ---------- routine : :any:`Subroutine` The subroutine in the vector loops should be removed. 
- horizontal: :any:`Dimension` - The dimension object specifying the horizontal vector dimension """ # Find any local arrays that need explicitly privatization @@ -78,59 +69,37 @@ def kernel_annotate_vector_loops_openacc(cls, routine, horizontal): f'{[a.name for a in private_arrays]}' ) - mapper = {} - with pragma_regions_attached(routine): - for region in FindNodes(ir.PragmaRegion).visit(routine.body): - if is_loki_pragma(region.pragma, starts_with='vector-reduction'): - if (reduction_clause := re.search(r'reduction\([\w:0-9 \t]+\)', region.pragma.content)): - - loops = FindNodes(ir.Loop).visit(region) - assert len(loops) == 1 - pragma = ir.Pragma(keyword='acc', content=f'loop vector {reduction_clause[0]}') - mapper[loops[0]] = loops[0].clone(pragma=(pragma,)) - mapper[region.pragma] = None - mapper[region.pragma_post] = None - with pragmas_attached(routine, ir.Loop): for loop in FindNodes(ir.Loop).visit(routine.body): - if loop.variable == horizontal.index and not loop in mapper: - # Construct pragma and wrap entire body in vector loop - private_arrs = ', '.join(v.name for v in private_arrays) - pragma = () - private_clause = '' if not private_arrays else f' private({private_arrs})' - pragma = ir.Pragma(keyword='acc', content=f'loop vector{private_clause}') - mapper[loop] = loop.clone(pragma=(pragma,)) - - routine.body = Transformer(mapper).visit(routine.body) - - @classmethod - def kernel_annotate_sequential_loops_openacc(cls, routine, horizontal, block_dim=None, ignore=()): + for pragma in as_tuple(loop.pragma): + if is_loki_pragma(pragma, starts_with='loop vector reduction'): + # Turn reduction pragmas into `!$acc` equivalent + pragma._update(keyword='acc') + continue + + if is_loki_pragma(pragma, starts_with='loop vector'): + # Turn general vector pragmas into `!$acc` and add private clause + private_arrs = ', '.join(v.name for v in private_arrays) + private_clause = '' if not private_arrays else f' private({private_arrs})' + pragma._update(keyword='acc', 
content=f'loop vector{private_clause}') + + def annotate_sequential_loops(self, routine): """ - Insert ``!$acc loop seq`` annotations around all loops that - are not horizontal vector loops. + Insert ``!$acc loop seq`` annotations for all loops previously + marked with ``!$loki loop seq``. Parameters ---------- routine : :any:`Subroutine` The subroutine in which to annotate sequential loops - horizontal: :any:`Dimension` - The dimension object specifying the horizontal vector dimension - block_dim: :any: `Dimension` - The dimension object specifying the blocking dimension - ignore: list or tuple - Loops to be ignored for annotation """ - block_dim_index = None if block_dim is None else block_dim.index with pragmas_attached(routine, ir.Loop): - for loop in FindNodes(ir.Loop).visit(routine.body): - # Skip loops explicitly marked with `!$loki/claw nodep` - if loop.pragma and any('nodep' in p.content.lower() for p in as_tuple(loop.pragma)): + if not is_loki_pragma(loop.pragma, starts_with='loop seq'): continue - if loop.variable != horizontal.index and loop.variable != block_dim_index and loop not in ignore: - # Perform pragma addition in place to avoid nested loop replacements - loop._update(pragma=(ir.Pragma(keyword='acc', content='loop seq'),)) + # Replace internal `!$loki loop seq` pragma with `!$acc` equivalent + loop._update(pragma=(ir.Pragma(keyword='acc', content='loop seq'),)) # Warn if we detect vector insisde sequential loop nesting nested_loops = FindNodes(ir.Loop).visit(loop.body) @@ -138,10 +107,10 @@ def kernel_annotate_sequential_loops_openacc(cls, routine, horizontal, block_dim if any('loop vector' in pragma.content for pragma in loop_pragmas): info(f'[Loki-SCC::Annotate] Detected vector loop in sequential loop in {routine.name}') - @classmethod - def kernel_annotate_subroutine_present_openacc(cls, routine): + def annotate_kernel_routine(self, routine): """ - Insert ``!$acc data present`` annotations around the body of a subroutine. 
+ Insert ``!$acc routine seq/vector`` directives and wrap + subroutine body in ``!$acc data present`` directives. Parameters ---------- @@ -149,34 +118,42 @@ def kernel_annotate_subroutine_present_openacc(cls, routine): The subroutine to which annotations will be added """ + # Update `!$loki routine seq/vector` pragmas with `!$acc` + pragma_map = {} + for pragma in FindNodes(ir.Pragma).visit(routine.ir): + if is_loki_pragma(pragma, starts_with='routine'): + # We have to re-insert the pragma here, in case it was + # falsely attributed to the body! + routine.spec.append(pragma.clone(keyword='acc')) + pragma_map[pragma] = None + pragma_transformer = Transformer(pragma_map) + routine.spec = pragma_transformer.visit(routine.spec) + routine.body = pragma_transformer.visit(routine.body) + # Get the names of all array and derived type arguments args = [a for a in routine.arguments if isinstance(a, sym.Array)] args += [a for a in routine.arguments if isinstance(a.type.dtype, DerivedType)] argnames = [str(a.name) for a in args] - routine.body.prepend(ir.Pragma(keyword='acc', content=f'data present({", ".join(argnames)})')) - # Add comment to prevent false-attachment in case it is preceded by an "END DO" statement - routine.body.append((ir.Comment(text=''), ir.Pragma(keyword='acc', content='end data'))) - - @classmethod - def insert_annotations(cls, routine, horizontal): - - # Mark all parallel vector loops as `!$acc loop vector` - cls.kernel_annotate_vector_loops_openacc(routine, horizontal) - - # Mark all non-parallel loops as `!$acc loop seq` - cls.kernel_annotate_sequential_loops_openacc(routine, horizontal) - - # Wrap the routine body in `!$acc data present` markers - # to ensure device-resident data is used for array and struct arguments. 
- cls.kernel_annotate_subroutine_present_openacc(routine) - - # Mark routine as `!$acc routine vector` to make it device-callable - routine.spec.append(ir.Pragma(keyword='acc', content='routine vector')) + if argnames: + routine.body.prepend(ir.Pragma(keyword='acc', content=f'data present({", ".join(argnames)})')) + # Add comment to prevent false-attachment in case it is preceded by an "END DO" statement + routine.body.append((ir.Comment(text=''), ir.Pragma(keyword='acc', content='end data'))) def transform_subroutine(self, routine, **kwargs): """ - Apply SCCAnnotate utilities to a :any:`Subroutine`. + Apply OpenACC annotations according to ``!$loki`` placeholder + directives. + + This routine effectively converts neutral ``!$loki loop`` and + ``!$loki routine`` annotations into the corresponding + ``!$acc`` equivalent directives. It also adds ``!$acc data + present`` clauses around kernel routine bodies and adds + ``private`` clauses to loop annotations. + + If the ``directive`` provided is not ``openacc``, no change is + applied. In the future, we aim to support ``OpenMP`` + equivalent directives here. Parameters ---------- @@ -189,78 +166,39 @@ def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] targets = as_tuple(kwargs.get('targets')) + if not self.directive == 'openacc': + return + if role == 'kernel': - self.process_kernel(routine) - if role == 'driver': - self.process_driver(routine, targets=targets) + # Bail if this routine has been processed before + for p in FindNodes(ir.Pragma).visit(routine.ir): + # Check if `!$acc routine` has already been added + if p.keyword.lower() == 'acc' and 'routine' in p.content.lower(): + return - def process_kernel(self, routine): - """ - Applies the SCCAnnotate utilities to a "kernel". This consists of inserting the relevant - ``'openacc'`` annotations at the :any:`Loop` and :any:`Subroutine` level. 
+ # Mark all parallel vector loops as `!$acc loop vector` + self.annotate_vector_loops(routine) - Parameters - ---------- - routine : :any:`Subroutine` - Subroutine to apply this transformation to. - """ + # Mark all non-parallel loops as `!$acc loop seq` + self.annotate_sequential_loops(routine) - # Bail if routine is marked as sequential - if check_routine_pragmas(routine, self.directive): - return + # Wrap the routine body in `!$acc data present` markers to + # ensure all arguments are device-resident. + self.annotate_kernel_routine(routine) - if self.directive == 'openacc': - self.insert_annotations(routine, self.horizontal) - # Remove the vector section wrappers - # These have been inserted by SCCDevectorTransformation - section_mapper = {s: s.body for s in FindNodes(ir.Section).visit(routine.body) if s.label == 'vector_section'} - if section_mapper: - routine.body = Transformer(section_mapper).visit(routine.body) + if role == 'driver': + # Mark all parallel vector loops as `!$acc loop vector` + self.annotate_vector_loops(routine) - def process_driver(self, routine, targets=None): - """ - Apply the relevant ``'openacc'`` annotations to the driver loop. + # Mark all non-parallel loops as `!$acc loop seq` + self.annotate_sequential_loops(routine) - Parameters - ---------- - routine : :any:`Subroutine` - Subroutine to apply this transformation to. - targets : list or string - List of subroutines that are to be considered as part of - the transformation call tree. 
- """ + with pragmas_attached(routine, ir.Loop, attach_pragma_post=True): + driver_loops = find_driver_loops(routine=routine, targets=targets) + for loop in driver_loops: + self.annotate_driver_loop(loop) - # For the thread block size, find the horizontal size variable that is available in - # the driver - num_threads = None - symbol_map = routine.symbol_map - for size_expr in self.horizontal.size_expressions: - if size_expr in symbol_map: - num_threads = size_expr - break - - with pragmas_attached(routine, ir.Loop, attach_pragma_post=True): - driver_loops = find_driver_loops(routine=routine, targets=targets) - for loop in driver_loops: - loops = FindNodes(ir.Loop).visit(loop.body) - kernel_loops = [l for l in loops if l.variable == self.horizontal.index] - if kernel_loops: - assert not loop == kernel_loops[0] - self.annotate_driver( - self.directive, loop, kernel_loops, self.block_dim, num_threads - ) - - if self.directive == 'openacc': - # Mark all non-parallel loops as `!$acc loop seq` - self.kernel_annotate_sequential_loops_openacc(routine, self.horizontal, self.block_dim, - ignore=driver_loops) - - # Remove the vector section wrappers - # These have been inserted by SCCDevectorTransformation - section_mapper = {s: s.body for s in FindNodes(ir.Section).visit(routine.body) if s.label == 'vector_section'} - if section_mapper: - routine.body = Transformer(section_mapper).visit(routine.body) @classmethod def device_alloc_column_locals(cls, routine, column_locals): @@ -283,58 +221,43 @@ def device_alloc_column_locals(cls, routine, column_locals): routine.body.prepend((ir.Comment(''), pragma, ir.Comment(''))) routine.body.append((ir.Comment(''), pragma_post, ir.Comment(''))) - @classmethod - def annotate_driver(cls, directive, driver_loop, kernel_loops, block_dim, num_threads): + def annotate_driver_loop(self, loop): """ Annotate driver block loop with ``'openacc'`` pragmas. 
Parameters ---------- - directive : string or None - Directives flavour to use for parallelism annotations; either - ``'openacc'`` or ``None``. - driver_loop : :any:`Loop` - Driver ``Loop`` to wrap in ``'opencc'`` pragmas. - kernel_loops : list of :any:`Loop` - Vector ``Loop`` to wrap in ``'opencc'`` pragmas if hoisting is enabled. - block_dim : :any:`Dimension` - Optional ``Dimension`` object to define the blocking dimension - to detect hoisted temporary arrays and excempt them from marking. - num_threads : str - The size expression that determines the number of threads per thread block + loop : :any:`Loop` + Driver :any:`Loop` to wrap in ``'openacc'`` pragmas. """ # Mark driver loop as "gang parallel". - if directive == 'openacc': - arrays = FindVariables(unique=True).visit(driver_loop) + if self.directive == 'openacc': + arrays = FindVariables(unique=True).visit(loop) arrays = [v for v in arrays if isinstance(v, sym.Array)] arrays = [v for v in arrays if not v.type.intent] arrays = [v for v in arrays if not v.type.pointer] # Filter out arrays that are explicitly allocated with block dimension - sizes = block_dim.size_expressions + sizes = self.block_dim.size_expressions arrays = [v for v in arrays if not any(d in sizes for d in as_tuple(v.shape))] private_arrays = ', '.join(set(v.name for v in arrays)) private_clause = '' if not private_arrays else f' private({private_arrays})' - vector_length_clause = '' if not num_threads else f' vector_length({num_threads})' - - # Annotate vector loops with OpenACC pragmas - if kernel_loops: - for loop in as_tuple(kernel_loops): - loop._update(pragma=(ir.Pragma(keyword='acc', content='loop vector'),)) - - if driver_loop.pragma is None or (len(driver_loop.pragma) == 1 and - driver_loop.pragma[0].keyword.lower() == "loki" and - driver_loop.pragma[0].content.lower() == "driver-loop"): - p_content = f'parallel loop gang{private_clause}{vector_length_clause}' - driver_loop._update(pragma=(ir.Pragma(keyword='acc', 
content=p_content),)) - driver_loop._update(pragma_post=(ir.Pragma(keyword='acc', content='end parallel loop'),)) - - # add acc parallel loop gang if the only existing pragma is acc data - elif len(driver_loop.pragma) == 1: - if (driver_loop.pragma[0].keyword == 'acc' and - driver_loop.pragma[0].content.lower().lstrip().startswith('data ')): - p_content = f'parallel loop gang{private_clause}{vector_length_clause}' - driver_loop._update(pragma=(driver_loop.pragma[0], ir.Pragma(keyword='acc', content=p_content))) - driver_loop._update(pragma_post=(ir.Pragma(keyword='acc', content='end parallel loop'), - driver_loop.pragma_post[0])) + + for pragma in as_tuple(loop.pragma): + if is_loki_pragma(pragma, starts_with='loop driver'): + # Replace `!$loki loop driver` pragma with OpenACC equivalent + params = get_pragma_parameters(loop.pragma, starts_with='loop driver') + vlength = params.get('vector_length') + vlength_clause = f' vector_length({vlength})' if vlength else '' + + content = f'parallel loop gang{private_clause}{vlength_clause}' + pragma_new = ir.Pragma(keyword='acc', content=content) + pragma_post = ir.Pragma(keyword='acc', content='end parallel loop') + + # Replace existing loki pragma and add post-pragma + loop_pragmas = tuple(p for p in as_tuple(loop.pragma) if p is not pragma) + loop._update( + pragma=loop_pragmas + (pragma_new,), + pragma_post=(pragma_post,) + as_tuple(loop.pragma_post) + ) diff --git a/loki/transformations/single_column/base.py b/loki/transformations/single_column/base.py index be730c418..b69f14f1d 100644 --- a/loki/transformations/single_column/base.py +++ b/loki/transformations/single_column/base.py @@ -14,7 +14,7 @@ from loki.transformations.sanitise import resolve_associates from loki.transformations.utilities import ( - get_integer_variable, get_loop_bounds, check_routine_pragmas + get_integer_variable, get_loop_bounds, check_routine_sequential ) @@ -164,7 +164,7 @@ def process_kernel(self, routine): """ # Bail if routine is marked as 
sequential or routine has already been processed - if check_routine_pragmas(routine, self.directive): + if check_routine_sequential(routine): return # Bail if routine is elemental diff --git a/loki/transformations/single_column/tests/test_scc.py b/loki/transformations/single_column/tests/test_scc.py index ffb0fe44b..71549ceac 100644 --- a/loki/transformations/single_column/tests/test_scc.py +++ b/loki/transformations/single_column/tests/test_scc.py @@ -298,8 +298,7 @@ def test_scc_annotate_openacc(frontend, horizontal, blocking): scc_transform = (SCCDevectorTransformation(horizontal=horizontal),) scc_transform += (SCCDemoteTransformation(horizontal=horizontal),) scc_transform += (SCCRevectorTransformation(horizontal=horizontal),) - scc_transform += (SCCAnnotateTransformation(horizontal=horizontal, - directive='openacc', block_dim=blocking),) + scc_transform += (SCCAnnotateTransformation(directive='openacc', block_dim=blocking),) for transform in scc_transform: transform.apply(driver, role='driver', targets=['compute_column']) transform.apply(kernel, role='kernel') @@ -407,9 +406,7 @@ def test_scc_nested(frontend, horizontal, blocking): scc_pipeline.apply(inner_kernel, role='kernel') # Apply annotate twice to test bailing out mechanism - scc_annotate = SCCAnnotateTransformation( - horizontal=horizontal, directive='openacc', block_dim=blocking - ) + scc_annotate = SCCAnnotateTransformation(directive='openacc', block_dim=blocking) scc_annotate.apply(driver, role='driver', targets=['compute_column']) scc_annotate.apply(outer_kernel, role='kernel', targets=['compute_q']) scc_annotate.apply(inner_kernel, role='kernel') @@ -753,9 +750,10 @@ def test_scc_multiple_acc_pragmas(frontend, horizontal, blocking): @pytest.mark.parametrize('frontend', available_frontends()) -def test_scc_base_routine_seq_pragma(frontend, horizontal): +def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking): """ - Test that `!$loki routine seq` pragmas are replaced correctly by 
`!$acc routine seq` pragmas. + Test that `!$loki routine seq` pragmas are replaced correctly by + `!$acc routine seq` pragmas. """ fcode = """ @@ -764,8 +762,8 @@ def test_scc_base_routine_seq_pragma(frontend, horizontal): integer, intent(in) :: nang real, dimension(nang), intent(inout) :: work -!$loki routine seq integer :: k +!$loki routine seq do k=1,nang work(k) = 1. @@ -776,20 +774,57 @@ def test_scc_base_routine_seq_pragma(frontend, horizontal): routine = Subroutine.from_source(fcode, frontend=frontend) - pragmas = FindNodes(Pragma).visit(routine.spec) + pragmas = FindNodes(Pragma).visit(routine.ir) assert len(pragmas) == 1 assert pragmas[0].keyword == 'loki' assert pragmas[0].content == 'routine seq' - transformation = SCCBaseTransformation(horizontal=horizontal, directive='openacc') + transformation = SCCAnnotateTransformation(directive='openacc', block_dim=blocking) transformation.transform_subroutine(routine, role='kernel', targets=['some_kernel',]) + # Ensure the converted routine pragma is the only pragma in the spec pragmas = FindNodes(Pragma).visit(routine.spec) assert len(pragmas) == 1 assert pragmas[0].keyword == 'acc' assert pragmas[0].content == 'routine seq' +@pytest.mark.parametrize('frontend', available_frontends()) +def test_scc_annotate_empty_data_clause(frontend, horizontal, blocking): + """ + Test that we do not generate empty `!$acc data` clauses. + """ + + fcode = """ + subroutine some_kernel(n) + implicit none + ! 
Scalars should not show up in `!$acc data` clause + integer, intent(inout) :: n +!$loki routine seq + integer :: k + + k = n + do k=1, 3 + n = k + 1 + enddo + end subroutine some_kernel + """ + routine = Subroutine.from_source(fcode, frontend=frontend) + + pragmas = FindNodes(Pragma).visit(routine.ir) + assert len(pragmas) == 1 + assert pragmas[0].keyword == 'loki' + assert pragmas[0].content == 'routine seq' + + transformation = SCCAnnotateTransformation(directive='openacc', block_dim=blocking) + transformation.transform_subroutine(routine, role='kernel', targets=['some_kernel',]) + + # Ensure the converted routine pragma is the only pragma (no `!$acc data` was added) + pragmas = FindNodes(Pragma).visit(routine.ir) + assert len(pragmas) == 1 + assert pragmas[0].keyword == 'acc' + assert pragmas[0].content == 'routine seq' + @pytest.mark.parametrize('frontend', available_frontends()) def test_scc_vector_reduction(frontend, horizontal, blocking): diff --git a/loki/transformations/single_column/tests/test_scc_hoist.py b/loki/transformations/single_column/tests/test_scc_hoist.py index 533e89bb6..344c980a9 100644 --- a/loki/transformations/single_column/tests/test_scc_hoist.py +++ b/loki/transformations/single_column/tests/test_scc_hoist.py @@ -261,9 +261,7 @@ def test_scc_hoist_multiple_kernels_loops(tmp_path, frontend, trim_vector_sectio transformation += (SCCDevectorTransformation(horizontal=horizontal, trim_vector_sections=trim_vector_sections),) transformation += (SCCDemoteTransformation(horizontal=horizontal),) transformation += (SCCRevectorTransformation(horizontal=horizontal),) - transformation += (SCCAnnotateTransformation( - horizontal=horizontal, directive='openacc', block_dim=blocking, - ),) + transformation += (SCCAnnotateTransformation(directive='openacc', block_dim=blocking),) for transform in transformation: scheduler.process(transformation=transform) diff --git a/loki/transformations/single_column/tests/test_scc_vector.py 
b/loki/transformations/single_column/tests/test_scc_vector.py index 99dbf919f..a496e03ce 100644 --- a/loki/transformations/single_column/tests/test_scc_vector.py +++ b/loki/transformations/single_column/tests/test_scc_vector.py @@ -10,8 +10,7 @@ from loki import Subroutine, Sourcefile, Dimension, fgen from loki.frontend import available_frontends from loki.ir import ( - FindNodes, Assignment, CallStatement, Conditional, Comment, Loop, - Pragma, Section + nodes as ir, FindNodes, pragmas_attached, is_loki_pragma ) from loki.transformations.single_column import ( SCCDevectorTransformation, SCCRevectorTransformation, SCCVectorPipeline @@ -91,38 +90,53 @@ def test_scc_revector_transformation(frontend, horizontal): driver = Subroutine.from_source(fcode_driver, frontend=frontend) # Ensure we have three loops in the kernel prior to transformation - kernel_loops = FindNodes(Loop).visit(kernel.body) + kernel_loops = FindNodes(ir.Loop).visit(kernel.body) assert len(kernel_loops) == 3 scc_transform = (SCCDevectorTransformation(horizontal=horizontal),) scc_transform += (SCCRevectorTransformation(horizontal=horizontal),) for transform in scc_transform: - transform.apply(driver, role='driver') + transform.apply(driver, role='driver', targets=('compute_column',)) transform.apply(kernel, role='kernel') # Ensure we have two nested loops in the kernel # (the hoisted horizontal and the native vertical) - kernel_loops = FindNodes(Loop).visit(kernel.body) - assert len(kernel_loops) == 2 - assert kernel_loops[1] in FindNodes(Loop).visit(kernel_loops[0].body) - assert kernel_loops[0].variable == 'jl' - assert kernel_loops[0].bounds == 'start:end' - assert kernel_loops[1].variable == 'jk' - assert kernel_loops[1].bounds == '2:nz' + with pragmas_attached(kernel, node_type=ir.Loop): + kernel_loops = FindNodes(ir.Loop).visit(kernel.body) + assert len(kernel_loops) == 2 + assert kernel_loops[1] in FindNodes(ir.Loop).visit(kernel_loops[0].body) + assert kernel_loops[0].variable == 'jl' + assert 
kernel_loops[0].bounds == 'start:end' + assert kernel_loops[1].variable == 'jk' + assert kernel_loops[1].bounds == '2:nz' + + # Check internal loop pragma annotations + assert kernel_loops[0].pragma + assert is_loki_pragma(kernel_loops[0].pragma, starts_with='loop vector') + assert kernel_loops[1].pragma + assert is_loki_pragma(kernel_loops[1].pragma, starts_with='loop seq') # Ensure all expressions and array indices are unchanged - assigns = FindNodes(Assignment).visit(kernel.body) + assigns = FindNodes(ir.Assignment).visit(kernel.body) assert fgen(assigns[1]).lower() == 't(jl, jk) = c*jk' assert fgen(assigns[2]).lower() == 'q(jl, jk) = q(jl, jk - 1) + t(jl, jk)*c' assert fgen(assigns[3]).lower() == 'q(jl, nz) = q(jl, nz)*c' - # Ensure driver remains unaffected - driver_loops = FindNodes(Loop).visit(driver.body) - assert len(driver_loops) == 1 - assert driver_loops[0].variable == 'b' - assert driver_loops[0].bounds == '1:nb' - - kernel_calls = FindNodes(CallStatement).visit(driver_loops[0]) + # Ensure that vector-section labels have been removed + sections = FindNodes(ir.Section).visit(kernel.body) + assert all(not s.label for s in sections) + + # Ensure driver remains unaffected and is marked + with pragmas_attached(driver, node_type=ir.Loop): + driver_loops = FindNodes(ir.Loop).visit(driver.body) + assert len(driver_loops) == 1 + assert driver_loops[0].variable == 'b' + assert driver_loops[0].bounds == '1:nb' + assert driver_loops[0].pragma and len(driver_loops[0].pragma) == 1 + assert is_loki_pragma(driver_loops[0].pragma[0], starts_with='loop driver') + assert 'vector_length(nlon)' in driver_loops[0].pragma[0].content + + kernel_calls = FindNodes(ir.CallStatement).visit(driver_loops[0]) assert len(kernel_calls) == 1 assert kernel_calls[0].name == 'compute_column' @@ -193,38 +207,53 @@ def test_scc_revector_transformation_aliased_bounds(frontend, horizontal_bounds_ definitions=bnds_type_mod.definitions).subroutines[0] # Ensure we have three loops in the kernel 
prior to transformation - kernel_loops = FindNodes(Loop).visit(kernel.body) + kernel_loops = FindNodes(ir.Loop).visit(kernel.body) assert len(kernel_loops) == 3 scc_transform = (SCCDevectorTransformation(horizontal=horizontal_bounds_aliases),) scc_transform += (SCCRevectorTransformation(horizontal=horizontal_bounds_aliases),) for transform in scc_transform: - transform.apply(driver, role='driver') + transform.apply(driver, role='driver', targets=('compute_column',)) transform.apply(kernel, role='kernel') # Ensure we have two nested loops in the kernel # (the hoisted horizontal and the native vertical) - kernel_loops = FindNodes(Loop).visit(kernel.body) - assert len(kernel_loops) == 2 - assert kernel_loops[1] in FindNodes(Loop).visit(kernel_loops[0].body) - assert kernel_loops[0].variable == 'jl' - assert kernel_loops[0].bounds == 'bnds%start:bnds%end' - assert kernel_loops[1].variable == 'jk' - assert kernel_loops[1].bounds == '2:nz' + with pragmas_attached(kernel, node_type=ir.Loop): + kernel_loops = FindNodes(ir.Loop).visit(kernel.body) + assert len(kernel_loops) == 2 + assert kernel_loops[1] in FindNodes(ir.Loop).visit(kernel_loops[0].body) + assert kernel_loops[0].variable == 'jl' + assert kernel_loops[0].bounds == 'bnds%start:bnds%end' + assert kernel_loops[1].variable == 'jk' + assert kernel_loops[1].bounds == '2:nz' + + # Check internal loop pragma annotations + assert kernel_loops[0].pragma + assert is_loki_pragma(kernel_loops[0].pragma, starts_with='loop vector') + assert kernel_loops[1].pragma + assert is_loki_pragma(kernel_loops[1].pragma, starts_with='loop seq') # Ensure all expressions and array indices are unchanged - assigns = FindNodes(Assignment).visit(kernel.body) + assigns = FindNodes(ir.Assignment).visit(kernel.body) assert fgen(assigns[1]).lower() == 't(jl, jk) = c*jk' assert fgen(assigns[2]).lower() == 'q(jl, jk) = q(jl, jk - 1) + t(jl, jk)*c' assert fgen(assigns[3]).lower() == 'q(jl, nz) = q(jl, nz)*c' - # Ensure driver remains unaffected - 
driver_loops = FindNodes(Loop).visit(driver.body) - assert len(driver_loops) == 1 - assert driver_loops[0].variable == 'b' - assert driver_loops[0].bounds == '1:nb' - - kernel_calls = FindNodes(CallStatement).visit(driver_loops[0]) + # Ensure that vector-section labels have been removed + sections = FindNodes(ir.Section).visit(kernel.body) + assert all(not s.label for s in sections) + + # Ensure driver remains unaffected and is marked + with pragmas_attached(driver, node_type=ir.Loop): + driver_loops = FindNodes(ir.Loop).visit(driver.body) + assert len(driver_loops) == 1 + assert driver_loops[0].variable == 'b' + assert driver_loops[0].bounds == '1:nb' + assert driver_loops[0].pragma and len(driver_loops[0].pragma) == 1 + assert is_loki_pragma(driver_loops[0].pragma[0], starts_with='loop driver') + assert 'vector_length(nlon)' in driver_loops[0].pragma[0].content + + kernel_calls = FindNodes(ir.CallStatement).visit(driver_loops[0]) assert len(kernel_calls) == 1 assert kernel_calls[0].name == 'compute_column' @@ -280,7 +309,7 @@ def test_scc_devector_transformation(frontend, horizontal): kernel = Subroutine.from_source(fcode_kernel, frontend=frontend) # Check number of horizontal loops prior to transformation - loops = [l for l in FindNodes(Loop).visit(kernel.body) if l.variable == 'jl'] + loops = [l for l in FindNodes(ir.Loop).visit(kernel.body) if l.variable == 'jl'] assert len(loops) == 4 # Test SCCDevector transform for kernel with scope-splitting outer loop @@ -288,20 +317,23 @@ def test_scc_devector_transformation(frontend, horizontal): scc_transform.apply(kernel, role='kernel') # Check removal of horizontal loops - loops = [l for l in FindNodes(Loop).visit(kernel.body) if l.variable == 'jl'] + loops = [l for l in FindNodes(ir.Loop).visit(kernel.body) if l.variable == 'jl'] assert not loops # Check number and content of vector sections - sections = [s for s in FindNodes(Section).visit(kernel.body) if s.label == 'vector_section'] + sections = [ + s for s in 
FindNodes(ir.Section).visit(kernel.body) + if s.label == 'vector_section' + ] assert len(sections) == 4 - assigns = FindNodes(Assignment).visit(sections[0]) + assigns = FindNodes(ir.Assignment).visit(sections[0]) assert len(assigns) == 2 - assigns = FindNodes(Assignment).visit(sections[1]) + assigns = FindNodes(ir.Assignment).visit(sections[1]) assert len(assigns) == 1 - assigns = FindNodes(Assignment).visit(sections[2]) + assigns = FindNodes(ir.Assignment).visit(sections[2]) assert len(assigns) == 1 - assigns = FindNodes(Assignment).visit(sections[3]) + assigns = FindNodes(ir.Assignment).visit(sections[3]) assert len(assigns) == 1 @@ -348,15 +380,17 @@ def test_scc_vector_inlined_call(frontend, horizontal): for transform in scc_transform: transform.apply(routine, role='kernel', targets=['some_kernel', 'some_inlined_kernel']) - # Check loki pragma has been removed - assert not FindNodes(Pragma).visit(routine.body) + # Check only `!$loki loop vector` pragma has been inserted + pragmas = FindNodes(ir.Pragma).visit(routine.body) + assert len(pragmas) == 1 + assert is_loki_pragma(pragmas[0], starts_with='loop vector') # Check that 'some_inlined_kernel' remains within vector-parallel region - loops = FindNodes(Loop).visit(routine.body) + loops = FindNodes(ir.Loop).visit(routine.body) assert len(loops) == 1 - calls = FindNodes(CallStatement).visit(loops[0].body) + calls = FindNodes(ir.CallStatement).visit(loops[0].body) assert len(calls) == 1 - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 2 @@ -393,9 +427,12 @@ def test_scc_vector_section_trim_simple(frontend, horizontal, trim_vector_sectio for transform in scc_transform: transform.apply(routine, role='kernel', targets=['some_kernel',]) - assign = FindNodes(Assignment).visit(routine.body)[0] - loop = FindNodes(Loop).visit(routine.body)[0] - comment = [c for c in FindNodes(Comment).visit(routine.body) if c.text == '! 
random comment'][0] + assign = FindNodes(ir.Assignment).visit(routine.body)[0] + loop = FindNodes(ir.Loop).visit(routine.body)[0] + comment = [ + c for c in FindNodes(ir.Comment).visit(routine.body) + if c.text == '! random comment' + ][0] # check we found the right assignment assert assign.lhs.name.lower() == 'flag0' @@ -457,8 +494,8 @@ def test_scc_vector_section_trim_nested(frontend, horizontal, trim_vector_sectio for transform in scc_transform: transform.apply(routine, role='kernel', targets=['some_kernel',]) - cond = FindNodes(Conditional).visit(routine.body)[0] - loop = FindNodes(Loop).visit(routine.body)[0] + cond = FindNodes(ir.Conditional).visit(routine.body)[0] + loop = FindNodes(ir.Loop).visit(routine.body)[0] if trim_vector_sections: assert cond not in loop.body @@ -509,19 +546,19 @@ def test_scc_vector_section_trim_complex( ) scc_pipeline.apply(routine, role='kernel', targets=['some_kernel',]) - assign = FindNodes(Assignment).visit(routine.body)[0] + assign = FindNodes(ir.Assignment).visit(routine.body)[0] # check we found the right assignment assert assign.lhs.name.lower() == 'flag1' - cond = FindNodes(Conditional).visit(routine.body)[0] - loop = FindNodes(Loop).visit(routine.body)[0] + cond = FindNodes(ir.Conditional).visit(routine.body)[0] + loop = FindNodes(ir.Loop).visit(routine.body)[0] assert cond in loop.body assert cond not in routine.body.body if trim_vector_sections: assert assign not in loop.body - assert(len(FindNodes(Assignment).visit(loop.body)) == 3) + assert(len(FindNodes(ir.Assignment).visit(loop.body)) == 3) else: assert assign in loop.body - assert(len(FindNodes(Assignment).visit(loop.body)) == 4) + assert(len(FindNodes(ir.Assignment).visit(loop.body)) == 4) diff --git a/loki/transformations/single_column/vector.py b/loki/transformations/single_column/vector.py index b938e5359..7d200d960 100644 --- a/loki/transformations/single_column/vector.py +++ b/loki/transformations/single_column/vector.py @@ -5,6 +5,8 @@ # granted to it by 
virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +import re + from more_itertools import split_at from loki.analyse import dataflow_analysis_attached @@ -14,7 +16,7 @@ ) from loki.ir import ( nodes as ir, FindNodes, FindScopes, Transformer, - NestedTransformer, is_loki_pragma, pragmas_attached + NestedTransformer, is_loki_pragma, pragmas_attached, pragma_regions_attached ) from loki.tools import as_tuple, flatten from loki.types import BasicType @@ -22,13 +24,13 @@ from loki.transformations.array_indexing import demote_variables from loki.transformations.utilities import ( get_integer_variable, get_loop_bounds, find_driver_loops, - get_local_arrays, check_routine_pragmas + get_local_arrays, check_routine_sequential ) __all__ = [ 'SCCDevectorTransformation', 'SCCRevectorTransformation', - 'SCCDemoteTransformation' + 'SCCDemoteTransformation', 'wrap_vector_section' ] @@ -97,7 +99,7 @@ def extract_vector_sections(cls, section, horizontal): # check if calls have been enriched if not call.routine is BasicType.DEFERRED: # check if called routine is marked as sequential - if check_routine_pragmas(routine=call.routine, directive=None): + if check_routine_sequential(routine=call.routine): continue if call in section: @@ -249,6 +251,39 @@ def process_driver(self, routine, targets=()): routine.body = Transformer(driver_loop_map).visit(routine.body) +def wrap_vector_section(section, routine, horizontal, insert_pragma=True): + """ + Wrap a section of nodes in a vector-level loop across the horizontal. + + Parameters + ---------- + section : tuple of :any:`Node` + A section of nodes to be wrapped in a vector-level loop + routine : :any:`Subroutine` + The subroutine providing the horizontal loop bounds and index variable. 
+ horizontal: :any:`Dimension` + The dimension specifying the horizontal vector dimension + insert_pragma: bool, optional + Adds a ``!$loki loop vector`` pragma around the created loop + """ + bounds = get_loop_bounds(routine, dimension=horizontal) + + # Create a single loop around the horizontal from a given body + index = get_integer_variable(routine, horizontal.index) + bounds = sym.LoopRange(bounds) + + # Ensure we clone all body nodes, to avoid recursion issues + body = Transformer().visit(section) + + # Add a marker pragma for later annotations + pragma = (ir.Pragma('loki', content='loop vector'),) if insert_pragma else None + vector_loop = ir.Loop(variable=index, bounds=bounds, body=body, pragma=pragma) + + # Add a comment before and after the pragma-annotated loop to ensure + # we do not overlap with neighbouring pragmas + return (ir.Comment(''), vector_loop, ir.Comment('')) + + class SCCRevectorTransformation(Transformation): """ A transformation to wrap thread-parallel IR sections within a horizontal loop. @@ -265,56 +300,154 @@ def __init__(self, horizontal, remove_vector_section=False): self.horizontal = horizontal self.remove_vector_section = remove_vector_section - @classmethod - def wrap_vector_section(cls, section, routine, horizontal): + def revector_section(self, routine, section): """ - Wrap a section of nodes in a vector-level loop across the horizontal. + Wrap all thread-parallel :any:`Section` objects within a given + code section in a horizontal loop and mark interior loops as + ``!$loki loop seq``. Parameters ---------- - section : tuple of :any:`Node` - A section of nodes to be wrapped in a vector-level loop routine : :any:`Subroutine` - The subroutine in the vector loops should be removed. - horizontal: :any:`Dimension` - The dimension specifying the horizontal vector dimension + Subroutine to apply this transformation to. + section : tuple of :any:`Node` + Code section in which to replace vector-parallel + :any:`Section` objects. 
+ """ + # Wrap all thread-parallel sections into horizontal thread loops + mapper = { + s: wrap_vector_section(s.body, routine, self.horizontal) + for s in FindNodes(ir.Section).visit(section) + if s.label == 'vector_section' + } + return Transformer(mapper).visit(section) + + def mark_vector_reductions(self, routine, section): + """ + Mark vector-reduction loops in marked vector-reduction + regions. + + If a region explicitly marked with + ``!$loki vector-reduction()``/ + ``!$loki end vector-reduction`` is encountered, we replace + existing ``!$loki loop vector`` loop pragmas and add the + reduction keyword and clause. These will be turned into + OpenACC equivalents by :any:`SCCAnnotate`. + """ + with pragma_regions_attached(routine): + for region in FindNodes(ir.PragmaRegion).visit(section): + if is_loki_pragma(region.pragma, starts_with='vector-reduction'): + if (reduction_clause := re.search(r'reduction\([\w:0-9 \t]+\)', region.pragma.content)): + + loops = FindNodes(ir.Loop).visit(region) + assert len(loops) == 1 + pragma = ir.Pragma(keyword='loki', content=f'loop vector {reduction_clause[0]}') + # Update loop and region in place to remove marker pragmas + loops[0]._update(pragma=(pragma,)) + region._update(pragma=None, pragma_post=None) + + + def mark_seq_loops(self, section): """ - bounds = get_loop_bounds(routine, dimension=horizontal) + Mark interior sequential loops in a thread-parallel section + with ``!$loki loop seq`` for later annotation. - # Create a single loop around the horizontal from a given body - index = get_integer_variable(routine, horizontal.index) - bounds = sym.LoopRange(bounds) + This utility requires loop-pragmas to be attached via + :any:`pragmas_attached`. It also updates loops in-place. + + Parameters + ---------- + section : tuple of :any:`Node` + Code section in which to mark "seq loops". 
+ """ + for loop in FindNodes(ir.Loop).visit(section): - # Ensure we clone all body nodes, to avoid recursion issues - vector_loop = ir.Loop(variable=index, bounds=bounds, body=Transformer().visit(section)) + # Skip loops explicitly marked with `!$loki/claw nodep` + if loop.pragma and any('nodep' in p.content.lower() for p in as_tuple(loop.pragma)): + continue - # Add a comment before and after the pragma-annotated loop to ensure - # we do not overlap with neighbouring pragmas - return (ir.Comment(''), vector_loop, ir.Comment('')) + # Mark loop as sequential with `!$loki loop seq` + if loop.variable != self.horizontal.index: + loop._update(pragma=(ir.Pragma(keyword='loki', content='loop seq'),)) + + def mark_driver_loop(self, routine, loop): + """ + Add ``!$loki loop driver`` pragmas to outer block loops and + add ``vector_length(size)`` clause for later annotations. + + This method assumes that pragmas have been attached via + :any:`pragmas_attached`. + """ + # Find a horizontal size variable to mark vector_length + symbol_map = routine.symbol_map + sizes = tuple( + symbol_map.get(size) for size in self.horizontal.size_expressions + if size in symbol_map + ) + vector_length = f' vector_length({sizes[0]})' if sizes else '' + + # Replace existing `!$loki driver-loop` markers, but leave all others + pragma = ir.Pragma(keyword='loki', content=f'loop driver{vector_length}') + loop_pragmas = tuple( + p for p in as_tuple(loop.pragma) if not is_loki_pragma(p, starts_with='driver-loop') + ) + loop._update(pragma=loop_pragmas + (pragma,)) def transform_subroutine(self, routine, **kwargs): """ - Apply SCCRevector utilities to a :any:`Subroutine`. - It wraps all thread-parallel sections within - a horizontal loop. The markers placed by :any:`SCCDevectorTransformation` are removed + Wrap vector-parallel sections in vector :any:`Loop` objects. + + This wraps all thread-parallel sections within "kernel" + routines or within the parallel loops in "driver" routines. 
+ + The markers placed by :any:`SCCDevectorTransformation` are removed Parameters ---------- routine : :any:`Subroutine` Subroutine to apply this transformation to. + role : str + Must be either ``"kernel"`` or ``"driver"`` + targets : tuple or str + Tuple of target routine names for determining "driver" loops """ - mapper = {s.body: self.wrap_vector_section(s.body, routine, self.horizontal) - for s in FindNodes(ir.Section).visit(routine.body) - if s.label == 'vector_section'} - routine.body = NestedTransformer(mapper).visit(routine.body) - - if self.remove_vector_section: - # Remove the vector section wrappers - # These have been inserted by SCCDevectorTransformation - section_mapper = {s: s.body for s in FindNodes(ir.Section).visit(routine.body) - if s.label == 'vector_section'} - if section_mapper: - routine.body = Transformer(section_mapper).visit(routine.body) + role = kwargs['role'] + targets = kwargs.get('targets', ()) + + if role == 'kernel': + # Skip if kernel is marked as `!$loki routine seq` + if check_routine_sequential(routine): + return + + # Revector all marked vector sections within the kernel body + routine.body = self.revector_section(routine, routine.body) + + with pragmas_attached(routine, ir.Loop): + # Check for explicitly labelled vector-reduction regions + self.mark_vector_reductions(routine, routine.body) + + # Mark sequential loops inside vector sections + self.mark_seq_loops(routine.body) + + # Mark subroutine as vector parallel for later annotation + routine.spec.append(ir.Pragma(keyword='loki', content='routine vector')) + + if role == 'driver': + with pragmas_attached(routine, ir.Loop): + driver_loops = find_driver_loops(routine=routine, targets=targets) + + for loop in driver_loops: + # Revector all marked sections within the driver loop body + loop._update(body=self.revector_section(routine, loop.body)) + + # Check for explicitly labelled vector-reduction regions + self.mark_vector_reductions(routine, loop.body) + + # Mark sequential 
loops inside vector sections + self.mark_seq_loops(loop.body) + + # Mark outer driver loops + self.mark_driver_loop(routine, loop) class SCCDemoteTransformation(Transformation): diff --git a/loki/transformations/tests/test_utilities.py b/loki/transformations/tests/test_utilities.py index f9fb65697..53a91ca74 100644 --- a/loki/transformations/tests/test_utilities.py +++ b/loki/transformations/tests/test_utilities.py @@ -19,7 +19,7 @@ single_variable_declaration, recursive_expression_map_update, convert_to_lower_case, replace_intrinsics, rename_variables, get_integer_variable, get_loop_bounds, is_driver_loop, - find_driver_loops, get_local_arrays, check_routine_pragmas + find_driver_loops, get_local_arrays, check_routine_sequential ) @@ -520,11 +520,11 @@ def test_transform_utilites_get_local_arrays(frontend, tmp_path): @pytest.mark.parametrize('frontend', available_frontends()) -def test_transform_utilites_check_routine_pragmas(frontend, tmp_path): - """ Test :any:`check_routine_pragmas` utility. """ +def test_transform_utilites_check_routine_sequential(frontend, tmp_path): + """ Test :any:`check_routine_sequential` utility. 
""" fcode = """ -module test_check_routine_pragmas_mod +module test_check_routine_sequential_mod implicit none contains @@ -546,12 +546,10 @@ def test_transform_utilites_check_routine_pragmas(frontend, tmp_path): i = i + 1 end subroutine test_acc_vec -end module test_check_routine_pragmas_mod +end module test_check_routine_sequential_mod """ module = Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) - # TODO: This utility needs some serious clean-up, so we're just testing - # the bare basics here and promise to do better next time ;) - assert check_routine_pragmas(module['test_acc_seq'], directive=None) - assert check_routine_pragmas(module['test_loki_seq'], directive=None) - assert check_routine_pragmas(module['test_acc_vec'], directive='openacc') + assert not check_routine_sequential(module['test_acc_seq']) + assert check_routine_sequential(module['test_loki_seq']) + assert not check_routine_sequential(module['test_acc_vec']) diff --git a/loki/transformations/utilities.py b/loki/transformations/utilities.py index 9fb7ad07b..76ab9eafd 100644 --- a/loki/transformations/utilities.py +++ b/loki/transformations/utilities.py @@ -19,7 +19,7 @@ ) from loki.ir import ( nodes as ir, Import, TypeDef, VariableDeclaration, - StatementFunction, Transformer, FindNodes + StatementFunction, Transformer, FindNodes, is_loki_pragma ) from loki.module import Module from loki.subroutine import Subroutine @@ -32,7 +32,7 @@ 'sanitise_imports', 'replace_selected_kind', 'single_variable_declaration', 'recursive_expression_map_update', 'get_integer_variable', 'get_loop_bounds', 'find_driver_loops', - 'get_local_arrays', 'check_routine_pragmas' + 'get_local_arrays', 'check_routine_sequential' ] @@ -585,7 +585,8 @@ def is_driver_loop(loop, targets): """ if loop.pragma: for pragma in loop.pragma: - if pragma.keyword.lower() == "loki" and pragma.content.lower() == "driver-loop": + if is_loki_pragma(pragma, starts_with='driver-loop') or \ + is_loki_pragma(pragma, starts_with='loop 
driver'): return True for call in FindNodes(ir.CallStatement).visit(loop.body): if call.name in targets: @@ -651,41 +652,17 @@ def get_local_arrays(routine, section, unique=True): return arrays -def check_routine_pragmas(routine, directive): +def check_routine_sequential(routine): """ - Check if routine is marked as sequential or has already been processed. + Check if routine is marked as "sequential". Parameters ---------- routine : :any:`Subroutine` Subroutine to perform checks on. - directive: string or None - Directives flavour to use for parallelism annotations; either - ``'openacc'`` or ``None``. - """ - - pragmas = FindNodes(ir.Pragma).visit(routine.ir) - routine_pragmas = [p for p in pragmas if p.keyword.lower() in ['loki', 'acc']] - routine_pragmas = [p for p in routine_pragmas if 'routine' in p.content.lower()] - - seq_pragmas = [r for r in routine_pragmas if 'seq' in r.content.lower()] - if seq_pragmas: - loki_seq_pragmas = [r for r in routine_pragmas if 'loki' == r.keyword.lower()] - if loki_seq_pragmas: - if directive == 'openacc': - # Mark routine as acc seq - mapper = {seq_pragmas[0]: None} - routine.spec = Transformer(mapper).visit(routine.spec) - routine.body = Transformer(mapper).visit(routine.body) - - # Append the acc pragma to routine.spec, regardless of where the corresponding - # loki pragma is found - routine.spec.append(ir.Pragma(keyword='acc', content='routine seq')) - return True - - vec_pragmas = [r for r in routine_pragmas if 'vector' in r.content.lower()] - if vec_pragmas: - if directive == 'openacc': + """ + for pragma in FindNodes(ir.Pragma).visit(routine.ir): + if is_loki_pragma(pragma, starts_with='routine seq'): return True return False