diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py index faab63b238..eac09c0f8d 100644 --- a/devito/ir/clusters/algorithms.py +++ b/devito/ir/clusters/algorithms.py @@ -6,13 +6,16 @@ import sympy from devito.exceptions import InvalidOperator +from devito.finite_differences.elementary import Max, Min from devito.ir.support import (Any, Backward, Forward, IterationSpace, erange, pull_dims) +from devito.ir.equations import OpMin, OpMax from devito.ir.clusters.analysis import analyze from devito.ir.clusters.cluster import Cluster, ClusterGroup from devito.ir.clusters.visitors import Queue, QueueStateful, cluster_pass from devito.mpi.halo_scheme import HaloScheme, HaloTouch -from devito.symbolics import retrieve_indexed, uxreplace, xreplace_indices +from devito.symbolics import (limits_mapper, retrieve_indexed, uxreplace, + xreplace_indices) from devito.tools import (DefaultOrderedDict, Stamp, as_mapper, flatten, is_integer, timed_pass, toposort) from devito.types import Array, Eq, Symbol @@ -41,7 +44,7 @@ def clusterize(exprs, **kwargs): # Determine relevant computational properties (e.g., parallelism) clusters = analyze(clusters) - # Input normalization (e.g., SSA) + # Input normalization clusters = normalize(clusters, **kwargs) # Derive the necessary communications for distributed-memory parallelism @@ -432,8 +435,11 @@ def normalize(clusters, **kwargs): sregistry = kwargs['sregistry'] clusters = normalize_nested_indexeds(clusters, sregistry) - clusters = normalize_reductions_dense(clusters, sregistry, options) - clusters = normalize_reductions_sparse(clusters, sregistry, options) + if options['mapify-reduce']: + clusters = normalize_reductions_dense(clusters, sregistry) + else: + clusters = normalize_reductions_minmax(clusters) + clusters = normalize_reductions_sparse(clusters, sregistry) return clusters @@ -475,26 +481,62 @@ def pull_indexeds(expr, subs, mapper, parent=None): return cluster.rebuild(processed) -def normalize_reductions_dense(cluster, sregistry, options): +@cluster_pass(mode='dense') +def normalize_reductions_minmax(cluster): + """ + Initialize the reduction variables to their neutral element and use them + to compute the reduction. + """ + dims = [d for d in cluster.ispace.itdims + if cluster.properties.is_parallel_atomic(d)] + if not dims: + return cluster + + init = [] + processed = [] + for e in cluster.exprs: + lhs, rhs = e.args + f = lhs.function + + if e.operation is OpMin: + if not f.is_Input: + expr = Eq(lhs, limits_mapper[lhs.dtype].max) + ispace = cluster.ispace.project(lambda i: i not in dims) + init.append(cluster.rebuild(exprs=expr, ispace=ispace)) + + processed.append(e.func(lhs, Min(lhs, rhs))) + + elif e.operation is OpMax: + if not f.is_Input: + expr = Eq(lhs, limits_mapper[lhs.dtype].min) + ispce = cluster.ispace.project(lambda i: i not in dims) + init.append(cluster.rebuild(exprs=expr, ispace=ispce)) + + processed.append(e.func(lhs, Max(lhs, rhs))) + + else: + processed.append(e) + + return init + [cluster.rebuild(processed)] + + +def normalize_reductions_dense(cluster, sregistry): """ Extract the right-hand sides of reduction Eq's in to temporaries. """ - return _normalize_reductions_dense(cluster, sregistry, options, {}) + return _normalize_reductions_dense(cluster, sregistry, {}) @cluster_pass(mode='dense') -def _normalize_reductions_dense(cluster, sregistry, options, mapper): - opt_mapify_reduce = options['mapify-reduce'] - +def _normalize_reductions_dense(cluster, sregistry, mapper): dims = [d for d in cluster.ispace.itdims if cluster.properties.is_parallel_atomic(d)] - if not dims: return cluster processed = [] for e in cluster.exprs: - if e.is_Reduction and opt_mapify_reduce: + if e.is_Reduction: # Transform `e` into what is in essence an explicit map-reduce # For example, turn: # `s += f(u[x], v[x], ...)` @@ -529,7 +571,7 @@ def _normalize_reductions_dense(cluster, sregistry, options, mapper): @cluster_pass(mode='sparse') -def normalize_reductions_sparse(cluster, sregistry, options): +def normalize_reductions_sparse(cluster, sregistry): """ Extract the right-hand sides of reduction Eq's in to temporaries. """ diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py index bdcaa3c802..e1e487df46 100644 --- a/devito/ir/equations/equation.py +++ b/devito/ir/equations/equation.py @@ -79,7 +79,7 @@ def __repr__(self): elif self.operation is OpInc: return '%s += %s' % (self.lhs, self.rhs) else: - return '%s = %s(%s, %s)' % (self.lhs, self.operation, self.lhs, self.rhs) + return '%s = %s(%s)' % (self.lhs, self.operation, self.rhs) # Pickling support __reduce_ex__ = Pickable.__reduce_ex__ diff --git a/devito/passes/clusters/factorization.py b/devito/passes/clusters/factorization.py index 997b6d81fc..33253e245e 100644 --- a/devito/passes/clusters/factorization.py +++ b/devito/passes/clusters/factorization.py @@ -180,8 +180,7 @@ def _collect_nested(expr): return expr, {'funcs': expr} elif expr.is_Pow: return expr, {'pows': expr} - elif (expr.is_Symbol or - expr.is_Indexed or + elif (expr.is_Symbol or expr.is_Indexed or not expr.args or isinstance(expr, (BasicWrapperMixin, AbstractObject))): return expr, {} elif expr.is_Add: diff --git a/devito/passes/iet/misc.py b/devito/passes/iet/misc.py index e4db2c41c9..c4a5cb6397 100644 --- a/devito/passes/iet/misc.py +++ b/devito/passes/iet/misc.py @@ -1,6 +1,7 @@ from functools import singledispatch import cgen +import numpy as np import sympy from devito.finite_differences import Max, Min @@ -9,7 +10,7 @@ filter_iterations, retrieve_iteration_tree, pull_dims) from devito.passes.iet.engine import iet_pass from devito.ir.iet.efunc import DeviceFunction, EntryFunction -from devito.symbolics import evalrel, has_integer_args +from devito.symbolics import ValueLimit, evalrel, has_integer_args, limits_mapper from devito.tools import as_mapper, filter_ordered, split __all__ = ['avoid_denormals', 'hoist_prodders', 'relax_incr_dimensions', @@ -136,10 +137,20 @@ def relax_incr_dimensions(iet, options=None, **kwargs): @iet_pass def generate_macros(iet): + # Generate Macros from higher-level SymPy objects applications = FindApplications().visit(iet) headers = set().union(*[_generate_macros(i) for i in applications]) - return iet, {'headers': headers} + # Some special Symbols may represent Macros defined in standard libraries, + # so we need to include the respective includes + limits = FindApplications(ValueLimit).visit(iet) + includes = set() + if limits & (set(limits_mapper[np.int32]) | set(limits_mapper[np.int64])): + includes.add('limits.h') + elif limits & (set(limits_mapper[np.float32]) | set(limits_mapper[np.float64])): + includes.add('float.h') + + return iet, {'headers': headers, 'includes': includes} @singledispatch diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 252cd9ddd6..cc5d5dd5f9 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -1,3 +1,5 @@ +from collections import defaultdict + import numpy as np import cgen as c from cached_property import cached_property @@ -173,7 +175,7 @@ def _make_clause_reduction_from_imask(cls, reductions): Build a string representing of a reduction clause given a list of 2-tuples `(symbol, ir.Operation)`. """ - args = [] + mapper = defaultdict(list) for i, imask, r in reductions: if i.is_Indexed: f = i.function @@ -188,10 +190,13 @@ def _make_clause_reduction_from_imask(cls, reductions): else: assert isinstance(k, tuple) and len(k) == 2 bounds.append('[%s:%s]' % k) - args.append('%s%s' % (i.name, ''.join(bounds))) + mapper[r.name].append('%s%s' % (i.name, ''.join(bounds))) else: - args.append(str(i)) - return 'reduction(%s:%s)' % (r.name, ','.join(args)) + mapper[r.name].append(str(i)) + + args = ['reduction(%s:%s)' % (k, ','.join(v)) for k, v in mapper.items()] + + return ' '.join(args) @cached_property def collapsed(self): diff --git a/devito/symbolics/extended_sympy.py b/devito/symbolics/extended_sympy.py index 8ff49b5e5e..07dfdf13a4 100644 --- a/devito/symbolics/extended_sympy.py +++ b/devito/symbolics/extended_sympy.py @@ -7,9 +7,10 @@ from sympy import Expr, Function, Number, Tuple, sympify from sympy.core.decorators import call_highest_priority -from devito.tools import (Pickable, as_tuple, is_integer, float2, float3, float4, # noqa - double2, double3, double4, int2, int3, int4) from devito.finite_differences.elementary import Min, Max +from devito.tools import (Pickable, Bunch, as_tuple, is_integer, float2, # noqa + float3, float4, double2, double3, double4, int2, int3, + int4) from devito.types import Symbol from devito.types.basic import Basic @@ -17,9 +18,9 @@ 'CallFromComposite', 'FieldFromPointer', 'FieldFromComposite', 'ListInitializer', 'Byref', 'IndexedPointer', 'Cast', 'DefFunction', 'MathFunction', 'InlineIf', 'ReservedWord', 'Keyword', 'String', - 'Macro', 'Class', 'MacroArgument', 'CustomType', 'Deref', - 'Namespace', 'Rvalue', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null', - 'SizeOf', 'rfunc', 'cast_mapper', 'BasicWrapperMixin'] + 'Macro', 'Class', 'MacroArgument', 'CustomType', 'Deref', 'Namespace', + 'Rvalue', 'INT', 'FLOAT', 'DOUBLE', 'VOID', 'Null', 'SizeOf', 'rfunc', + 'cast_mapper', 'BasicWrapperMixin', 'ValueLimit', 'limits_mapper'] class CondEq(sympy.Eq): @@ -497,6 +498,9 @@ def __str__(self): def _hashable_content(self): return (self.value,) + def _sympystr(self, printer): + return str(self) + # Pickling support __reduce_ex__ = Pickable.__reduce_ex__ @@ -533,6 +537,24 @@ def __str__(self): __repr__ = __str__ +class ValueLimit(ReservedWord, sympy.Expr): + + """ + Symbolic representation of the so called limits macros, which provide the + minimum and maximum limits for various types, such as INT_MIN, INT_MAX etc. + """ + + pass + + +limits_mapper = { + np.int32: Bunch(min=ValueLimit('INT_MIN'), max=ValueLimit('INT_MAX')), + np.int64: Bunch(min=ValueLimit('LONG_MIN'), max=ValueLimit('LONG_MAX')), + np.float32: Bunch(min=-ValueLimit('FLT_MAX'), max=ValueLimit('FLT_MAX')), + np.float64: Bunch(min=-ValueLimit('DBL_MAX'), max=ValueLimit('DBL_MAX')), +} + + class DefFunction(Function, Pickable): """ diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py index fbaf2757d2..7e720ae689 100644 --- a/devito/symbolics/printer.py +++ b/devito/symbolics/printer.py @@ -251,7 +251,7 @@ def _print_Fallback(self, expr): _print_MacroArgument = _print_Fallback _print_IndexedBase = _print_Fallback _print_IndexSum = _print_Fallback - _print_Keyword = _print_Fallback + _print_ReservedWord = _print_Fallback _print_Basic = _print_Fallback diff --git a/tests/test_dle.py b/tests/test_dle.py index 7bcfcae52e..b2896dac4f 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -7,8 +7,8 @@ from conftest import assert_structure, assert_blocking, _R, skipif from devito import (Grid, Function, TimeFunction, SparseTimeFunction, SpaceDimension, CustomDimension, Dimension, SubDimension, - PrecomputedSparseTimeFunction, Eq, Inc, ReduceMax, Operator, - configuration, dimensions, info, cos) + PrecomputedSparseTimeFunction, Eq, Inc, ReduceMin, ReduceMax, + Operator, configuration, dimensions, info, cos) from devito.exceptions import InvalidArgument from devito.ir.iet import (Iteration, FindNodes, IsPerfectIteration, retrieve_iteration_tree, Expression) @@ -877,7 +877,7 @@ def test_mapify_reduction_sparse(self): def test_array_max_reduction(self): """ - Test generation of OpenMP sum-reduction clauses involving Function's. + Test generation of OpenMP max-reduction clauses involving Function's. """ grid = Grid(shape=(3, 3, 3)) i = Dimension(name='i') @@ -902,6 +902,40 @@ def test_array_max_reduction(self): with pytest.raises(NotImplementedError): Operator(eqn, opt=('advanced', {'openmp': True})) + def test_array_minmax_reduction(self): + """ + Test generation of OpenMP combined min- and max-reduction clauses + involving Function's. + """ + grid = Grid(shape=(3, 3, 3)) + i = Dimension(name='i') + + f = Function(name='f', grid=grid) + n = Function(name='n', grid=grid, shape=(2,), dimensions=(i,)) + r0 = Symbol(name='r0', dtype=grid.dtype) + r1 = Symbol(name='r1', dtype=grid.dtype) + + f.data[:] = np.arange(0, 27).reshape((3, 3, 3)) + + eqns = [ReduceMax(r0, f), + ReduceMin(r1, f), + Eq(n[0], r0), + Eq(n[1], r1)] + + if not Ompizer._support_array_reduction(configuration['compiler']): + return + + op = Operator(eqns) + + if configuration['language'] == 'openmp': + iterations = FindNodes(Iteration).visit(op) + expected = "reduction(max:r0) reduction(min:r1)" + assert expected in iterations[0].pragmas[0].value + + op() + assert n.data[0] == 26 + assert n.data[1] == 0 + def test_incs_no_atomic(self): """ Test that `Inc`'s don't get a `#pragma omp atomic` if performing