From 27a0145a42f5dfce270ed5d9811b1838e457a4ce Mon Sep 17 00:00:00 2001 From: Fabio Luporini Date: Tue, 10 Oct 2023 14:29:57 +0000 Subject: [PATCH] compiler: Prioritize large sparse loops over tiny ones --- devito/passes/iet/parpragma.py | 28 +++++++++++++++++++--------- tests/test_dle.py | 19 ++++++++++++++++++- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 1deb4f3f8bf..6bfe2a4c423 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -266,23 +266,33 @@ def _score_candidate(self, n0, root, collapsable=()): # Number of fully-parallel collapsable Iterations key = lambda i: i.is_ParallelNoAtomic - fpiters = list(takewhile(key, nest)) - nfpiters = len(fpiters) + fp_iters = list(takewhile(key, nest)) + n_fp_iters = len(fp_iters) + + # Number of parallel-if-atomic collapsable Iterations + key = lambda i: i.is_ParallelAtomic + pia_iters = list(takewhile(key, nest)) + n_pia_iters = len(pia_iters) # Prioritize the Dimensions that are more likely to define larger # iteration spaces - fpdims = [i.dim for i in fpiters] key = lambda d: (not d.is_Derived or - d.is_Custom or # NOTE: might use a refinement + (d.is_Custom and not is_integer(d.symbolic_size)) or (d.is_Block and d._depth == 1)) - nfpiters_large = len([d for d in fpdims if key(d)]) + + fpdims = [i.dim for i in fp_iters] + n_fp_iters_large = len([d for d in fpdims if key(d)]) + + piadims = [i.dim for i in pia_iters] + n_pia_iters_large = len([d for d in piadims if key(d)]) return ( - int(nfpiters == n), # Fully-parallel nest - int(nfpiters == 0 and n), # Fully-atomic nest - nfpiters_large, + int(n_fp_iters == n), # Fully-parallel nest + n_fp_iters_large, + n_pia_iters_large, + n_pia_iters, -(n0 + 1), # The outer, the better - nfpiters, + n_fp_iters, n, ) diff --git a/tests/test_dle.py b/tests/test_dle.py index 3a94f46a9dd..520405f8399 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -284,7 +284,7 @@ def test_cache_blocking_structure_optrelax_prec_inject(): 'openmp': True, 'par-collapse-ncores': 1})) - assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'], + assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'], 't,p_s0_blk0,p_s,rsx,rsy') @@ -897,6 +897,23 @@ def test_incr_perfect_outer(self): op() assert np.all(w.data == 10) + def test_incr_perfect_sparse_outer(self): + grid = Grid(shape=(3, 3, 3)) + + u = TimeFunction(name='u', grid=grid) + s = SparseTimeFunction(name='u', grid=grid, npoint=1, nt=11) + + eqns = s.inject(u, expr=s) + + op = Operator(eqns, opt=('advanced', {'par-collapse-ncores': 0})) + + iters = FindNodes(Iteration).visit(op) + assert len(iters) == 5 + assert iters[0].is_Sequential + assert all(i.is_ParallelAtomic for i in iters[1:]) + assert iters[1].pragmas[0].value == 'omp for schedule(dynamic,chunk_size)' + assert all(not i.pragmas for i in iters[2:]) + @pytest.mark.parametrize('exprs,simd_level,expected', [ (['Eq(y.symbolic_max, g[0, x], implicit_dims=(t, x))', 'Inc(h1[0, 0], 1, implicit_dims=(t, x, y))'],