From d6a20ae6ef0de24659794331115be575a1cf491d Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 22 Oct 2024 18:43:10 +0300 Subject: [PATCH 01/16] compiler: Initialize additional halo opt pass --- devito/ir/iet/nodes.py | 5 +- devito/ir/support/basic.py | 2 +- devito/mpi/halo_scheme.py | 8 +- devito/passes/iet/mpi.py | 156 ++++++++++++++++++++++++++++++++++++- tests/mfe_1d.py | 42 ++++++++++ tests/test_mpi.py | 51 +++++++++++- 6 files changed, 257 insertions(+), 7 deletions(-) create mode 100644 tests/mfe_1d.py diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 9bcc3460f6..ab87264255 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1457,8 +1457,9 @@ def __init__(self, body, halo_scheme): self._halo_scheme = halo_scheme def __repr__(self): - functions = "(%s)" % ",".join(i.name for i in self.functions) - return "<%s%s>" % (self.__class__.__name__, functions) + functions = "(%s" % ",".join(i.name for i in self.functions) + loc_indices = "[%s])" % ",".join(str(i) for i in self.halo_scheme.loc_indices2) + return "<%s%s%s>" % (self.__class__.__name__, functions, loc_indices) @property def halo_scheme(self): diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py index bb558c8d58..6f59b442e8 100644 --- a/devito/ir/support/basic.py +++ b/devito/ir/support/basic.py @@ -25,7 +25,7 @@ class IndexMode(Tag): REGULAR = IndexMode('regular') IRREGULAR = IndexMode('irregular') -# Symbols to create mock data depdendencies +# Symbols to create mock data dependencies mocksym0 = Symbol(name='__⋈_0__') mocksym1 = Symbol(name='__⋈_1__') diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 7074b4c5d5..e75c7d514d 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -95,7 +95,8 @@ def __init__(self, exprs, ispace): def __repr__(self): fnames = ",".join(i.name for i in set(self._mapper)) - return "HaloScheme<%s>" % fnames + loc_indices = "[%s]" % ",".join(str(i) for i in self.loc_indices2) + return "HaloScheme<%s%s>" % (fnames, loc_indices) def __eq__(self, other): return (isinstance(other, HaloScheme) and @@ -120,6 +121,7 @@ def union(self, halo_schemes): """ Create a new HaloScheme from the union of a set of HaloSchemes. """ + # import pdb; pdb.set_trace() halo_schemes = [hs for hs in halo_schemes if hs is not None] if not halo_schemes: return None @@ -365,6 +367,10 @@ def distributed_aindices(self): def loc_indices(self): return set().union(*[i.loc_indices.keys() for i in self.fmapper.values()]) + @cached_property + def loc_indices2(self): + return set().union(*[i.loc_indices.values() for i in self.fmapper.values()]) + @cached_property def arguments(self): return self.dimensions | set(flatten(self.honored.values())) diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 9f49440709..ca68205f21 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -7,11 +7,11 @@ retrieve_iteration_tree) from devito.ir.support import PARALLEL, Scope from devito.ir.support.guards import GuardFactorEq -from devito.mpi.halo_scheme import HaloScheme +from devito.mpi.halo_scheme import HaloScheme, HaloSchemeEntry from devito.mpi.reduction_scheme import DistReduce from devito.mpi.routines import HaloExchangeBuilder, ReductionBuilder from devito.passes.iet.engine import iet_pass -from devito.tools import generator +from devito.tools import generator, frozendict __all__ = ['mpiize'] @@ -24,6 +24,9 @@ def optimize_halospots(iet, **kwargs): """ iet = _drop_halospots(iet) iet = _hoist_halospots(iet) + + iet = _merge_halospots_byfunc(iet) + iet = _merge_halospots(iet) iet = _drop_if_unwritten(iet, **kwargs) iet = _mark_overlappable(iet) @@ -122,6 +125,16 @@ def rule1(dep, candidates, loc_dims): iet = Transformer(mapper, nested=True).visit(iet) # Clean up: de-nest HaloSpots if necessary + iet = denest_halospots(iet) + + return iet + + +def denest_halospots(iet): + """ + Denest nested HaloSpots. + # TOFIX: This also merges HaloSpots that have different loc_indices + """ mapper = {} for hs in FindNodes(HaloSpot).visit(iet): if hs.body.is_HaloSpot: @@ -210,6 +223,145 @@ def rule2(dep, hs, loc_indices): return iet +def _merge_halospots_byfunc(iet): + """ + Merge HaloSpots on the same Iteration tree level where all data dependencies + would be honored. + """ + + # Merge rules -- if the retval is True, then it means the input `dep` is not + # a stopper to halo merging + + def rule0(dep, hs, loc_indices): + # E.g., `dep=W -> R` => True + return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity + for d in dep.cause) + + def rule1(dep, hs, loc_indices): + # TODO This is apparently never hit, but feeling uncomfortable to remove it + return (dep.is_regular and + dep.read is not None and + all(not any(dep.read.touched_halo(d.root)) for d in dep.cause)) + + def rule2(dep, hs, loc_indices): + # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True + return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v + for d, v in loc_indices.items()) + + rules = [rule0, rule1, rule2] + + # Analysis + cond_mapper = MapHaloSpots().visit(iet) + cond_mapper = {hs: {i for i in v if i.is_Conditional and + not isinstance(i.condition, GuardFactorEq)} + for hs, v in cond_mapper.items()} + + iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + + mapper = {} + hoist_mapper = {} + raw_loc_indices = {} + + for iter, halo_spots in iter_mapper.items(): + if iter is None or len(halo_spots) <= 1: + continue + + scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) + + candidates = [] + + for i, _ in enumerate(halo_spots): + hs0 = halo_spots[i] + + for hs1 in halo_spots[i+1:]: + # If there are Conditionals involved, both `hs0` and `hs1` must be + # within the same Conditional, otherwise we would break the control + # flow semantics + if cond_mapper.get(hs0) != cond_mapper.get(hs1): + continue + + for f, v in hs1.fmapper.items(): + for dep in scope.d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in rules): + break + else: + try: + reps = (*hs0.functions, *hs1.functions).count(f) + candidates.append((hs0, hs1, reps)) + except ValueError: + # import pdb;pdb.set_trace() + pass + + if candidates: + ordered_candidates = sorted([candidates[-1]], key=lambda x: x[-1]) + + for hs0, hs1, reps in ordered_candidates: + # We assume that the latter HaloSpot is lifted while the former + # stays in place. We only work with twins "{t0, t1}" and not identicals + + # Ensure they are different + bool_locs = hs0.halo_scheme.loc_indices2 == hs1.halo_scheme.loc_indices2 + if bool_locs: + continue + + # We do not work with less than two occurences of the function + if reps < 2: + continue + + hs1_temp = hs1 + + # This drops the latter halospot + hs1_temp = hs1_temp._rebuild(halo_scheme=hs1_temp.halo_scheme.drop(f)) + mapper[hs1] = hs1_temp.halo_scheme + + # Now we need to reconstruct a HaloSchemeEntry for the hoisted HaloSpot + loc_indices = hs1.halo_scheme.fmapper[f].loc_indices + loc_dirs = hs1.halo_scheme.fmapper[f].loc_dirs + halos = hs1.halo_scheme.fmapper[f].halos + dims = hs1.halo_scheme.fmapper[f].dims + + # Since lifted, we need to update the loc_indices + # with known values + + for d in loc_indices: + root_min = loc_indices[d].symbolic_min + new_min = root_min.subs(loc_indices[d].root, + loc_indices[d].root.symbolic_min) + raw_loc_indices[d] = new_min + + raw_loc_indices = frozendict(raw_loc_indices) + + # Is that safe in terms of immutability ? + hs1.halo_scheme.fmapper[f] = HaloSchemeEntry(raw_loc_indices, + loc_dirs, + halos, + dims) + # import pdb;pdb.set_trace() + + hs1_new = hs1._rebuild(halo_scheme=hs1.halo_scheme, body=iter) + # mapper[hs0.body] = hs1_new + hoist_mapper[iter] = hs1_new + + # Post-process analysis + # This is also necessary to avoid the no-children error + for i, hs in mapper.items(): + try: + if hs.is_void: + mapper[i] = i.body + except AttributeError: + pass + + # Transform the IET merging/dropping HaloSpots as according to the analysis + iet = Transformer(hoist_mapper, nested=False).visit(iet) + iet = Transformer(mapper, nested=False).visit(iet) + + # Clean up: de-nest HaloSpots if necessary + # THis also merges HaloSpots that have different loc_indices + # iet = denest_halospots(iet) + + return iet + + def _drop_if_unwritten(iet, options=None, **kwargs): """ Drop HaloSpots for unwritten Functions. diff --git a/tests/mfe_1d.py b/tests/mfe_1d.py new file mode 100644 index 0000000000..4641f8ed17 --- /dev/null +++ b/tests/mfe_1d.py @@ -0,0 +1,42 @@ +import numpy as np + +from devito import (SpaceDimension, Grid, TimeFunction, Eq, Operator, + solve, Constant, norm) +from examples.seismic.source import TimeAxis, Receiver + +# Space related +extent = (1500., ) +shape = (201, ) +x = SpaceDimension(name='x', spacing=Constant(name='h_x', value=extent[0]/(shape[0]-1))) +grid = Grid(extent=extent, shape=shape, dimensions=(x, )) + +# Time related +t0, tn = 0., 30. +dt = (10. / np.sqrt(2.)) / 6. +time_range = TimeAxis(start=t0, stop=tn, step=dt) + +# Velocity and pressure fields +so = 2 +to = 1 +v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) +tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) + +# The receiver +nrec = 1 +rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) +rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) +rec_term = rec.interpolate(expr=v) + +# First order elastic-like dependencies equations +pde_v = v.dt - (tau.dx) +pde_tau = (tau.dt - ((v.forward).dx)) + +u_v = Eq(v.forward, solve(pde_v, v.forward)) +u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) + +op = Operator([u_v] + [u_tau] + rec_term) +op.apply(dt=dt) + +print(norm(v)) +print(norm(tau)) +# print(op.ccode) \ No newline at end of file diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 74e09575c1..d374fff70a 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -7,7 +7,8 @@ SparseTimeFunction, Dimension, ConditionalDimension, div, SubDimension, SubDomain, Eq, Ne, Inc, NODE, Operator, norm, inner, configuration, switchconfig, generic_derivative, - PrecomputedSparseFunction, DefaultDimension, Buffer) + PrecomputedSparseFunction, DefaultDimension, Buffer, + solve) from devito.arch.compiler import OneapiCompiler from devito.data import LEFT, RIGHT from devito.ir.iet import (Call, Conditional, Iteration, FindNodes, FindSymbols, @@ -17,8 +18,10 @@ ComputeCall) from devito.mpi.distributed import CustomTopology from devito.tools import Bunch +from devito.types.dimension import SpaceDimension from examples.seismic.acoustic import acoustic_setup +from examples.seismic import Receiver, TimeAxis class TestDistributor: @@ -1005,6 +1008,51 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 0 + @pytest.mark.parallel(mode=1) + def test_issue_2448(self, mode): + # TOFIX: Placeholder test for issue 2448 + # Space related + extent = (1500., ) + shape = (201, ) + x = SpaceDimension(name='x', spacing=Constant(name='h_x', + value=extent[0]/(shape[0]-1))) + grid = Grid(extent=extent, shape=shape, dimensions=(x, )) + + # Time related + t0, tn = 0., 30. + dt = (10. / np.sqrt(2.)) / 6. + time_range = TimeAxis(start=t0, stop=tn, step=dt) + + # Velocity and pressure fields + so = 2 + to = 1 + v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) + tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) + + # The receiver + nrec = 1 + rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) + rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) + rec_term = rec.interpolate(expr=v) + + # First order elastic-like dependencies equations + pde_v = v.dt - (tau.dx) + pde_tau = (tau.dt - ((v.forward).dx)) + u_v = Eq(v.forward, solve(pde_v, v.forward)) + + u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) + + op = Operator([u_v] + [u_tau] + rec_term) + + calls = [i for i in FindNodes(Call).visit(op) + if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 3 + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is tau + assert calls[2].arguments[0] is v + @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_with_subdims(self, mode): grid = Grid(shape=(4,)) @@ -1357,6 +1405,7 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): op = Operator(eqns) calls = FindNodes(Call).visit(op) + print(op.ccode) assert len(calls) == 1 op.apply(time_M=1) From 30ebe26bf3196267325293e3bdc0839b48030df1 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Wed, 23 Oct 2024 12:32:08 +0100 Subject: [PATCH 02/16] compiler: Improve candidate selection and mapper application --- devito/ir/iet/nodes.py | 17 +++- devito/passes/iet/mpi.py | 162 ++++++++++++++++++--------------------- tests/test_mpi.py | 3 +- 3 files changed, 90 insertions(+), 92 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index ab87264255..b49dace92d 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1457,9 +1457,20 @@ def __init__(self, body, halo_scheme): self._halo_scheme = halo_scheme def __repr__(self): - functions = "(%s" % ",".join(i.name for i in self.functions) - loc_indices = "[%s])" % ",".join(str(i) for i in self.halo_scheme.loc_indices2) - return "<%s%s%s>" % (self.__class__.__name__, functions, loc_indices) + function_strings = [] + for func in self.functions: + loc_indices = set().union(*[self.halo_scheme.fmapper[func].loc_indices.values()]) + loc_indices = list(loc_indices) + if loc_indices: + loc_indices_str = str(loc_indices) + else: + loc_indices_str = "" + + function_strings.append(f"{func.name}{loc_indices_str}") + + functions = ",".join(function_strings) + + return "<%s(%s)>" % (self.__class__.__name__, functions) @property def halo_scheme(self): diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index ca68205f21..3aace13d69 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -24,9 +24,7 @@ def optimize_halospots(iet, **kwargs): """ iet = _drop_halospots(iet) iet = _hoist_halospots(iet) - iet = _merge_halospots_byfunc(iet) - iet = _merge_halospots(iet) iet = _drop_if_unwritten(iet, **kwargs) iet = _mark_overlappable(iet) @@ -87,6 +85,7 @@ def rule1(dep, candidates, loc_dims): # Analysis hsmapper = {} imapper = defaultdict(list) + # Look for parent Iterations of children HaloSpots for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): for hs in halo_spots: hsmapper[hs] = hs.halo_scheme @@ -111,6 +110,7 @@ def rule1(dep, candidates, loc_dims): if not any(r(dep, candidates, loc_dims) for r in rules): break else: + # Only adjoints enter here? hsmapper[hs] = hsmapper[hs].drop(f) imapper[i].append(hs.halo_scheme.project(f)) break @@ -118,6 +118,7 @@ def rule1(dep, candidates, loc_dims): # Post-process analysis mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) for i, hss in imapper.items()} + mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) for i, hs in hsmapper.items()}) @@ -181,12 +182,13 @@ def rule2(dep, hs, loc_indices): iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) mapper = {} - for i, halo_spots in iter_mapper.items(): - if i is None or len(halo_spots) <= 1: + for iter, halo_spots in iter_mapper.items(): + if iter is None or len(halo_spots) <= 1: continue - scope = Scope([e.expr for e in FindNodes(Expression).visit(i)]) + scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) + # TOFIX: Why only comparing against the first HaloSpot? hs0 = halo_spots[0] mapper[hs0] = hs0.halo_scheme @@ -228,7 +230,6 @@ def _merge_halospots_byfunc(iet): Merge HaloSpots on the same Iteration tree level where all data dependencies would be honored. """ - # Merge rules -- if the retval is True, then it means the input `dep` is not # a stopper to halo merging @@ -256,20 +257,17 @@ def rule2(dep, hs, loc_indices): not isinstance(i.condition, GuardFactorEq)} for hs, v in cond_mapper.items()} - iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + # Precompute scopes to save time + scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} - mapper = {} - hoist_mapper = {} - raw_loc_indices = {} + candidates = [] - for iter, halo_spots in iter_mapper.items(): - if iter is None or len(halo_spots) <= 1: + # Loop over iterations and their halospots to detect candidates + for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): + if iters is None or len(halo_spots) <= 1: continue - scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) - - candidates = [] - + # Try to detect pairs of halospots that refer to the same function for i, _ in enumerate(halo_spots): hs0 = halo_spots[i] @@ -280,84 +278,74 @@ def rule2(dep, hs, loc_indices): if cond_mapper.get(hs0) != cond_mapper.get(hs1): continue + # Ensure no common loc_indices are shared for the two candidates + if any(i in hs0.halo_scheme.loc_indices2 for i in hs1.halo_scheme.loc_indices2): # noqa + continue + for f, v in hs1.fmapper.items(): - for dep in scope.d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in rules): - break - else: - try: - reps = (*hs0.functions, *hs1.functions).count(f) - candidates.append((hs0, hs1, reps)) - except ValueError: - # import pdb;pdb.set_trace() - pass + if not hs1.halo_scheme.fmapper[f].loc_indices: + continue - if candidates: - ordered_candidates = sorted([candidates[-1]], key=lambda x: x[-1]) + for iter in iters: + if iter not in scopes: + continue - for hs0, hs1, reps in ordered_candidates: - # We assume that the latter HaloSpot is lifted while the former - # stays in place. We only work with twins "{t0, t1}" and not identicals + for dep in scopes[iter].d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in rules): + break + else: + # Count the number of repetitions of the function + reps = (*hs0.functions, *hs1.functions).count(f) + # We only care for functions appearing in both + if reps < 2: + continue + else: + candidates.append((hs0, hs1, f, reps, iter)) - # Ensure they are different - bool_locs = hs0.halo_scheme.loc_indices2 == hs1.halo_scheme.loc_indices2 - if bool_locs: - continue + # Mapper for the iterations (hoisting the halospots) + imapper = defaultdict(list) + # Mapper for the halospots (dropping the functions) + hs_mapper = {} + + if candidates: + # The latter HaloSpot (hs1) is lifted while the former (hs0) stays in place + for hs0, hs1, f, reps, iter in candidates: + + # Drop more functions if the Halospot already exists in mapper + if hs1 in hs_mapper: + hs_mapper[hs1] = hs_mapper[hs1].drop(f) + else: + hs_mapper[hs1] = hs1.halo_scheme.drop(f) + + # Reconstruct a HaloSchemeEntry for the hoisted HaloSpot + fm = hs1.halo_scheme.fmapper[f] + + raw_loc_indices = {} + # Since lifted, we need to update the loc_indices with known values + # TODO: Can I get this in a more elegant way? + for d in fm.loc_indices: + root_min = fm.loc_indices[d].symbolic_min + new_min = root_min.subs(fm.loc_indices[d].root, + fm.loc_indices[d].root.symbolic_min) + raw_loc_indices[d] = new_min + + # Should HaloSchemeEntry be turned into a class and not be a namedtuple + # anymore? Then here we can use a ._rebuild + hs1.halo_scheme.fmapper[f] = HaloSchemeEntry(frozendict(raw_loc_indices), + fm.loc_dirs, + fm.halos, + fm.dims) + + imapper[iter].append(hs1.halo_scheme.project(f)) - # We do not work with less than two occurences of the function - if reps < 2: - continue + # Post-process analysis + mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) + for i, hss in imapper.items()} - hs1_temp = hs1 - - # This drops the latter halospot - hs1_temp = hs1_temp._rebuild(halo_scheme=hs1_temp.halo_scheme.drop(f)) - mapper[hs1] = hs1_temp.halo_scheme - - # Now we need to reconstruct a HaloSchemeEntry for the hoisted HaloSpot - loc_indices = hs1.halo_scheme.fmapper[f].loc_indices - loc_dirs = hs1.halo_scheme.fmapper[f].loc_dirs - halos = hs1.halo_scheme.fmapper[f].halos - dims = hs1.halo_scheme.fmapper[f].dims - - # Since lifted, we need to update the loc_indices - # with known values - - for d in loc_indices: - root_min = loc_indices[d].symbolic_min - new_min = root_min.subs(loc_indices[d].root, - loc_indices[d].root.symbolic_min) - raw_loc_indices[d] = new_min - - raw_loc_indices = frozendict(raw_loc_indices) - - # Is that safe in terms of immutability ? - hs1.halo_scheme.fmapper[f] = HaloSchemeEntry(raw_loc_indices, - loc_dirs, - halos, - dims) - # import pdb;pdb.set_trace() - - hs1_new = hs1._rebuild(halo_scheme=hs1.halo_scheme, body=iter) - # mapper[hs0.body] = hs1_new - hoist_mapper[iter] = hs1_new - - # Post-process analysis - # This is also necessary to avoid the no-children error - for i, hs in mapper.items(): - try: - if hs.is_void: - mapper[i] = i.body - except AttributeError: - pass - - # Transform the IET merging/dropping HaloSpots as according to the analysis - iet = Transformer(hoist_mapper, nested=False).visit(iet) - iet = Transformer(mapper, nested=False).visit(iet) + mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) + for i, hs in hs_mapper.items()}) - # Clean up: de-nest HaloSpots if necessary - # THis also merges HaloSpots that have different loc_indices - # iet = denest_halospots(iet) + iet = Transformer(mapper, nested=True).visit(iet) return iet diff --git a/tests/test_mpi.py b/tests/test_mpi.py index d374fff70a..41e0f28fa4 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1186,7 +1186,6 @@ def test_hoist_haloupdate_from_innerloop(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 1 - # Also make sure the Call is at the right place in the IET assert op.body.body[-1].body[1].body[0].body[0].body[0].body[0].is_Call assert op.body.body[-1].body[1].body[0].body[0].body[1].is_Iteration @@ -1406,7 +1405,7 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): calls = FindNodes(Call).visit(op) print(op.ccode) - assert len(calls) == 1 + assert len(calls) == 2 op.apply(time_M=1) glb_pos_map = f.grid.distributor.glb_pos_map From 982e83be291142883c5f025312b0620e0014f583 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 29 Oct 2024 17:32:53 +0200 Subject: [PATCH 03/16] compiler: Drop duplicated code --- devito/ir/iet/nodes.py | 10 +- devito/mpi/halo_scheme.py | 59 +++++-- devito/passes/iet/mpi.py | 323 ++++++++++++++++++-------------------- tests/mfe_1d.py | 42 ----- tests/test_mpi.py | 47 +++++- 5 files changed, 255 insertions(+), 226 deletions(-) delete mode 100644 tests/mfe_1d.py diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index b49dace92d..fc85f3c545 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1457,18 +1457,18 @@ def __init__(self, body, halo_scheme): self._halo_scheme = halo_scheme def __repr__(self): - function_strings = [] - for func in self.functions: - loc_indices = set().union(*[self.halo_scheme.fmapper[func].loc_indices.values()]) + fstrings = [] + for f in self.functions: + loc_indices = set().union(*[self.halo_scheme.fmapper[f].loc_indices.values()]) loc_indices = list(loc_indices) if loc_indices: loc_indices_str = str(loc_indices) else: loc_indices_str = "" - function_strings.append(f"{func.name}{loc_indices_str}") + fstrings.append(f"{f.name}{loc_indices_str}") - functions = ",".join(function_strings) + functions = ",".join(fstrings) return "<%s(%s)>" % (self.__class__.__name__, functions) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index e75c7d514d..a330ce636c 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -28,7 +28,39 @@ class HaloLabel(Tag): STENCIL = HaloLabel('stencil') -HaloSchemeEntry = namedtuple('HaloSchemeEntry', 'loc_indices loc_dirs halos dims') +class HaloSchemeEntry: + + def __init__(self, loc_indices, loc_dirs, halos, dims): + self.loc_indices = loc_indices + self.loc_dirs = loc_dirs + self.halos = halos + self.dims = dims + + def __repr__(self): + return (f"HaloSchemeEntry(loc_indices={self.loc_indices}, " + f"loc_dirs={self.loc_dirs}, halos={self.halos}, dims={self.dims})") + + def __eq__(self, other): + if not isinstance(other, HaloSchemeEntry): + return False + return (self.loc_indices == other.loc_indices and + self.loc_dirs == other.loc_dirs and + self.halos == other.halos and + self.dims == other.dims) + + def __hash__(self): + return hash((frozenset(self.loc_indices.items()), + frozenset(self.loc_dirs.items()), + frozenset(self.halos), + frozenset(self.dims))) + + def rebuild(self, **kwargs): + loc_indices = kwargs.get('loc_indices', self.loc_indices) + loc_dirs = kwargs.get('loc_dirs', self.loc_dirs) + halos = kwargs.get('halos', self.halos) + dims = kwargs.get('dims', self.dims) + return HaloSchemeEntry(loc_indices, loc_dirs, halos, dims) + Halo = namedtuple('Halo', 'dim side') @@ -94,9 +126,20 @@ def __init__(self, exprs, ispace): self._honored = frozendict(self._honored) def __repr__(self): - fnames = ",".join(i.name for i in set(self._mapper)) - loc_indices = "[%s]" % ",".join(str(i) for i in self.loc_indices2) - return "HaloScheme<%s%s>" % (fnames, loc_indices) + fstrings = [] + for f in self.fmapper: + loc_indices = set().union(*[self._mapper[f].loc_indices.values()]) + loc_indices = list(loc_indices) + if loc_indices: + loc_indices_str = str(loc_indices) + else: + loc_indices_str = "" + + fstrings.append(f"{f.name}{loc_indices_str}") + + functions = ",".join(fstrings) + + return "%s<%s>" % (self.__class__.__name__, functions) def __eq__(self, other): return (isinstance(other, HaloScheme) and @@ -121,7 +164,6 @@ def union(self, halo_schemes): """ Create a new HaloScheme from the union of a set of HaloSchemes. """ - # import pdb; pdb.set_trace() halo_schemes = [hs for hs in halo_schemes if hs is not None] if not halo_schemes: return None @@ -368,7 +410,7 @@ def loc_indices(self): return set().union(*[i.loc_indices.keys() for i in self.fmapper.values()]) @cached_property - def loc_indices2(self): + def loc_values(self): return set().union(*[i.loc_indices.values() for i in self.fmapper.values()]) @cached_property @@ -640,9 +682,8 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): # Nope, let's try with the next Indexed, if any continue - hse = HaloSchemeEntry(frozendict(loc_indices), - frozendict(loc_dirs), - hse0.halos, hse0.dims) + hse = hse0.rebuild(loc_indices=frozendict(loc_indices), + loc_dirs=frozendict(loc_dirs)) else: continue diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 3aace13d69..913164a7aa 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -7,7 +7,7 @@ retrieve_iteration_tree) from devito.ir.support import PARALLEL, Scope from devito.ir.support.guards import GuardFactorEq -from devito.mpi.halo_scheme import HaloScheme, HaloSchemeEntry +from devito.mpi.halo_scheme import HaloScheme from devito.mpi.reduction_scheme import DistReduce from devito.mpi.routines import HaloExchangeBuilder, ReductionBuilder from devito.passes.iet.engine import iet_pass @@ -24,7 +24,7 @@ def optimize_halospots(iet, **kwargs): """ iet = _drop_halospots(iet) iet = _hoist_halospots(iet) - iet = _merge_halospots_byfunc(iet) + iet = _hoist_and_drop(iet) iet = _merge_halospots(iet) iet = _drop_if_unwritten(iet, **kwargs) iet = _mark_overlappable(iet) @@ -62,23 +62,6 @@ def _hoist_halospots(iet): would be honored. """ - # Hoisting rules -- if the retval is True, then it means the input `dep` is not - # a stopper to halo hoisting - - def rule0(dep, candidates, loc_dims): - # E.g., `dep=W -> R` and `candidates=({time}, {x})` => False - # E.g., `dep=W -> R`, `dep.cause={t,time}` and - # `candidates=({x},)` => True - return (all(i & set(dep.distance_mapper) for i in candidates) and - not any(i & dep.cause for i in candidates) and - not any(i & loc_dims for i in candidates)) - - def rule1(dep, candidates, loc_dims): - # A reduction isn't a stopper to hoisting - return dep.write is not None and dep.write.is_reduction - - rules = [rule0, rule1] - # Precompute scopes to save time scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} @@ -107,10 +90,9 @@ def rule1(dep, candidates, loc_dims): continue for dep in scopes[i].d_flow.project(f): - if not any(r(dep, candidates, loc_dims) for r in rules): + if not any(r(dep, candidates, loc_dims) for r in hoist_rules()): break else: - # Only adjoints enter here? hsmapper[hs] = hsmapper[hs].drop(f) imapper[i].append(hs.halo_scheme.project(f)) break @@ -131,138 +113,21 @@ def rule1(dep, candidates, loc_dims): return iet -def denest_halospots(iet): - """ - Denest nested HaloSpots. - # TOFIX: This also merges HaloSpots that have different loc_indices - """ - mapper = {} - for hs in FindNodes(HaloSpot).visit(iet): - if hs.body.is_HaloSpot: - halo_scheme = HaloScheme.union([hs.halo_scheme, hs.body.halo_scheme]) - mapper[hs] = hs._rebuild(halo_scheme=halo_scheme, body=hs.body.body) - iet = Transformer(mapper, nested=True).visit(iet) - - return iet - - -def _merge_halospots(iet): +def _hoist_and_drop(iet): """ - Merge HaloSpots on the same Iteration tree level where all data dependencies - would be honored. + Detect HaloSpots that refer to different time slices of the same TimeFunction and + could be hoisted to the outermost Iteration. This is a function that mixes principles + from both `_hoist_halospots` and `_merge_halospots`, aiming to catch a special case + where the HaloSpots refer to the same Function but with different loc_indices. + In some cases, it is better to hoist rather than merge and this is why this comes + before `_merge_halospots`. """ - - # Merge rules -- if the retval is True, then it means the input `dep` is not - # a stopper to halo merging - - def rule0(dep, hs, loc_indices): - # E.g., `dep=W -> R` => True - return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity - for d in dep.cause) - - def rule1(dep, hs, loc_indices): - # TODO This is apparently never hit, but feeling uncomfortable to remove it - return (dep.is_regular and - dep.read is not None and - all(not any(dep.read.touched_halo(d.root)) for d in dep.cause)) - - def rule2(dep, hs, loc_indices): - # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True - return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v - for d, v in loc_indices.items()) - - rules = [rule0, rule1, rule2] - # Analysis - cond_mapper = MapHaloSpots().visit(iet) - cond_mapper = {hs: {i for i in v if i.is_Conditional and - not isinstance(i.condition, GuardFactorEq)} - for hs, v in cond_mapper.items()} - - iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) - - mapper = {} - for iter, halo_spots in iter_mapper.items(): - if iter is None or len(halo_spots) <= 1: - continue - - scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) - - # TOFIX: Why only comparing against the first HaloSpot? - hs0 = halo_spots[0] - mapper[hs0] = hs0.halo_scheme - - for hs1 in halo_spots[1:]: - mapper[hs1] = hs1.halo_scheme - - # If there are Conditionals involved, both `hs0` and `hs1` must be - # within the same Conditional, otherwise we would break the control - # flow semantics - if cond_mapper.get(hs0) != cond_mapper.get(hs1): - continue - - for f, v in hs1.fmapper.items(): - for dep in scope.d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in rules): - break - else: - try: - hs = hs1.halo_scheme.project(f) - mapper[hs0] = HaloScheme.union([mapper[hs0], hs]) - mapper[hs1] = mapper[hs1].drop(f) - except ValueError: - # `hs1.loc_indices= -> R` => True - return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity - for d in dep.cause) - - def rule1(dep, hs, loc_indices): - # TODO This is apparently never hit, but feeling uncomfortable to remove it - return (dep.is_regular and - dep.read is not None and - all(not any(dep.read.touched_halo(d.root)) for d in dep.cause)) - - def rule2(dep, hs, loc_indices): - # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True - return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v - for d, v in loc_indices.items()) - - rules = [rule0, rule1, rule2] - - # Analysis - cond_mapper = MapHaloSpots().visit(iet) - cond_mapper = {hs: {i for i in v if i.is_Conditional and - not isinstance(i.condition, GuardFactorEq)} - for hs, v in cond_mapper.items()} - - # Precompute scopes to save time - scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} + cond_mapper = _create_cond_mapper(iet) candidates = [] - # Loop over iterations and their halospots to detect candidates + # Look for parent Iterations of children HaloSpots for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): if iters is None or len(halo_spots) <= 1: continue @@ -278,33 +143,34 @@ def rule2(dep, hs, loc_indices): if cond_mapper.get(hs0) != cond_mapper.get(hs1): continue - # Ensure no common loc_indices are shared for the two candidates - if any(i in hs0.halo_scheme.loc_indices2 for i in hs1.halo_scheme.loc_indices2): # noqa + # Ensure no common loc_indices are shared for the two candidates + if any(i in hs0.halo_scheme.loc_values + for i in hs1.halo_scheme.loc_values): continue for f, v in hs1.fmapper.items(): + # Skip functions that do not have time accesses if not hs1.halo_scheme.fmapper[f].loc_indices: continue for iter in iters: - if iter not in scopes: - continue - - for dep in scopes[iter].d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in rules): + # Follow the rules of mrgeable HaloSpots + scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) + for dep in scope.d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): break else: - # Count the number of repetitions of the function - reps = (*hs0.functions, *hs1.functions).count(f) - # We only care for functions appearing in both - if reps < 2: + # Count the number of repetitions of the function, stop + # if it appears only once + count = (*hs0.functions, *hs1.functions).count(f) + if count < 2: continue else: - candidates.append((hs0, hs1, f, reps, iter)) + candidates.append((hs0, hs1, f, count, iter)) - # Mapper for the iterations (hoisting the halospots) + # Mapper for the iterations (hoist the halospots) imapper = defaultdict(list) - # Mapper for the halospots (dropping the functions) + # Mapper for the halospots (drop functions from the halospots) hs_mapper = {} if candidates: @@ -329,12 +195,9 @@ def rule2(dep, hs, loc_indices): fm.loc_indices[d].root.symbolic_min) raw_loc_indices[d] = new_min - # Should HaloSchemeEntry be turned into a class and not be a namedtuple - # anymore? Then here we can use a ._rebuild - hs1.halo_scheme.fmapper[f] = HaloSchemeEntry(frozendict(raw_loc_indices), - fm.loc_dirs, - fm.halos, - fm.dims) + hs_entry = hs1.halo_scheme.fmapper[f] + hs_entry = hs_entry.rebuild(loc_indices=frozendict(raw_loc_indices)) + hs1.halo_scheme.fmapper[f] = hs_entry imapper[iter].append(hs1.halo_scheme.project(f)) @@ -350,6 +213,62 @@ def rule2(dep, hs, loc_indices): return iet +def _merge_halospots(iet): + """ + Merge HaloSpots on the same Iteration tree level where all data dependencies + would be honored. + """ + + # Analysis + cond_mapper = _create_cond_mapper(iet) + + mapper = {} + for iter, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items(): + if iter is None or len(halo_spots) <= 1: + continue + + scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) + + # TOFIX: Why only comparing against the first HaloSpot? + # We could similary to `_hoist_and_drop` compare all pairs of HaloSpots + # and merge them based upon some priority ranking on which pair we prefer to + # merge + hs0 = halo_spots[0] + mapper[hs0] = hs0.halo_scheme + + for hs1 in halo_spots[1:]: + mapper[hs1] = hs1.halo_scheme + + # If there are Conditionals involved, both `hs0` and `hs1` must be + # within the same Conditional, otherwise we would break the control + # flow semantics + if cond_mapper.get(hs0) != cond_mapper.get(hs1): + continue + + for f, v in hs1.fmapper.items(): + for dep in scope.d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): + break + else: + try: + hs = hs1.halo_scheme.project(f) + mapper[hs0] = HaloScheme.union([mapper[hs0], hs]) + mapper[hs1] = mapper[hs1].drop(f) + except ValueError: + # `hs1.loc_indices= -> R` => True + return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity + for d in dep.cause) + + def rule1(dep, hs, loc_indices): + # TODO This is apparently never hit, but feeling uncomfortable to remove it + return (dep.is_regular and + dep.read is not None and + all(not any(dep.read.touched_halo(d.root)) for d in dep.cause)) + + def rule2(dep, hs, loc_indices): + # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True + return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v + for d, v in loc_indices.items()) + + rules = [rule0, rule1, rule2] + + return rules + + +def hoist_rules(): + # Hoisting rules -- if the retval is True, then it means the input `dep` is not + # a stopper to halo hoisting + + def rule0(dep, candidates, loc_dims): + # E.g., `dep=W -> R` and `candidates=({time}, {x})` => False + # E.g., `dep=W -> R`, `dep.cause={t,time}` and + # `candidates=({x},)` => True + return (all(i & set(dep.distance_mapper) for i in candidates) and + not any(i & dep.cause for i in candidates) and + not any(i & loc_dims for i in candidates)) + + def rule1(dep, candidates, loc_dims): + # A reduction isn't a stopper to hoisting + return dep.write is not None and dep.write.is_reduction + + rules = [rule0, rule1] + + return rules diff --git a/tests/mfe_1d.py b/tests/mfe_1d.py deleted file mode 100644 index 4641f8ed17..0000000000 --- a/tests/mfe_1d.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - -from devito import (SpaceDimension, Grid, TimeFunction, Eq, Operator, - solve, Constant, norm) -from examples.seismic.source import TimeAxis, Receiver - -# Space related -extent = (1500., ) -shape = (201, ) -x = SpaceDimension(name='x', spacing=Constant(name='h_x', value=extent[0]/(shape[0]-1))) -grid = Grid(extent=extent, shape=shape, dimensions=(x, )) - -# Time related -t0, tn = 0., 30. -dt = (10. / np.sqrt(2.)) / 6. -time_range = TimeAxis(start=t0, stop=tn, step=dt) - -# Velocity and pressure fields -so = 2 -to = 1 -v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) -tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) - -# The receiver -nrec = 1 -rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) -rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) -rec_term = rec.interpolate(expr=v) - -# First order elastic-like dependencies equations -pde_v = v.dt - (tau.dx) -pde_tau = (tau.dt - ((v.forward).dx)) - -u_v = Eq(v.forward, solve(pde_v, v.forward)) -u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) - -op = Operator([u_v] + [u_tau] + rec_term) -op.apply(dt=dt) - -print(norm(v)) -print(norm(tau)) -# print(op.ccode) \ No newline at end of file diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 41e0f28fa4..0ffd771747 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1010,8 +1010,6 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): @pytest.mark.parallel(mode=1) def test_issue_2448(self, mode): - # TOFIX: Placeholder test for issue 2448 - # Space related extent = (1500., ) shape = (201, ) x = SpaceDimension(name='x', spacing=Constant(name='h_x', @@ -1053,6 +1051,50 @@ def test_issue_2448(self, mode): assert calls[1].arguments[0] is tau assert calls[2].arguments[0] is v + @pytest.mark.parallel(mode=1) + def test_issue_2448_v2(self, mode): + # Similar but with v.forward + extent = (1500., ) + shape = (201, ) + x = SpaceDimension(name='x', spacing=Constant(name='h_x', + value=extent[0]/(shape[0]-1))) + grid = Grid(extent=extent, shape=shape, dimensions=(x, )) + + # Time related + t0, tn = 0., 30. + dt = (10. / np.sqrt(2.)) / 6. + time_range = TimeAxis(start=t0, stop=tn, step=dt) + + # Velocity and pressure fields + so = 2 + to = 1 + v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) + tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) + + # The receiver + nrec = 1 + rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) + rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) + rec_term = rec.interpolate(expr=v.forward) + + # First order elastic-like dependencies equations + pde_v = v.dt - (tau.dx) + pde_tau = (tau.dt - ((v.forward).dx)) + u_v = Eq(v.forward, solve(pde_v, v.forward)) + + u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) + + op = Operator([u_v] + [u_tau] + rec_term) + + calls = [i for i in FindNodes(Call).visit(op) + if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 3 + assert calls[0].arguments[0] is tau + assert calls[1].arguments[0] is v + assert calls[2].arguments[0] is v + @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_with_subdims(self, mode): grid = Grid(shape=(4,)) @@ -1404,7 +1446,6 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): op = Operator(eqns) calls = FindNodes(Call).visit(op) - print(op.ccode) assert len(calls) == 2 op.apply(time_M=1) From 063c4ea5abcf0134be735dee0fd90cd08672f8ff Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Wed, 30 Oct 2024 20:08:29 +0200 Subject: [PATCH 04/16] compiler: Concretize halo_opt_revamp, tests, drop redundant code --- devito/ir/iet/nodes.py | 7 +- devito/mpi/halo_scheme.py | 8 +- devito/passes/iet/mpi.py | 227 ++++++++++---------------------------- tests/test_dse.py | 2 +- tests/test_mpi.py | 168 +++++++++++++++++++--------- 5 files changed, 180 insertions(+), 232 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index fc85f3c545..9def607678 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1461,16 +1461,13 @@ def __repr__(self): for f in self.functions: loc_indices = set().union(*[self.halo_scheme.fmapper[f].loc_indices.values()]) loc_indices = list(loc_indices) - if loc_indices: - loc_indices_str = str(loc_indices) - else: - loc_indices_str = "" + loc_indices_str = str(list(loc_indices)) if loc_indices else "" fstrings.append(f"{f.name}{loc_indices_str}") functions = ",".join(fstrings) - return "<%s(%s)>" % (self.__class__.__name__, functions) + return f"<{self.__class__.__name__}({functions})>" @property def halo_scheme(self): diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index a330ce636c..d026e8ea65 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -129,17 +129,13 @@ def __repr__(self): fstrings = [] for f in self.fmapper: loc_indices = set().union(*[self._mapper[f].loc_indices.values()]) - loc_indices = list(loc_indices) - if loc_indices: - loc_indices_str = str(loc_indices) - else: - loc_indices_str = "" + loc_indices_str = str(list(loc_indices)) if loc_indices else "" fstrings.append(f"{f.name}{loc_indices_str}") functions = ",".join(fstrings) - return "%s<%s>" % (self.__class__.__name__, functions) + return f"<{self.__class__.__name__}({functions})>" def __eq__(self, other): return (isinstance(other, HaloScheme) and diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 913164a7aa..ccc3dc729f 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -24,7 +24,6 @@ def optimize_halospots(iet, **kwargs): """ iet = _drop_halospots(iet) iet = _hoist_halospots(iet) - iet = _hoist_and_drop(iet) iet = _merge_halospots(iet) iet = _drop_if_unwritten(iet, **kwargs) iet = _mark_overlappable(iet) @@ -60,157 +59,94 @@ def _hoist_halospots(iet): """ Hoist HaloSpots from inner to outer Iterations where all data dependencies would be honored. + + Example: + haloupd v[t0] + for time for time + W v[t1]- R v[t0] W v[t1]- R v[t0] + haloupd v[t1] haloupd v[t1] + R v[t1] R v[t1] + haloupd v[t0] R v[t0] + R v[t0] + """ # Precompute scopes to save time scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} + cond_mapper = _create_cond_mapper(iet) + # Analysis hsmapper = {} imapper = defaultdict(list) - # Look for parent Iterations of children HaloSpots - for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): - for hs in halo_spots: - hsmapper[hs] = hs.halo_scheme - - for f, v in hs.fmapper.items(): - loc_dims = frozenset().union([q for d in v.loc_indices - for q in d._defines]) - - for n, i in enumerate(iters): - if i not in scopes: - continue - - candidates = [i.dim._defines for i in iters[n:]] - - all_candidates = set().union(*candidates) - reads = scopes[i].getreads(f) - if any(set(a.ispace.dimensions) & all_candidates - for a in reads): - continue - - for dep in scopes[i].d_flow.project(f): - if not any(r(dep, candidates, loc_dims) for r in hoist_rules()): - break - else: - hsmapper[hs] = hsmapper[hs].drop(f) - imapper[i].append(hs.halo_scheme.project(f)) - break - - # Post-process analysis - mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) - for i, hss in imapper.items()} - - mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) - for i, hs in hsmapper.items()}) - - # Transform the IET hoisting/dropping HaloSpots as according to the analysis - iet = Transformer(mapper, nested=True).visit(iet) - - # Clean up: de-nest HaloSpots if necessary - iet = denest_halospots(iet) - - return iet - - -def _hoist_and_drop(iet): - """ - Detect HaloSpots that refer to different time slices of the same TimeFunction and - could be hoisted to the outermost Iteration. This is a function that mixes principles - from both `_hoist_halospots` and `_merge_halospots`, aiming to catch a special case - where the HaloSpots refer to the same Function but with different loc_indices. - In some cases, it is better to hoist rather than merge and this is why this comes - before `_merge_halospots`. - """ - # Analysis - cond_mapper = _create_cond_mapper(iet) - - candidates = [] # Look for parent Iterations of children HaloSpots for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): - if iters is None or len(halo_spots) <= 1: - continue + for i, hs0 in enumerate(halo_spots): - # Try to detect pairs of halospots that refer to the same function - for i, _ in enumerate(halo_spots): - hs0 = halo_spots[i] + # Nothing to do if the HaloSpot is void + if hs0.halo_scheme.is_void: + continue for hs1 in halo_spots[i+1:]: # If there are Conditionals involved, both `hs0` and `hs1` must be # within the same Conditional, otherwise we would break the control - # flow semantics if cond_mapper.get(hs0) != cond_mapper.get(hs1): continue - # Ensure no common loc_indices are shared for the two candidates + # If there are overlapping time accesses, skip if any(i in hs0.halo_scheme.loc_values - for i in hs1.halo_scheme.loc_values): + for i in hs1.halo_scheme.loc_values): continue - for f, v in hs1.fmapper.items(): - # Skip functions that do not have time accesses - if not hs1.halo_scheme.fmapper[f].loc_indices: - continue - - for iter in iters: - # Follow the rules of mrgeable HaloSpots - scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) - for dep in scope.d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): - break - else: - # Count the number of repetitions of the function, stop - # if it appears only once - count = (*hs0.functions, *hs1.functions).count(f) - if count < 2: - continue - else: - candidates.append((hs0, hs1, f, count, iter)) - - # Mapper for the iterations (hoist the halospots) - imapper = defaultdict(list) - # Mapper for the halospots (drop functions from the halospots) - hs_mapper = {} + # Compare hs0 to subsequent halo_spots, looking for optimization + # possibilities + _process_halo_to_halo(hs0, hs1, iters, scopes, hsmapper, imapper) - if candidates: - # The latter HaloSpot (hs1) is lifted while the former (hs0) stays in place - for hs0, hs1, f, reps, iter in candidates: + mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) + for i, hss in imapper.items()} + mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) + for i, hs in hsmapper.items()}) - # Drop more functions if the Halospot already exists in mapper - if hs1 in hs_mapper: - hs_mapper[hs1] = hs_mapper[hs1].drop(f) - else: - hs_mapper[hs1] = hs1.halo_scheme.drop(f) + iet = Transformer(mapper, nested=True).visit(iet) - # Reconstruct a HaloSchemeEntry for the hoisted HaloSpot - fm = hs1.halo_scheme.fmapper[f] + return iet - raw_loc_indices = {} - # Since lifted, we need to update the loc_indices with known values - # TODO: Can I get this in a more elegant way? - for d in fm.loc_indices: - root_min = fm.loc_indices[d].symbolic_min - new_min = root_min.subs(fm.loc_indices[d].root, - fm.loc_indices[d].root.symbolic_min) - raw_loc_indices[d] = new_min - hs_entry = hs1.halo_scheme.fmapper[f] - hs_entry = hs_entry.rebuild(loc_indices=frozendict(raw_loc_indices)) - hs1.halo_scheme.fmapper[f] = hs_entry +def _process_halo_to_halo(hs0, hs1, iters, scopes, hsmapper, imapper): - imapper[iter].append(hs1.halo_scheme.project(f)) + # Loop over the functions in the HaloSpots + for f, v in hs1.fmapper.items(): + # If no time accesses, skip + if not hs1.halo_scheme.fmapper[f].loc_indices: + continue - # Post-process analysis - mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) - for i, hss in imapper.items()} + # If the function is not in both HaloSpots, skip + if (*hs0.functions, *hs1.functions).count(f) < 2: + continue - mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) - for i, hs in hs_mapper.items()}) + for iter in iters: + # If also merge-able we can start hoisting the latter + for dep in scopes[iter].d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): + break + else: + hse = hs1.halo_scheme.fmapper[f] + raw_loc_indices = {} + # Entering here means we can lift, and we need to update + # the loc_indices with known values + # TODO: Can I get this in a more elegant way? + for d in hse.loc_indices: + root_min = hse.loc_indices[d].symbolic_min + new_min = root_min.subs(hse.loc_indices[d].root, + hse.loc_indices[d].root.symbolic_min) + raw_loc_indices[d] = new_min - iet = Transformer(mapper, nested=True).visit(iet) + hse = hse.rebuild(loc_indices=frozendict(raw_loc_indices)) + hs1.halo_scheme.fmapper[f] = hse - return iet + hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) + imapper[iter].append(hs1.halo_scheme.project(f)) def _merge_halospots(iet): @@ -229,10 +165,6 @@ def _merge_halospots(iet): scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) - # TOFIX: Why only comparing against the first HaloSpot? - # We could similary to `_hoist_and_drop` compare all pairs of HaloSpots - # and merge them based upon some priority ranking on which pair we prefer to - # merge hs0 = halo_spots[0] mapper[hs0] = hs0.halo_scheme @@ -250,14 +182,9 @@ def _merge_halospots(iet): if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): break else: - try: - hs = hs1.halo_scheme.project(f) - mapper[hs0] = HaloScheme.union([mapper[hs0], hs]) - mapper[hs1] = mapper[hs1].drop(f) - except ValueError: - # `hs1.loc_indices= -> R` and `candidates=({time}, {x})` => False - # E.g., `dep=W -> R`, `dep.cause={t,time}` and - # `candidates=({x},)` => True - return (all(i & set(dep.distance_mapper) for i in candidates) and - not any(i & dep.cause for i in candidates) and - not any(i & loc_dims for i in candidates)) - - def rule1(dep, candidates, loc_dims): - # A reduction isn't a stopper to hoisting - return dep.write is not None and dep.write.is_reduction - - rules = [rule0, rule1] - - return rules diff --git a/tests/test_dse.py b/tests/test_dse.py index 33f93bf5e5..c82848a28a 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -12,7 +12,7 @@ ConditionalDimension, DefaultDimension, Grid, Operator, norm, grad, div, dimensions, switchconfig, configuration, centered, first_derivative, solve, transpose, Abs, cos, - sin, sqrt, floor, Ge, Lt, Derivative) + sin, sqrt, floor, Ge, Lt, Derivative, solve) from devito.exceptions import InvalidArgument, InvalidOperator from devito.ir import (Conditional, DummyEq, Expression, Iteration, FindNodes, FindSymbols, ParallelIteration, retrieve_iteration_tree) diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 0ffd771747..e51a8f06ee 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -4,11 +4,12 @@ from conftest import _R, assert_blocking, assert_structure from devito import (Grid, Constant, Function, TimeFunction, SparseFunction, - SparseTimeFunction, Dimension, ConditionalDimension, div, + SparseTimeFunction, VectorTimeFunction, TensorTimeFunction, + Dimension, ConditionalDimension, div, SubDimension, SubDomain, Eq, Ne, Inc, NODE, Operator, norm, inner, configuration, switchconfig, generic_derivative, PrecomputedSparseFunction, DefaultDimension, Buffer, - solve) + solve, diag, grad) from devito.arch.compiler import OneapiCompiler from devito.data import LEFT, RIGHT from devito.ir.iet import (Call, Conditional, Iteration, FindNodes, FindSymbols, @@ -21,7 +22,8 @@ from devito.types.dimension import SpaceDimension from examples.seismic.acoustic import acoustic_setup -from examples.seismic import Receiver, TimeAxis +from examples.seismic import Receiver, TimeAxis, demo_model +from tests.test_dse import TestTTI class TestDistributor: @@ -983,6 +985,7 @@ def test_avoid_redundant_haloupdate_cond(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 1 + assert calls[0].functions[0] is f @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_if_distr_but_sequential(self, mode): @@ -1010,8 +1013,11 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): @pytest.mark.parallel(mode=1) def test_issue_2448(self, mode): - extent = (1500., ) - shape = (201, ) + extent = (10., ) + shape = (2, ) + so = 2 + to = 1 + x = SpaceDimension(name='x', spacing=Constant(name='h_x', value=extent[0]/(shape[0]-1))) grid = Grid(extent=extent, shape=shape, dimensions=(x, )) @@ -1022,17 +1028,9 @@ def test_issue_2448(self, mode): time_range = TimeAxis(start=t0, stop=tn, step=dt) # Velocity and pressure fields - so = 2 - to = 1 v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) - # The receiver - nrec = 1 - rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) - rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) - rec_term = rec.interpolate(expr=v) - # First order elastic-like dependencies equations pde_v = v.dt - (tau.dx) pde_tau = (tau.dt - ((v.forward).dx)) @@ -1040,57 +1038,44 @@ def test_issue_2448(self, mode): u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) - op = Operator([u_v] + [u_tau] + rec_term) + # Test two variants of receiver interpolation + nrec = 1 + rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) + rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) + + # The receiver 0 + rec_term0 = rec.interpolate(expr=v) + + # The receiver 1 + rec_term1 = rec.interpolate(expr=v.forward) - calls = [i for i in FindNodes(Call).visit(op) + # Test receiver interpolation 0, here we have a halo exchange hoisted + op0 = Operator([u_v] + [u_tau] + rec_term0) + + calls = [i for i in FindNodes(Call).visit(op0) if isinstance(i, HaloUpdateCall)] # The correct we want assert len(calls) == 3 + + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 assert calls[0].arguments[0] is v assert calls[1].arguments[0] is tau assert calls[2].arguments[0] is v - @pytest.mark.parallel(mode=1) - def test_issue_2448_v2(self, mode): - # Similar but with v.forward - extent = (1500., ) - shape = (201, ) - x = SpaceDimension(name='x', spacing=Constant(name='h_x', - value=extent[0]/(shape[0]-1))) - grid = Grid(extent=extent, shape=shape, dimensions=(x, )) - - # Time related - t0, tn = 0., 30. - dt = (10. / np.sqrt(2.)) / 6. - time_range = TimeAxis(start=t0, stop=tn, step=dt) - - # Velocity and pressure fields - so = 2 - to = 1 - v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) - tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) - - # The receiver - nrec = 1 - rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) - rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) - rec_term = rec.interpolate(expr=v.forward) - - # First order elastic-like dependencies equations - pde_v = v.dt - (tau.dx) - pde_tau = (tau.dt - ((v.forward).dx)) - u_v = Eq(v.forward, solve(pde_v, v.forward)) - - u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) - - op = Operator([u_v] + [u_tau] + rec_term) + # Test receiver interpolation 1, here we should not have any halo exchange + # hoisted + op1 = Operator([u_v] + [u_tau] + rec_term1) - calls = [i for i in FindNodes(Call).visit(op) + calls = [i for i in FindNodes(Call).visit(op1) if isinstance(i, HaloUpdateCall)] # The correct we want assert len(calls) == 3 + + assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[0])) == 0 + assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[1])) == 3 assert calls[0].arguments[0] is tau assert calls[1].arguments[0] is v assert calls[2].arguments[0] is v @@ -1131,8 +1116,8 @@ def test_avoid_haloupdate_with_constant_index(self, mode): eq = Eq(u.forward, u[t, 1] + u[t, 1 + x.symbolic_min] + u[t, x]) op = Operator(eq) - calls = FindNodes(Call).visit(op) + assert len(calls) == 0 @pytest.mark.parallel(mode=1) @@ -1228,6 +1213,7 @@ def test_hoist_haloupdate_from_innerloop(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 1 + # Also make sure the Call is at the right place in the IET assert op.body.body[-1].body[1].body[0].body[0].body[0].body[0].is_Call assert op.body.body[-1].body[1].body[0].body[0].body[1].is_Iteration @@ -2479,7 +2465,6 @@ def test_staggering(self, mode): op = Operator(eqns) op(time_M=2) - # Expected norms computed "manually" from sequential runs assert np.isclose(norm(ux), 7003.098, rtol=1.e-4) assert np.isclose(norm(uxx), 78902.21, rtol=1.e-4) @@ -2816,6 +2801,85 @@ def test_adjoint_F_no_omp(self, mode): self.run_adjoint_F(3) +class TestElastic: + + @pytest.mark.parallel(mode=[(1, 'diag')]) + def test_elastic_structure(self, mode): + + so = 4 + model = demo_model(preset='layers-elastic', nlayers=1, + shape=(301, 301), spacing=(10., 10.), + space_order=so) + + t0, tn = 0., 2000. + dt = model.critical_dt + time_range = TimeAxis(start=t0, stop=tn, step=dt) + + x, z = model.grid.dimensions + + v = VectorTimeFunction(name='v', grid=model.grid, space_order=so, time_order=1) + tau = TensorTimeFunction(name='t', grid=model.grid, space_order=so, time_order=1) + + # The receiver + nrec = 301 + rec = Receiver(name="rec", grid=model.grid, npoint=nrec, time_range=time_range) + rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0], num=nrec) + rec.coordinates.data[:, -1] = 5. + + rec_term = rec.interpolate(expr=v[0] + v[1]) + + # Now let's try and create the staggered updates + # Lame parameters + l, mu, ro = model.lam, model.mu, model.b + + # First order elastic wave equation + pde_v = v.dt - ro * div(tau) + pde_tau = (tau.dt - l * diag(div(v.forward)) - + mu * (grad(v.forward) + grad(v.forward).transpose(inner=False))) + # Time update + u_v = Eq(v.forward, model.damp * solve(pde_v, v.forward)) + u_t = Eq(tau.forward, model.damp * solve(pde_tau, tau.forward)) + + op = Operator([u_v] + [u_t] + rec_term) + + assert len(op._func_table) == 11 + + calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 5 + + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[1])) == 4 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[2])) == 0 + + assert calls[0].arguments[0] is v[0] + assert calls[0].arguments[1] is v[1] + + assert calls[1].arguments[0] is tau[0, 0] + assert calls[2].arguments[0] is tau[0, 1] + assert calls[3].arguments[0] is tau[1, 1] + + assert calls[4].arguments[0] is v[0] + assert calls[4].arguments[1] is v[1] + + +class TestTTI_w_MPI: + + @pytest.mark.parallel(mode=[(1)]) + def test_halo_structure(self, mode): + + mytest = TestTTI() + solver = mytest.tti_operator(opt='advanced', space_order=8) + op = solver.op_fwd(save=False) + + calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 1 + assert calls[0].functions[0].name == 'u' + assert calls[0].functions[1].name == 'v' + + if __name__ == "__main__": # configuration['mpi'] = 'overlap' # TestDecomposition().test_reshape_left_right() From f922248499479b285a1f67152ae39ec8a8fdcebf Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Mon, 4 Nov 2024 14:57:49 +0200 Subject: [PATCH 05/16] examples: Update MPI notebook output --- examples/mpi/overview.ipynb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/mpi/overview.ipynb b/examples/mpi/overview.ipynb index df57022cbe..7e8926aa1f 100644 --- a/examples/mpi/overview.ipynb +++ b/examples/mpi/overview.ipynb @@ -107,7 +107,10 @@ "source": [ "%%px\n", "from devito import configuration\n", - "configuration['mpi'] = True" + "configuration['mpi'] = True\n", + "\n", + "# Feel free to change the log level, and see more detailed logging\n", + "configuration['log-level'] = 'INFO'" ] }, { From 38e667b582e80190677e72aac8025665613baf58 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Mon, 4 Nov 2024 19:55:30 +0200 Subject: [PATCH 06/16] examples: Update with edge case of 03_PML notebook --- devito/ir/iet/nodes.py | 1 - devito/passes/iet/mpi.py | 13 +++++++++---- examples/seismic/abc_methods/03_pml.ipynb | 8 +++----- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 9def607678..378d0be721 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1460,7 +1460,6 @@ def __repr__(self): fstrings = [] for f in self.functions: loc_indices = set().union(*[self.halo_scheme.fmapper[f].loc_indices.values()]) - loc_indices = list(loc_indices) loc_indices_str = str(list(loc_indices)) if loc_indices else "" fstrings.append(f"{f.name}{loc_indices_str}") diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index ccc3dc729f..2aff3fd5c9 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -137,10 +137,15 @@ def _process_halo_to_halo(hs0, hs1, iters, scopes, hsmapper, imapper): # the loc_indices with known values # TODO: Can I get this in a more elegant way? for d in hse.loc_indices: - root_min = hse.loc_indices[d].symbolic_min - new_min = root_min.subs(hse.loc_indices[d].root, - hse.loc_indices[d].root.symbolic_min) - raw_loc_indices[d] = new_min + if hse.loc_indices[d].is_Symbol: + assert d in hse.loc_indices[d]._defines + root_min = hse.loc_indices[d].symbolic_min + new_min = root_min.subs(hse.loc_indices[d].root, + hse.loc_indices[d].root.symbolic_min) + raw_loc_indices[d] = new_min + else: + assert d.symbolic_min in hse.loc_indices[d].free_symbols + raw_loc_indices[d] = hse.loc_indices[d] hse = hse.rebuild(loc_indices=frozendict(raw_loc_indices)) hs1.halo_scheme.fmapper[f] = hse diff --git a/examples/seismic/abc_methods/03_pml.ipynb b/examples/seismic/abc_methods/03_pml.ipynb index d3ca9929b1..b0ab62839c 100644 --- a/examples/seismic/abc_methods/03_pml.ipynb +++ b/examples/seismic/abc_methods/03_pml.ipynb @@ -179,10 +179,8 @@ "# NBVAL_IGNORE_OUTPUT\n", "\n", "%matplotlib inline\n", - "from examples.seismic import TimeAxis\n", - "from examples.seismic import RickerSource\n", - "from examples.seismic import Receiver\n", - "from devito import SubDomain, Grid, NODE, TimeFunction, Function, Eq, solve, Operator" + "from devito import SubDomain, Grid, NODE, TimeFunction, Function, Eq, solve, Operator\n", + "from examples.seismic import TimeAxis, RickerSource, Receiver" ] }, { @@ -205,7 +203,7 @@ "compx = x1-x0\n", "z0 = 0.\n", "z1 = 1000.\n", - "compz = z1-z0;\n", + "compz = z1-z0\n", "hxv = (x1-x0)/(nptx-1)\n", "hzv = (z1-z0)/(nptz-1)" ] From d11d7057db4ba823ecd656b74763cfe3472dcc2e Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 5 Nov 2024 18:57:52 +0200 Subject: [PATCH 07/16] compiler: Inline halo_to_halo --- devito/mpi/halo_scheme.py | 5 ++- devito/passes/iet/mpi.py | 92 ++++++++++++++++++--------------------- tests/test_mpi.py | 35 ++++++--------- 3 files changed, 59 insertions(+), 73 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index d026e8ea65..bfbab67870 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -161,7 +161,10 @@ def union(self, halo_schemes): Create a new HaloScheme from the union of a set of HaloSchemes. """ halo_schemes = [hs for hs in halo_schemes if hs is not None] - if not halo_schemes: + + if len(halo_schemes) == 1: + return halo_schemes[0] + elif not halo_schemes: return None fmapper = {} diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 2aff3fd5c9..323b390c7f 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -63,7 +63,7 @@ def _hoist_halospots(iet): Example: haloupd v[t0] for time for time - W v[t1]- R v[t0] W v[t1]- R v[t0] + W v[t1]- R v[t0] W v[t1]- R v[t0] haloupd v[t1] haloupd v[t1] R v[t1] R v[t1] haloupd v[t0] R v[t0] @@ -74,7 +74,7 @@ def _hoist_halospots(iet): # Precompute scopes to save time scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} - cond_mapper = _create_cond_mapper(iet) + cond_mapper = _make_cond_mapper(iet) # Analysis hsmapper = {} @@ -95,13 +95,46 @@ def _hoist_halospots(iet): continue # If there are overlapping time accesses, skip - if any(i in hs0.halo_scheme.loc_values - for i in hs1.halo_scheme.loc_values): + if hs0.halo_scheme.loc_values.intersection(hs1.halo_scheme.loc_values): continue - # Compare hs0 to subsequent halo_spots, looking for optimization - # possibilities - _process_halo_to_halo(hs0, hs1, iters, scopes, hsmapper, imapper) + # Loop over the functions in the HaloSpots + for f, v in hs1.fmapper.items(): + # If no time accesses, skip + if not hs1.halo_scheme.fmapper[f].loc_indices: + continue + + # If the function is not in both HaloSpots, skip + if f not in hs0.functions: + continue + + for it in iters: + # If also merge-able we can start hoisting the latter + for dep in scopes[it].d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): + break + else: + hse = hs1.halo_scheme.fmapper[f] + raw_loc_indices = {} + # Entering here means we can lift, and we need to update + # the loc_indices with known values + # TODO: Can I get this in a more elegant way? + for d in hse.loc_indices: + if hse.loc_indices[d].is_Symbol: + assert d in hse.loc_indices[d]._defines + root_min = hse.loc_indices[d].symbolic_min + new_min = root_min.subs(hse.loc_indices[d].root, + hse.loc_indices[d].root.symbolic_min) + raw_loc_indices[d] = new_min + else: + assert d.symbolic_min in hse.loc_indices[d].free_symbols + raw_loc_indices[d] = hse.loc_indices[d] + + hse = hse.rebuild(loc_indices=frozendict(raw_loc_indices)) + hs1.halo_scheme.fmapper[f] = hse + + hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) + imapper[it].append(hs1.halo_scheme.project(f)) mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) for i, hss in imapper.items()} @@ -113,47 +146,6 @@ def _hoist_halospots(iet): return iet -def _process_halo_to_halo(hs0, hs1, iters, scopes, hsmapper, imapper): - - # Loop over the functions in the HaloSpots - for f, v in hs1.fmapper.items(): - # If no time accesses, skip - if not hs1.halo_scheme.fmapper[f].loc_indices: - continue - - # If the function is not in both HaloSpots, skip - if (*hs0.functions, *hs1.functions).count(f) < 2: - continue - - for iter in iters: - # If also merge-able we can start hoisting the latter - for dep in scopes[iter].d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): - break - else: - hse = hs1.halo_scheme.fmapper[f] - raw_loc_indices = {} - # Entering here means we can lift, and we need to update - # the loc_indices with known values - # TODO: Can I get this in a more elegant way? - for d in hse.loc_indices: - if hse.loc_indices[d].is_Symbol: - assert d in hse.loc_indices[d]._defines - root_min = hse.loc_indices[d].symbolic_min - new_min = root_min.subs(hse.loc_indices[d].root, - hse.loc_indices[d].root.symbolic_min) - raw_loc_indices[d] = new_min - else: - assert d.symbolic_min in hse.loc_indices[d].free_symbols - raw_loc_indices[d] = hse.loc_indices[d] - - hse = hse.rebuild(loc_indices=frozendict(raw_loc_indices)) - hs1.halo_scheme.fmapper[f] = hse - - hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) - imapper[iter].append(hs1.halo_scheme.project(f)) - - def _merge_halospots(iet): """ Merge HaloSpots on the same Iteration tree level where all data dependencies @@ -161,7 +153,7 @@ def _merge_halospots(iet): """ # Analysis - cond_mapper = _create_cond_mapper(iet) + cond_mapper = _make_cond_mapper(iet) mapper = {} for iter, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items(): @@ -363,7 +355,7 @@ def mpiize(graph, **kwargs): # Utility functions to avoid code duplication -def _create_cond_mapper(iet): +def _make_cond_mapper(iet): cond_mapper = MapHaloSpots().visit(iet) return {hs: {i for i in v if i.is_Conditional and not isinstance(i.condition, GuardFactorEq)} diff --git a/tests/test_mpi.py b/tests/test_mpi.py index e51a8f06ee..c02d122731 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -22,7 +22,7 @@ from devito.types.dimension import SpaceDimension from examples.seismic.acoustic import acoustic_setup -from examples.seismic import Receiver, TimeAxis, demo_model +from examples.seismic import demo_model from tests.test_dse import TestTTI @@ -1013,23 +1013,20 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): @pytest.mark.parallel(mode=1) def test_issue_2448(self, mode): - extent = (10., ) - shape = (2, ) + extent = (10.,) + shape = (2,) so = 2 - to = 1 x = SpaceDimension(name='x', spacing=Constant(name='h_x', value=extent[0]/(shape[0]-1))) - grid = Grid(extent=extent, shape=shape, dimensions=(x, )) + grid = Grid(extent=extent, shape=shape, dimensions=(x,)) # Time related - t0, tn = 0., 30. - dt = (10. / np.sqrt(2.)) / 6. - time_range = TimeAxis(start=t0, stop=tn, step=dt) + tn = 30 # Velocity and pressure fields - v = TimeFunction(name='v', grid=grid, space_order=so, time_order=to) - tau = TimeFunction(name='tau', grid=grid, space_order=so, time_order=to) + v = TimeFunction(name='v', grid=grid, space_order=so) + tau = TimeFunction(name='tau', grid=grid, space_order=so) # First order elastic-like dependencies equations pde_v = v.dt - (tau.dx) @@ -1040,7 +1037,7 @@ def test_issue_2448(self, mode): # Test two variants of receiver interpolation nrec = 1 - rec = Receiver(name="rec", grid=grid, npoint=nrec, time_range=time_range) + rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) # The receiver 0 @@ -2811,18 +2808,12 @@ def test_elastic_structure(self, mode): shape=(301, 301), spacing=(10., 10.), space_order=so) - t0, tn = 0., 2000. - dt = model.critical_dt - time_range = TimeAxis(start=t0, stop=tn, step=dt) - - x, z = model.grid.dimensions - - v = VectorTimeFunction(name='v', grid=model.grid, space_order=so, time_order=1) - tau = TensorTimeFunction(name='t', grid=model.grid, space_order=so, time_order=1) + v = VectorTimeFunction(name='v', grid=model.grid, space_order=so) + tau = TensorTimeFunction(name='t', grid=model.grid, space_order=so) # The receiver nrec = 301 - rec = Receiver(name="rec", grid=model.grid, npoint=nrec, time_range=time_range) + rec = SparseTimeFunction(name="rec", grid=model.grid, npoint=nrec, nt=10) rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0], num=nrec) rec.coordinates.data[:, -1] = 5. @@ -2864,9 +2855,9 @@ def test_elastic_structure(self, mode): assert calls[4].arguments[1] is v[1] -class TestTTI_w_MPI: +class TestTTIwMPI: - @pytest.mark.parallel(mode=[(1)]) + @pytest.mark.parallel(mode=1) def test_halo_structure(self, mode): mytest = TestTTI() From 65b5d3356eac681a93816f442e831abc09091de3 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Wed, 6 Nov 2024 15:44:49 +0200 Subject: [PATCH 08/16] compiler: class HaloSchemeEntry(Reconstructable) --- devito/ir/iet/nodes.py | 4 ++-- devito/mpi/halo_scheme.py | 21 ++++++++------------- devito/passes/iet/mpi.py | 18 +++++++++--------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 378d0be721..72ff6e3c04 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -16,7 +16,7 @@ Forward, WithLock, PrefetchUpdate, detect_io) from devito.symbolics import ListInitializer, CallFromPointer, ccode from devito.tools import (Signer, as_tuple, filter_ordered, filter_sorted, flatten, - ctypes_to_cstr) + ctypes_to_cstr, OrderedSet) from devito.types.basic import (AbstractFunction, AbstractSymbol, Basic, Indexed, Symbol) from devito.types.object import AbstractObject, LocalObject @@ -1459,7 +1459,7 @@ def __init__(self, body, halo_scheme): def __repr__(self): fstrings = [] for f in self.functions: - loc_indices = set().union(*[self.halo_scheme.fmapper[f].loc_indices.values()]) + loc_indices = OrderedSet(*(self.halo_scheme.fmapper[f].loc_indices.values())) loc_indices_str = str(list(loc_indices)) if loc_indices else "" fstrings.append(f"{f.name}{loc_indices_str}") diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index bfbab67870..692fe62217 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -11,7 +11,7 @@ from devito.ir.support import Forward, Scope from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, - frozendict, is_integer, filter_sorted) + frozendict, is_integer, filter_sorted, OrderedSet) from devito.types import Grid __all__ = ['HaloScheme', 'HaloSchemeEntry', 'HaloSchemeException', 'HaloTouch'] @@ -28,7 +28,9 @@ class HaloLabel(Tag): STENCIL = HaloLabel('stencil') -class HaloSchemeEntry: +class HaloSchemeEntry(Reconstructable): + + __rkwargs__ = ('loc_indices', 'loc_dirs', 'halos', 'dims') def __init__(self, loc_indices, loc_dirs, halos, dims): self.loc_indices = loc_indices @@ -36,10 +38,6 @@ def __init__(self, loc_indices, loc_dirs, halos, dims): self.halos = halos self.dims = dims - def __repr__(self): - return (f"HaloSchemeEntry(loc_indices={self.loc_indices}, " - f"loc_dirs={self.loc_dirs}, halos={self.halos}, dims={self.dims})") - def __eq__(self, other): if not isinstance(other, HaloSchemeEntry): return False @@ -54,12 +52,9 @@ def __hash__(self): frozenset(self.halos), frozenset(self.dims))) - def rebuild(self, **kwargs): - loc_indices = kwargs.get('loc_indices', self.loc_indices) - loc_dirs = kwargs.get('loc_dirs', self.loc_dirs) - halos = kwargs.get('halos', self.halos) - dims = kwargs.get('dims', self.dims) - return HaloSchemeEntry(loc_indices, loc_dirs, halos, dims) + def __repr__(self): + return (f"HaloSchemeEntry(loc_indices={self.loc_indices}, " + f"loc_dirs={self.loc_dirs}, halos={self.halos}, dims={self.dims})") Halo = namedtuple('Halo', 'dim side') @@ -128,7 +123,7 @@ def __init__(self, exprs, ispace): def __repr__(self): fstrings = [] for f in self.fmapper: - loc_indices = set().union(*[self._mapper[f].loc_indices.values()]) + loc_indices = OrderedSet(*(self._mapper[f].loc_indices.values())) loc_indices_str = str(list(loc_indices)) if loc_indices else "" fstrings.append(f"{f.name}{loc_indices_str}") diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 323b390c7f..72e351bc84 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -109,7 +109,6 @@ def _hoist_halospots(iet): continue for it in iters: - # If also merge-able we can start hoisting the latter for dep in scopes[it].d_flow.project(f): if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): break @@ -120,17 +119,18 @@ def _hoist_halospots(iet): # the loc_indices with known values # TODO: Can I get this in a more elegant way? for d in hse.loc_indices: - if hse.loc_indices[d].is_Symbol: - assert d in hse.loc_indices[d]._defines - root_min = hse.loc_indices[d].symbolic_min - new_min = root_min.subs(hse.loc_indices[d].root, - hse.loc_indices[d].root.symbolic_min) + md = hse.loc_indices[d] + if md.is_Symbol: + root = md.root + hse_min = md.symbolic_min + new_min = hse_min.subs(root, root.symbolic_min) raw_loc_indices[d] = new_min else: - assert d.symbolic_min in hse.loc_indices[d].free_symbols - raw_loc_indices[d] = hse.loc_indices[d] + # md is in form of an expression + assert d.symbolic_min in md.free_symbols + raw_loc_indices[d] = md - hse = hse.rebuild(loc_indices=frozendict(raw_loc_indices)) + hse = hse._rebuild(loc_indices=frozendict(raw_loc_indices)) hs1.halo_scheme.fmapper[f] = hse hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) From b100a3e60841e92c400281e6d0bc2fbf2e702b93 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Thu, 14 Nov 2024 10:28:04 +0000 Subject: [PATCH 09/16] compiler: Rework terminology, minor cleanup, comments --- devito/ir/iet/nodes.py | 6 +- devito/mpi/halo_scheme.py | 12 +- devito/passes/iet/mpi.py | 195 +++++++++++--------- examples/seismic/viscoacoustic/operators.py | 2 +- examples/seismic/viscoelastic/operators.py | 2 +- tests/test_mpi.py | 83 +++++++-- 6 files changed, 182 insertions(+), 118 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 72ff6e3c04..f03112464e 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1461,12 +1461,10 @@ def __repr__(self): for f in self.functions: loc_indices = OrderedSet(*(self.halo_scheme.fmapper[f].loc_indices.values())) loc_indices_str = str(list(loc_indices)) if loc_indices else "" - - fstrings.append(f"{f.name}{loc_indices_str}") + fstrings.append("%s%s" % (f.name, loc_indices_str)) functions = ",".join(fstrings) - - return f"<{self.__class__.__name__}({functions})>" + return "<%s(%s)>" % (self.__class__.__name__, functions) @property def halo_scheme(self): diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 692fe62217..12a102b30e 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -30,7 +30,7 @@ class HaloLabel(Tag): class HaloSchemeEntry(Reconstructable): - __rkwargs__ = ('loc_indices', 'loc_dirs', 'halos', 'dims') + __rargs__ = ('loc_indices', 'loc_dirs', 'halos', 'dims') def __init__(self, loc_indices, loc_dirs, halos, dims): self.loc_indices = loc_indices @@ -125,12 +125,10 @@ def __repr__(self): for f in self.fmapper: loc_indices = OrderedSet(*(self._mapper[f].loc_indices.values())) loc_indices_str = str(list(loc_indices)) if loc_indices else "" - - fstrings.append(f"{f.name}{loc_indices_str}") + fstrings.append("%s%s" % (f.name, loc_indices_str)) functions = ",".join(fstrings) - - return f"<{self.__class__.__name__}({functions})>" + return "<%s(%s)>" % (self.__class__.__name__, functions) def __eq__(self, other): return (isinstance(other, HaloScheme) and @@ -676,8 +674,8 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): # Nope, let's try with the next Indexed, if any continue - hse = hse0.rebuild(loc_indices=frozendict(loc_indices), - loc_dirs=frozendict(loc_dirs)) + hse = hse0._rebuild(loc_indices=frozendict(loc_indices), + loc_dirs=frozendict(loc_dirs)) else: continue diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 72e351bc84..c352a86c3a 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -1,6 +1,7 @@ from collections import defaultdict from sympy import S +from itertools import combinations from devito.ir.iet import (Call, Expression, HaloSpot, Iteration, FindNodes, MapNodes, MapHaloSpots, Transformer, @@ -19,11 +20,11 @@ @iet_pass def optimize_halospots(iet, **kwargs): """ - Optimize the HaloSpots in ``iet``. HaloSpots may be dropped, merged and moved - around in order to improve the halo exchange performance. + Optimize the HaloSpots in ``iet``. HaloSpots may be dropped, hoisted, + merged and moved around in order to improve the halo exchange performance. """ - iet = _drop_halospots(iet) - iet = _hoist_halospots(iet) + iet = _drop_reduction_halospots(iet) + iet = _hoist_invariant(iet) iet = _merge_halospots(iet) iet = _drop_if_unwritten(iet, **kwargs) iet = _mark_overlappable(iet) @@ -31,7 +32,7 @@ def optimize_halospots(iet, **kwargs): return iet, {} -def _drop_halospots(iet): +def _drop_reduction_halospots(iet): """ Remove HaloSpots that: @@ -48,17 +49,18 @@ def _drop_halospots(iet): mapper[hs].add(f) # Transform the IET introducing the "reduced" HaloSpots - subs = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs])) - for hs in FindNodes(HaloSpot).visit(iet)} - iet = Transformer(subs, nested=True).visit(iet) + mapper = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs])) + for hs in FindNodes(HaloSpot).visit(iet)} + iet = Transformer(mapper, nested=True).visit(iet) return iet -def _hoist_halospots(iet): +def _hoist_invariant(iet): """ Hoist HaloSpots from inner to outer Iterations where all data dependencies - would be honored. + would be honored. This pass avoids redundant halo exchanges when the same + data is redundantly exchanged within the same Iteration tree level. Example: haloupd v[t0] @@ -80,108 +82,113 @@ def _hoist_halospots(iet): hsmapper = {} imapper = defaultdict(list) - # Look for parent Iterations of children HaloSpots - for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items(): - for i, hs0 in enumerate(halo_spots): + iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + + # Drop void `halo_scheme`s from the analysis + iter_mapper = {k: [hs for hs in v if not hs.halo_scheme.is_void] + for k, v in iter_mapper.items()} + + iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} + + iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} - # Nothing to do if the HaloSpot is void - if hs0.halo_scheme.is_void: + for it, halo_spots in iter_mapper.items(): + for hs0, hs1 in combinations(halo_spots, r=2): + + if _ensure_control_flow(hs0, hs1, cond_mapper): continue - for hs1 in halo_spots[i+1:]: - # If there are Conditionals involved, both `hs0` and `hs1` must be - # within the same Conditional, otherwise we would break the control - if cond_mapper.get(hs0) != cond_mapper.get(hs1): - continue + # If there are overlapping loc_indices, skip + hs0_mdims = hs0.halo_scheme.loc_values + hs1_mdims = hs1.halo_scheme.loc_values + if hs0_mdims.intersection(hs1_mdims): + continue - # If there are overlapping time accesses, skip - if hs0.halo_scheme.loc_values.intersection(hs1.halo_scheme.loc_values): + for f, v in hs1.fmapper.items(): + + if f not in hs0.functions: continue - # Loop over the functions in the HaloSpots - for f, v in hs1.fmapper.items(): - # If no time accesses, skip - if not hs1.halo_scheme.fmapper[f].loc_indices: - continue + for dep in scopes[it].d_flow.project(f): + if not any(r(dep, hs1, v.loc_indices) for r in rules): + break + else: + # hs1 can be hoisted out of `it`, but we need to infer valid + # loc_indices + hse = hs1.halo_scheme.fmapper[f] + raw_loc_indices = {} + + for d in hse.loc_indices: + md = hse.loc_indices[d] + if md in it.uindices: + new_min = md.symbolic_min.subs(it.dim, + it.dim.symbolic_min) + raw_loc_indices[d] = new_min + else: + raw_loc_indices[d] = md - # If the function is not in both HaloSpots, skip - if f not in hs0.functions: - continue + hse = hse._rebuild(loc_indices=frozendict(raw_loc_indices)) + hs1.halo_scheme.fmapper[f] = hse - for it in iters: - for dep in scopes[it].d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): - break - else: - hse = hs1.halo_scheme.fmapper[f] - raw_loc_indices = {} - # Entering here means we can lift, and we need to update - # the loc_indices with known values - # TODO: Can I get this in a more elegant way? - for d in hse.loc_indices: - md = hse.loc_indices[d] - if md.is_Symbol: - root = md.root - hse_min = md.symbolic_min - new_min = hse_min.subs(root, root.symbolic_min) - raw_loc_indices[d] = new_min - else: - # md is in form of an expression - assert d.symbolic_min in md.free_symbols - raw_loc_indices[d] = md - - hse = hse._rebuild(loc_indices=frozendict(raw_loc_indices)) - hs1.halo_scheme.fmapper[f] = hse - - hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) - imapper[it].append(hs1.halo_scheme.project(f)) + hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) + imapper[it].append(hs1.halo_scheme.project(f)) mapper = {i: HaloSpot(i._rebuild(), HaloScheme.union(hss)) for i, hss in imapper.items()} mapper.update({i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) for i, hs in hsmapper.items()}) - + # Transform the IET hoisting/dropping HaloSpots as according to the analysis iet = Transformer(mapper, nested=True).visit(iet) - return iet def _merge_halospots(iet): """ Merge HaloSpots on the same Iteration tree level where all data dependencies - would be honored. + would be honored. Avoids redundant halo exchanges when the same data is + redundantly exchanged within the same Iteration tree level as well as to initiate + multiple halo exchanges at once. + + Example: + + for time for time + haloupd v[t0] haloupd v[t0], h + W v[t1]- R v[t0] W v[t1]- R v[t0] + haloupd v[t0], h + W g[t1]- R v[t0], h W g[t1]- R v[t0], h + """ # Analysis cond_mapper = _make_cond_mapper(iet) mapper = {} - for iter, halo_spots in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items(): - if iter is None or len(halo_spots) <= 1: - continue - scope = Scope([e.expr for e in FindNodes(Expression).visit(iter)]) + iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + + iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} + + iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} + + for it, halo_spots in iter_mapper.items(): + scope = Scope([e.expr for e in FindNodes(Expression).visit(it)]) hs0 = halo_spots[0] - mapper[hs0] = hs0.halo_scheme for hs1 in halo_spots[1:]: - mapper[hs1] = hs1.halo_scheme - # If there are Conditionals involved, both `hs0` and `hs1` must be - # within the same Conditional, otherwise we would break the control - # flow semantics - if cond_mapper.get(hs0) != cond_mapper.get(hs1): + if _ensure_control_flow(hs0, hs1, cond_mapper): continue for f, v in hs1.fmapper.items(): for dep in scope.d_flow.project(f): - if not any(r(dep, hs1, v.loc_indices) for r in merge_rules()): + if not any(r(dep, hs1, v.loc_indices) for r in rules): break else: + # hs1 is merged with hs0 hs = hs1.halo_scheme.project(f) - mapper[hs0] = HaloScheme.union([mapper[hs0], hs]) - mapper[hs1] = mapper[hs1].drop(f) + mapper[hs0] = HaloScheme.union([mapper.get(hs0, hs0.halo_scheme), hs]) + mapper[hs1] = mapper.get(hs1, hs1.halo_scheme).drop(f) # Post-process analysis mapper = {i: i.body if hs.is_void else i._rebuild(halo_scheme=hs) @@ -353,7 +360,7 @@ def mpiize(graph, **kwargs): make_reductions(graph, mpimode=mpimode, **kwargs) -# Utility functions to avoid code duplication +# *** Utilities def _make_cond_mapper(iet): cond_mapper = MapHaloSpots().visit(iet) @@ -362,26 +369,30 @@ def _make_cond_mapper(iet): for hs, v in cond_mapper.items()} -def merge_rules(): - # Merge rules -- if the retval is True, then it means the input `dep` is not - # a stopper to halo merging +def _ensure_control_flow(hs0, hs1, cond_mapper): + """ + If there are Conditionals involved, both `hs0` and `hs1` must be + within the same Conditional, otherwise we would break control flow + """ + cond0 = cond_mapper.get(hs0) + cond1 = cond_mapper.get(hs1) + + return cond0 != cond1 + + +# Code motion rules -- if the retval is True, then it means the input `dep` is not +# a stopper to moving the HaloSpot `hs` around - def rule0(dep, hs, loc_indices): - # E.g., `dep=W -> R` => True - return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity - for d in dep.cause) +def _rule0(dep, hs, loc_indices): + # E.g., `dep=W -> R` => True + return not any(d in hs.dimensions or dep.distance_mapper[d] is S.Infinity + for d in dep.cause) - def rule1(dep, hs, loc_indices): - # TODO This is apparently never hit, but feeling uncomfortable to remove it - return (dep.is_regular and - dep.read is not None and - all(not any(dep.read.touched_halo(d.root)) for d in dep.cause)) - def rule2(dep, hs, loc_indices): - # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True - return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v - for d, v in loc_indices.items()) +def _rule1(dep, hs, loc_indices): + # E.g., `dep=W -> R` and `loc_indices={t: t0}` => True + return any(dep.distance_mapper[d] == 0 and dep.source[d] is not v + for d, v in loc_indices.items()) - rules = [rule0, rule1, rule2] - return rules +rules = [_rule0, _rule1] diff --git a/examples/seismic/viscoacoustic/operators.py b/examples/seismic/viscoacoustic/operators.py index 1253b61686..a1119236e6 100755 --- a/examples/seismic/viscoacoustic/operators.py +++ b/examples/seismic/viscoacoustic/operators.py @@ -524,7 +524,7 @@ def ForwardOperator(model, geometry, space_order=4, kernel='sls', time_order=2, # Substitute spacing terms to reduce flops return Operator(eqn + src_term + rec_term, subs=model.spacing_map, - name='Forward', **kwargs) + name='ViscoAcForward', **kwargs) def AdjointOperator(model, geometry, space_order=4, kernel='SLS', time_order=2, **kwargs): diff --git a/examples/seismic/viscoelastic/operators.py b/examples/seismic/viscoelastic/operators.py index bc2f5642a1..fdf1110004 100644 --- a/examples/seismic/viscoelastic/operators.py +++ b/examples/seismic/viscoelastic/operators.py @@ -64,4 +64,4 @@ def ForwardOperator(model, geometry, space_order=4, save=False, **kwargs): # Substitute spacing terms to reduce flops return Operator([u_v, u_r, u_t] + src_rec_expr, subs=model.spacing_map, - name='Forward', **kwargs) + name='ViscoElForward', **kwargs) diff --git a/tests/test_mpi.py b/tests/test_mpi.py index c02d122731..105c2b4b08 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -5,11 +5,10 @@ from conftest import _R, assert_blocking, assert_structure from devito import (Grid, Constant, Function, TimeFunction, SparseFunction, SparseTimeFunction, VectorTimeFunction, TensorTimeFunction, - Dimension, ConditionalDimension, div, + Dimension, ConditionalDimension, div, solve, diag, grad, SubDimension, SubDomain, Eq, Ne, Inc, NODE, Operator, norm, inner, configuration, switchconfig, generic_derivative, - PrecomputedSparseFunction, DefaultDimension, Buffer, - solve, diag, grad) + PrecomputedSparseFunction, DefaultDimension, Buffer) from devito.arch.compiler import OneapiCompiler from devito.data import LEFT, RIGHT from devito.ir.iet import (Call, Conditional, Iteration, FindNodes, FindSymbols, @@ -19,7 +18,6 @@ ComputeCall) from devito.mpi.distributed import CustomTopology from devito.tools import Bunch -from devito.types.dimension import SpaceDimension from examples.seismic.acoustic import acoustic_setup from examples.seismic import demo_model @@ -1013,13 +1011,10 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): @pytest.mark.parallel(mode=1) def test_issue_2448(self, mode): - extent = (10.,) shape = (2,) so = 2 - x = SpaceDimension(name='x', spacing=Constant(name='h_x', - value=extent[0]/(shape[0]-1))) - grid = Grid(extent=extent, shape=shape, dimensions=(x,)) + grid = Grid(shape=shape) # Time related tn = 30 @@ -1030,7 +1025,7 @@ def test_issue_2448(self, mode): # First order elastic-like dependencies equations pde_v = v.dt - (tau.dx) - pde_tau = (tau.dt - ((v.forward).dx)) + pde_tau = tau.dt - ((v.forward).dx) u_v = Eq(v.forward, solve(pde_v, v.forward)) u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) @@ -1038,7 +1033,7 @@ def test_issue_2448(self, mode): # Test two variants of receiver interpolation nrec = 1 rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) - rec.coordinates.data[:, 0] = np.linspace(0., extent[0], num=nrec) + rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) # The receiver 0 rec_term0 = rec.interpolate(expr=v) @@ -1077,6 +1072,69 @@ def test_issue_2448(self, mode): assert calls[1].arguments[0] is v assert calls[2].arguments[0] is v + # Further complicate/stree-test adding an artifical example + # with two hoisting opportunities + + # Velocity and pressure fields + v2 = TimeFunction(name='v2', grid=grid, space_order=so) + tau2 = TimeFunction(name='tau2', grid=grid, space_order=so) + + # First order elastic-like dependencies equations + pde_v2 = v2.dt - (tau2.dx) + pde_tau2 = tau2.dt - ((v2.forward).dx) + u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) + + u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) + + # Test two variants of receiver interpolation + nrec = 1 + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=nrec, nt=tn) + rec2.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) + + # The receiver 2 + rec_term2 = rec2.interpolate(expr=v2) + + # The receiver 3 + rec_term3 = rec2.interpolate(expr=v2.forward) + + # Test receiver interpolation 0, here we have a halo exchange hoisted + op2 = Operator([u_v] + [u_v2] + [u_tau] + [u_tau2] + rec_term0 + rec_term2) + + calls = [i for i in FindNodes(Call).visit(op2) + if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 5 + + assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[0])) == 2 + assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[1])) == 3 + + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is v2 + assert calls[2].arguments[0] is tau + assert calls[2].arguments[1] is tau2 + assert calls[3].arguments[0] is v + assert calls[4].arguments[0] is v2 + + # Test receiver interpolation 0, here we have a halo exchange hoisted + op3 = Operator([u_v] + [u_v2] + [u_tau] + [u_tau2] + rec_term0 + rec_term3) + + calls = [i for i in FindNodes(Call).visit(op3) + if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 5 + + assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[1])) == 4 + + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is tau + assert calls[1].arguments[1] is tau2 + assert calls[2].arguments[0] is v + assert calls[3].arguments[0] is v2 + assert calls[4].arguments[0] is v2 + @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_with_subdims(self, mode): grid = Grid(shape=(4,)) @@ -1113,8 +1171,8 @@ def test_avoid_haloupdate_with_constant_index(self, mode): eq = Eq(u.forward, u[t, 1] + u[t, 1 + x.symbolic_min] + u[t, x]) op = Operator(eq) - calls = FindNodes(Call).visit(op) + calls = FindNodes(Call).visit(op) assert len(calls) == 0 @pytest.mark.parallel(mode=1) @@ -2860,8 +2918,7 @@ class TestTTIwMPI: @pytest.mark.parallel(mode=1) def test_halo_structure(self, mode): - mytest = TestTTI() - solver = mytest.tti_operator(opt='advanced', space_order=8) + solver = TestTTI().tti_operator(opt='advanced', space_order=8) op = solver.op_fwd(save=False) calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)] From c098235c8f1178908d63406702c636110cfdd77f Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Thu, 14 Nov 2024 11:03:03 +0000 Subject: [PATCH 10/16] compiler: add frozendict to __init__ --- devito/mpi/halo_scheme.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 12a102b30e..cf8edadf11 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -33,8 +33,8 @@ class HaloSchemeEntry(Reconstructable): __rargs__ = ('loc_indices', 'loc_dirs', 'halos', 'dims') def __init__(self, loc_indices, loc_dirs, halos, dims): - self.loc_indices = loc_indices - self.loc_dirs = loc_dirs + self.loc_indices = frozendict(loc_indices) + self.loc_dirs = frozendict(loc_dirs) self.halos = halos self.dims = dims @@ -596,7 +596,7 @@ def process_loc_indices(raw_loc_indices, directions): known = set().union(*[i._defines for i in loc_indices]) loc_dirs = {d: v for d, v in directions.items() if d in known} - return frozendict(loc_indices), frozendict(loc_dirs) + return loc_indices, loc_dirs class HaloTouch(sympy.Function, Reconstructable): @@ -674,8 +674,7 @@ def _uxreplace_dispatch_haloscheme(hs0, rule): # Nope, let's try with the next Indexed, if any continue - hse = hse0._rebuild(loc_indices=frozendict(loc_indices), - loc_dirs=frozendict(loc_dirs)) + hse = hse0._rebuild(loc_indices=loc_indices, loc_dirs=loc_dirs) else: continue From 25e8ad53579016a4c9b1839c4a2fe3b24a594c67 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Mon, 18 Nov 2024 14:49:15 +0200 Subject: [PATCH 11/16] tests: Add backward test and drop redundancies --- devito/ir/iet/nodes.py | 8 ++++ devito/ir/iet/visitors.py | 7 +-- devito/passes/iet/mpi.py | 37 ++++++++------- tests/test_dse.py | 4 +- tests/test_mpi.py | 98 +++++++++++++++++++++++++++------------ 5 files changed, 100 insertions(+), 54 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index f03112464e..76876d687a 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -617,6 +617,14 @@ def bounds(self, _min=None, _max=None): return (_min, _max) + @property + def start(self): + """The start value.""" + if self.direction is Forward: + return self.dim.symbolic_min + else: + return self.dim.symbolic_max + @property def step(self): """The step value.""" diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 505fe2e001..9b068a7d25 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -873,7 +873,6 @@ def default_retval(cls): the nodes of type ``child_types`` retrieved by the search. This behaviour can be changed through this parameter. Accepted values are: - 'immediate': only the closest matching ancestor is mapped. - - 'groupby': the matching ancestors are grouped together as a single key. """ def __init__(self, parent_type=None, child_types=None, mode=None): @@ -886,7 +885,7 @@ def __init__(self, parent_type=None, child_types=None, mode=None): assert issubclass(parent_type, Node) self.parent_type = parent_type self.child_types = as_tuple(child_types) or (Call, Expression) - assert mode in (None, 'immediate', 'groupby') + assert mode in (None, 'immediate') self.mode = mode def visit_object(self, o, ret=None, **kwargs): @@ -903,9 +902,7 @@ def visit_Node(self, o, ret=None, parents=None, in_parent=False): if parents is None: parents = [] if isinstance(o, self.child_types): - if self.mode == 'groupby': - ret.setdefault(as_tuple(parents), []).append(o) - elif self.mode == 'immediate': + if self.mode == 'immediate': if in_parent: ret.setdefault(parents[-1], []).append(o) else: diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index c352a86c3a..a3cef2ff8b 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -82,15 +82,7 @@ def _hoist_invariant(iet): hsmapper = {} imapper = defaultdict(list) - iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) - - # Drop void `halo_scheme`s from the analysis - iter_mapper = {k: [hs for hs in v if not hs.halo_scheme.is_void] - for k, v in iter_mapper.items()} - - iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} - - iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} + iter_mapper = _filter_iter_mapper(iet) for it, halo_spots in iter_mapper.items(): for hs0, hs1 in combinations(halo_spots, r=2): @@ -121,9 +113,8 @@ def _hoist_invariant(iet): for d in hse.loc_indices: md = hse.loc_indices[d] if md in it.uindices: - new_min = md.symbolic_min.subs(it.dim, - it.dim.symbolic_min) - raw_loc_indices[d] = new_min + md_sub = it.start + raw_loc_indices[d] = md.symbolic_min.subs(it.dim, md_sub) else: raw_loc_indices[d] = md @@ -164,11 +155,7 @@ def _merge_halospots(iet): mapper = {} - iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) - - iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} - - iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} + iter_mapper = _filter_iter_mapper(iet) for it, halo_spots in iter_mapper.items(): scope = Scope([e.expr for e in FindNodes(Expression).visit(it)]) @@ -362,6 +349,20 @@ def mpiize(graph, **kwargs): # *** Utilities +def _filter_iter_mapper(iet): + """ + Given an IET, return a mapper from Iterations to the HaloSpots. + Additionally, filter out Iterations that are not of interest. + """ + iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) + iter_mapper = {k: [hs for hs in v if not hs.halo_scheme.is_void] + for k, v in iter_mapper.items()} + iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} + iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} + + return iter_mapper + + def _make_cond_mapper(iet): cond_mapper = MapHaloSpots().visit(iet) return {hs: {i for i in v if i.is_Conditional and @@ -395,4 +396,4 @@ def _rule1(dep, hs, loc_indices): for d, v in loc_indices.items()) -rules = [_rule0, _rule1] +rules = (_rule0, _rule1) diff --git a/tests/test_dse.py b/tests/test_dse.py index c82848a28a..989e22929a 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -11,8 +11,8 @@ SparseTimeFunction, Dimension, SubDimension, ConditionalDimension, DefaultDimension, Grid, Operator, norm, grad, div, dimensions, switchconfig, configuration, - centered, first_derivative, solve, transpose, Abs, cos, - sin, sqrt, floor, Ge, Lt, Derivative, solve) + first_derivative, solve, transpose, Abs, cos, + sin, sqrt, floor, Ge, Lt, Derivative) from devito.exceptions import InvalidArgument, InvalidOperator from devito.ir import (Conditional, DummyEq, Expression, Iteration, FindNodes, FindSymbols, ParallelIteration, retrieve_iteration_tree) diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 105c2b4b08..53b34b883f 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -20,7 +20,6 @@ from devito.tools import Bunch from examples.seismic.acoustic import acoustic_setup -from examples.seismic import demo_model from tests.test_dse import TestTTI @@ -1031,9 +1030,8 @@ def test_issue_2448(self, mode): u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) # Test two variants of receiver interpolation - nrec = 1 - rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) - rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) + rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=tn) + rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) # The receiver 0 rec_term0 = rec.interpolate(expr=v) @@ -1087,9 +1085,8 @@ def test_issue_2448(self, mode): u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) # Test two variants of receiver interpolation - nrec = 1 - rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=nrec, nt=tn) - rec2.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=tn) + rec2.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) # The receiver 2 rec_term2 = rec2.interpolate(expr=v2) @@ -1135,6 +1132,56 @@ def test_issue_2448(self, mode): assert calls[3].arguments[0] is v2 assert calls[4].arguments[0] is v2 + @pytest.mark.parallel(mode=1) + def test_issue_2448_backward(self, mode): + ''' + Similar to test_issue_2448, but with backward instead of forward + so that the hoisted halo + + ''' + shape = (2,) + so = 2 + + grid = Grid(shape=shape) + t = grid.stepping_dim + + tn = 7 + + # Velocity and pressure fields + v = TimeFunction(name='v', grid=grid, space_order=so) + v.data_with_halo[0, :] = 1. + v.data_with_halo[1, :] = 3. + + tau = TimeFunction(name='tau', grid=grid, space_order=so) + tau.data_with_halo[:] = 1. + + # First order elastic-like dependencies equations + pde_v = v.dt - (tau.dx) + pde_tau = tau.dt - ((v.backward).dx) + + u_v = Eq(v.backward, solve(pde_v, v)) + u_tau = Eq(tau.backward, solve(pde_tau, tau)) + + # Test two variants of receiver interpolation + nrec = 1 + rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) + rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) + + # Test receiver interpolation 0, here we have a halo exchange hoisted + op0 = Operator([u_v] + [u_tau] + rec.interpolate(expr=v)) + + calls = [i for i in FindNodes(Call).visit(op0) + if isinstance(i, HaloUpdateCall)] + + # The correct we want + assert len(calls) == 3 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 + assert calls[0].arguments[0] is v + assert calls[0].arguments[3].args[0] is t.symbolic_max + assert calls[1].arguments[0] is tau + assert calls[2].arguments[0] is v + @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_with_subdims(self, mode): grid = Grid(shape=(4,)) @@ -2862,35 +2909,30 @@ class TestElastic: def test_elastic_structure(self, mode): so = 4 - model = demo_model(preset='layers-elastic', nlayers=1, - shape=(301, 301), spacing=(10., 10.), - space_order=so) + grid = Grid(shape=(3, 3)) - v = VectorTimeFunction(name='v', grid=model.grid, space_order=so) - tau = TensorTimeFunction(name='t', grid=model.grid, space_order=so) + v = VectorTimeFunction(name='v', grid=grid, space_order=so) + tau = TensorTimeFunction(name='t', grid=grid, space_order=so) - # The receiver - nrec = 301 - rec = SparseTimeFunction(name="rec", grid=model.grid, npoint=nrec, nt=10) - rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0], num=nrec) - rec.coordinates.data[:, -1] = 5. + damp = Function(name='damp', grid=grid) + l = Function(name='lam', grid=grid) + mu = Function(name='mu', grid=grid) + ro = Function(name='b', grid=grid) + # The receiver + rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=10) rec_term = rec.interpolate(expr=v[0] + v[1]) - # Now let's try and create the staggered updates - # Lame parameters - l, mu, ro = model.lam, model.mu, model.b - # First order elastic wave equation pde_v = v.dt - ro * div(tau) pde_tau = (tau.dt - l * diag(div(v.forward)) - mu * (grad(v.forward) + grad(v.forward).transpose(inner=False))) + # Time update - u_v = Eq(v.forward, model.damp * solve(pde_v, v.forward)) - u_t = Eq(tau.forward, model.damp * solve(pde_tau, tau.forward)) + u_v = Eq(v.forward, damp * solve(pde_v, v.forward)) + u_t = Eq(tau.forward, damp * solve(pde_tau, tau.forward)) op = Operator([u_v] + [u_t] + rec_term) - assert len(op._func_table) == 11 calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)] @@ -2898,17 +2940,15 @@ def test_elastic_structure(self, mode): # The correct we want assert len(calls) == 5 - assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[0])) == 1 - assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[1])) == 4 - assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[3].body[2])) == 0 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[1].body[1])) == 4 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[1].body[2])) == 0 assert calls[0].arguments[0] is v[0] assert calls[0].arguments[1] is v[1] - assert calls[1].arguments[0] is tau[0, 0] assert calls[2].arguments[0] is tau[0, 1] assert calls[3].arguments[0] is tau[1, 1] - assert calls[4].arguments[0] is v[0] assert calls[4].arguments[1] is v[1] From f8182b7577b2cc79316835c209b640dfff92bc96 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 26 Nov 2024 18:25:33 +0200 Subject: [PATCH 12/16] compiler: Improve HaloSpot with reviews --- devito/ir/iet/nodes.py | 24 +++++++++++---------- devito/mpi/halo_scheme.py | 13 ++--------- devito/passes/iet/mpi.py | 24 +++++++++------------ examples/seismic/viscoacoustic/operators.py | 2 +- 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 76876d687a..86e432ad71 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1438,8 +1438,20 @@ def DummyExpr(*args, init=False): # Nodes required for distributed-memory halo exchange +class HaloMixin: -class HaloSpot(Node): + def __repr__(self): + fstrings = [] + for f in self.fmapper.keys(): + loc_indices = OrderedSet(*(self.fmapper[f].loc_indices.values())) + loc_indices_str = str(list(loc_indices)) if loc_indices else "" + fstrings.append("%s%s" % (f.name, loc_indices_str)) + + functions = ",".join(fstrings) + return "<%s(%s)>" % (self.__class__.__name__, functions) + + +class HaloSpot(HaloMixin, Node): """ A halo exchange operation (e.g., send, recv, wait, ...) required to @@ -1464,16 +1476,6 @@ def __init__(self, body, halo_scheme): self._halo_scheme = halo_scheme - def __repr__(self): - fstrings = [] - for f in self.functions: - loc_indices = OrderedSet(*(self.halo_scheme.fmapper[f].loc_indices.values())) - loc_indices_str = str(list(loc_indices)) if loc_indices else "" - fstrings.append("%s%s" % (f.name, loc_indices_str)) - - functions = ",".join(fstrings) - return "<%s(%s)>" % (self.__class__.__name__, functions) - @property def halo_scheme(self): return self._halo_scheme diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index cf8edadf11..92bfa92d03 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -9,6 +9,7 @@ from devito import configuration from devito.data import CORE, OWNED, LEFT, CENTER, RIGHT from devito.ir.support import Forward, Scope +from devito.ir.iet.nodes import HaloMixin from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, frozendict, is_integer, filter_sorted, OrderedSet) @@ -62,7 +63,7 @@ def __repr__(self): OMapper = namedtuple('OMapper', 'core owned') -class HaloScheme: +class HaloScheme(HaloMixin): """ A HaloScheme describes a set of halo exchanges through a mapper: @@ -120,16 +121,6 @@ def __init__(self, exprs, ispace): self._honored[i.root] = frozenset([(ltk, rtk)]) self._honored = frozendict(self._honored) - def __repr__(self): - fstrings = [] - for f in self.fmapper: - loc_indices = OrderedSet(*(self._mapper[f].loc_indices.values())) - loc_indices_str = str(list(loc_indices)) if loc_indices else "" - fstrings.append("%s%s" % (f.name, loc_indices_str)) - - functions = ",".join(fstrings) - return "<%s(%s)>" % (self.__class__.__name__, functions) - def __eq__(self, other): return (isinstance(other, HaloScheme) and self._mapper == other._mapper and diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index a3cef2ff8b..fb1684a21a 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -76,18 +76,17 @@ def _hoist_invariant(iet): # Precompute scopes to save time scopes = {i: Scope([e.expr for e in v]) for i, v in MapNodes().visit(iet).items()} - cond_mapper = _make_cond_mapper(iet) - # Analysis hsmapper = {} imapper = defaultdict(list) + cond_mapper = _make_cond_mapper(iet) iter_mapper = _filter_iter_mapper(iet) for it, halo_spots in iter_mapper.items(): for hs0, hs1 in combinations(halo_spots, r=2): - if _ensure_control_flow(hs0, hs1, cond_mapper): + if _check_control_flow(hs0, hs1, cond_mapper): continue # If there are overlapping loc_indices, skip @@ -110,13 +109,12 @@ def _hoist_invariant(iet): hse = hs1.halo_scheme.fmapper[f] raw_loc_indices = {} - for d in hse.loc_indices: - md = hse.loc_indices[d] - if md in it.uindices: - md_sub = it.start - raw_loc_indices[d] = md.symbolic_min.subs(it.dim, md_sub) + for d, v in hse.loc_indices.items(): + if v in it.uindices: + v_sub = it.start + raw_loc_indices[d] = v.symbolic_min.subs(it.dim, v_sub) else: - raw_loc_indices[d] = md + raw_loc_indices[d] = v hse = hse._rebuild(loc_indices=frozendict(raw_loc_indices)) hs1.halo_scheme.fmapper[f] = hse @@ -151,10 +149,8 @@ def _merge_halospots(iet): """ # Analysis - cond_mapper = _make_cond_mapper(iet) - mapper = {} - + cond_mapper = _make_cond_mapper(iet) iter_mapper = _filter_iter_mapper(iet) for it, halo_spots in iter_mapper.items(): @@ -164,7 +160,7 @@ def _merge_halospots(iet): for hs1 in halo_spots[1:]: - if _ensure_control_flow(hs0, hs1, cond_mapper): + if _check_control_flow(hs0, hs1, cond_mapper): continue for f, v in hs1.fmapper.items(): @@ -370,7 +366,7 @@ def _make_cond_mapper(iet): for hs, v in cond_mapper.items()} -def _ensure_control_flow(hs0, hs1, cond_mapper): +def _check_control_flow(hs0, hs1, cond_mapper): """ If there are Conditionals involved, both `hs0` and `hs1` must be within the same Conditional, otherwise we would break control flow diff --git a/examples/seismic/viscoacoustic/operators.py b/examples/seismic/viscoacoustic/operators.py index a1119236e6..d237d43ea6 100755 --- a/examples/seismic/viscoacoustic/operators.py +++ b/examples/seismic/viscoacoustic/operators.py @@ -524,7 +524,7 @@ def ForwardOperator(model, geometry, space_order=4, kernel='sls', time_order=2, # Substitute spacing terms to reduce flops return Operator(eqn + src_term + rec_term, subs=model.spacing_map, - name='ViscoAcForward', **kwargs) + name='ViscoIsoAcousticForward', **kwargs) def AdjointOperator(model, geometry, space_order=4, kernel='SLS', time_order=2, **kwargs): From 5a014ed56ec349347268ecdfdfcfc2c0edede656 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Wed, 27 Nov 2024 14:17:47 +0200 Subject: [PATCH 13/16] compiler: Further split tests --- devito/mpi/halo_scheme.py | 2 +- devito/passes/iet/mpi.py | 6 +- tests/test_mpi.py | 118 ++++++++++++++++++++------------------ 3 files changed, 66 insertions(+), 60 deletions(-) diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 92bfa92d03..f1bbb79003 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -12,7 +12,7 @@ from devito.ir.iet.nodes import HaloMixin from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, - frozendict, is_integer, filter_sorted, OrderedSet) + frozendict, is_integer, filter_sorted) from devito.types import Grid __all__ = ['HaloScheme', 'HaloSchemeEntry', 'HaloSchemeException', 'HaloTouch'] diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index fb1684a21a..204fcc2cae 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -34,10 +34,8 @@ def optimize_halospots(iet, **kwargs): def _drop_reduction_halospots(iet): """ - Remove HaloSpots that: - - * Would be used to compute Increments (in which case, a halo exchange - is actually unnecessary) + Remove HaloSpots that are used to compute Increments + (in which case, a halo exchange is actually unnecessary) """ mapper = defaultdict(set) diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 53b34b883f..7d28dfe7bc 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1008,16 +1008,14 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 0 - @pytest.mark.parallel(mode=1) - def test_issue_2448(self, mode): + @pytest.fixture + def setup(self): shape = (2,) so = 2 + tn = 30 grid = Grid(shape=shape) - # Time related - tn = 30 - # Velocity and pressure fields v = TimeFunction(name='v', grid=grid, space_order=so) tau = TimeFunction(name='tau', grid=grid, space_order=so) @@ -1026,86 +1024,76 @@ def test_issue_2448(self, mode): pde_v = v.dt - (tau.dx) pde_tau = tau.dt - ((v.forward).dx) u_v = Eq(v.forward, solve(pde_v, v.forward)) - u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) - # Test two variants of receiver interpolation + # Receiver rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=tn) rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) - # The receiver 0 - rec_term0 = rec.interpolate(expr=v) + return grid, v, tau, u_v, u_tau, rec - # The receiver 1 - rec_term1 = rec.interpolate(expr=v.forward) + @pytest.mark.parallel(mode=1) + def test_issue_2448_I(self, mode, setup): + _, v, tau, u_v, u_tau, rec = setup - # Test receiver interpolation 0, here we have a halo exchange hoisted - op0 = Operator([u_v] + [u_tau] + rec_term0) + rec_term0 = rec.interpolate(expr=v) - calls = [i for i in FindNodes(Call).visit(op0) - if isinstance(i, HaloUpdateCall)] + op0 = Operator([u_v, u_tau, rec_term0]) - # The correct we want - assert len(calls) == 3 + calls = [i for i in FindNodes(Call).visit(op0) if isinstance(i, HaloUpdateCall)] + assert len(calls) == 3 assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 assert calls[0].arguments[0] is v assert calls[1].arguments[0] is tau assert calls[2].arguments[0] is v - # Test receiver interpolation 1, here we should not have any halo exchange - # hoisted - op1 = Operator([u_v] + [u_tau] + rec_term1) + @pytest.mark.parallel(mode=1) + def test_issue_2448_II(self, mode, setup): + _, v, tau, u_v, u_tau, rec = setup - calls = [i for i in FindNodes(Call).visit(op1) - if isinstance(i, HaloUpdateCall)] + rec_term1 = rec.interpolate(expr=v.forward) - # The correct we want - assert len(calls) == 3 + op1 = Operator([u_v, u_tau, rec_term1]) + calls = [i for i in FindNodes(Call).visit(op1) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 3 assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[0])) == 0 assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[1])) == 3 assert calls[0].arguments[0] is tau assert calls[1].arguments[0] is v assert calls[2].arguments[0] is v - # Further complicate/stree-test adding an artifical example - # with two hoisting opportunities + @pytest.mark.parallel(mode=1) + def test_issue_2448_III(self, mode, setup): + grid, v, tau, u_v, u_tau, rec = setup - # Velocity and pressure fields - v2 = TimeFunction(name='v2', grid=grid, space_order=so) - tau2 = TimeFunction(name='tau2', grid=grid, space_order=so) + # Additional velocity and pressure fields + v2 = TimeFunction(name='v2', grid=grid, space_order=2) + tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) # First order elastic-like dependencies equations pde_v2 = v2.dt - (tau2.dx) pde_tau2 = tau2.dt - ((v2.forward).dx) u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) - u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) - # Test two variants of receiver interpolation - rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=tn) - rec2.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) + # Receiver + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) + rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) - # The receiver 2 + rec_term0 = rec.interpolate(expr=v) rec_term2 = rec2.interpolate(expr=v2) - # The receiver 3 - rec_term3 = rec2.interpolate(expr=v2.forward) - - # Test receiver interpolation 0, here we have a halo exchange hoisted - op2 = Operator([u_v] + [u_v2] + [u_tau] + [u_tau2] + rec_term0 + rec_term2) + op2 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term2]) - calls = [i for i in FindNodes(Call).visit(op2) - if isinstance(i, HaloUpdateCall)] + calls = [i for i in FindNodes(Call).visit(op2) if isinstance(i, HaloUpdateCall)] - # The correct we want assert len(calls) == 5 - assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[0])) == 2 assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[1])) == 3 - assert calls[0].arguments[0] is v assert calls[1].arguments[0] is v2 assert calls[2].arguments[0] is tau @@ -1113,18 +1101,34 @@ def test_issue_2448(self, mode): assert calls[3].arguments[0] is v assert calls[4].arguments[0] is v2 - # Test receiver interpolation 0, here we have a halo exchange hoisted - op3 = Operator([u_v] + [u_v2] + [u_tau] + [u_tau2] + rec_term0 + rec_term3) + @pytest.mark.parallel(mode=1) + def test_issue_2448_IV(self, mode, setup): + grid, v, tau, u_v, u_tau, rec = setup - calls = [i for i in FindNodes(Call).visit(op3) - if isinstance(i, HaloUpdateCall)] + # Additional velocity and pressure fields + v2 = TimeFunction(name='v2', grid=grid, space_order=2) + tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) - # The correct we want - assert len(calls) == 5 + # First order elastic-like dependencies equations + pde_v2 = v2.dt - (tau2.dx) + pde_tau2 = tau2.dt - ((v2.forward).dx) + u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) + u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) + + # Receiver + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) + rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) + + rec_term0 = rec.interpolate(expr=v) + rec_term3 = rec2.interpolate(expr=v2.forward) + op3 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term3]) + + calls = [i for i in FindNodes(Call).visit(op3) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 5 assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[0])) == 1 assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[1])) == 4 - assert calls[0].arguments[0] is v assert calls[1].arguments[0] is tau assert calls[1].arguments[1] is tau2 @@ -1136,8 +1140,7 @@ def test_issue_2448(self, mode): def test_issue_2448_backward(self, mode): ''' Similar to test_issue_2448, but with backward instead of forward - so that the hoisted halo - + so that the hoisted halo has different starting point ''' shape = (2,) so = 2 @@ -1397,7 +1400,7 @@ def test_avoid_fullmode_if_crossloop_dep(self, mode): assert np.all(f.data[:] == 2.) @pytest.mark.parallel(mode=2) - def test_avoid_haloudate_if_flowdep_along_other_dim(self, mode): + def test_avoid_halopudate_if_flowdep_along_other_dim(self, mode): grid = Grid(shape=(10,)) x = grid.dimensions[0] t = grid.stepping_dim @@ -1535,6 +1538,11 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 2 + assert calls[0].arguments[3].args[0] is t.symbolic_min + + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[1])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[2])) == 0 op.apply(time_M=1) glb_pos_map = f.grid.distributor.glb_pos_map @@ -2953,7 +2961,7 @@ def test_elastic_structure(self, mode): assert calls[4].arguments[1] is v[1] -class TestTTIwMPI: +class TestTTIOp: @pytest.mark.parallel(mode=1) def test_halo_structure(self, mode): From 8bfceeb78e9f216789afce6a194be6d5c9d413fa Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Mon, 9 Dec 2024 19:28:53 +0000 Subject: [PATCH 14/16] compiler: Fix misc review comments --- devito/ir/iet/nodes.py | 20 +- devito/mpi/halo_scheme.py | 29 +- devito/passes/iet/mpi.py | 30 +- .../seismic/tutorials/09_viscoelastic.ipynb | 2 +- examples/seismic/viscoelastic/operators.py | 2 +- tests/test_mpi.py | 368 +++++++++--------- 6 files changed, 229 insertions(+), 222 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 86e432ad71..23f293ad1e 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -16,7 +16,7 @@ Forward, WithLock, PrefetchUpdate, detect_io) from devito.symbolics import ListInitializer, CallFromPointer, ccode from devito.tools import (Signer, as_tuple, filter_ordered, filter_sorted, flatten, - ctypes_to_cstr, OrderedSet) + ctypes_to_cstr) from devito.types.basic import (AbstractFunction, AbstractSymbol, Basic, Indexed, Symbol) from devito.types.object import AbstractObject, LocalObject @@ -1438,20 +1438,7 @@ def DummyExpr(*args, init=False): # Nodes required for distributed-memory halo exchange -class HaloMixin: - - def __repr__(self): - fstrings = [] - for f in self.fmapper.keys(): - loc_indices = OrderedSet(*(self.fmapper[f].loc_indices.values())) - loc_indices_str = str(list(loc_indices)) if loc_indices else "" - fstrings.append("%s%s" % (f.name, loc_indices_str)) - - functions = ",".join(fstrings) - return "<%s(%s)>" % (self.__class__.__name__, functions) - - -class HaloSpot(HaloMixin, Node): +class HaloSpot(Node): """ A halo exchange operation (e.g., send, recv, wait, ...) required to @@ -1508,6 +1495,9 @@ def body(self): def functions(self): return tuple(self.fmapper) + def __repr__(self): + funcs = self.halo_scheme.__reprfuncs__() + return "<%s(%s)>" % (self.__class__.__name__, funcs) # Utility classes diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index f1bbb79003..d80127fae2 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -9,10 +9,9 @@ from devito import configuration from devito.data import CORE, OWNED, LEFT, CENTER, RIGHT from devito.ir.support import Forward, Scope -from devito.ir.iet.nodes import HaloMixin from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, - frozendict, is_integer, filter_sorted) + frozendict, is_integer, filter_sorted, OrderedSet) from devito.types import Grid __all__ = ['HaloScheme', 'HaloSchemeEntry', 'HaloSchemeException', 'HaloTouch'] @@ -36,8 +35,8 @@ class HaloSchemeEntry(Reconstructable): def __init__(self, loc_indices, loc_dirs, halos, dims): self.loc_indices = frozendict(loc_indices) self.loc_dirs = frozendict(loc_dirs) - self.halos = halos - self.dims = dims + self.halos = frozenset(halos) + self.dims = frozenset(dims) def __eq__(self, other): if not isinstance(other, HaloSchemeEntry): @@ -48,10 +47,10 @@ def __eq__(self, other): self.dims == other.dims) def __hash__(self): - return hash((frozenset(self.loc_indices.items()), - frozenset(self.loc_dirs.items()), - frozenset(self.halos), - frozenset(self.dims))) + return hash((tuple(self.loc_indices.items()), + tuple(self.loc_dirs.items()), + self.halos, + self.dims)) def __repr__(self): return (f"HaloSchemeEntry(loc_indices={self.loc_indices}, " @@ -63,7 +62,7 @@ def __repr__(self): OMapper = namedtuple('OMapper', 'core owned') -class HaloScheme(HaloMixin): +class HaloScheme(): """ A HaloScheme describes a set of halo exchanges through a mapper: @@ -121,6 +120,18 @@ def __init__(self, exprs, ispace): self._honored[i.root] = frozenset([(ltk, rtk)]) self._honored = frozendict(self._honored) + def __reprfuncs__(self): + fstrings = [] + for f in self.fmapper.keys(): + loc_indices = OrderedSet(*(self.fmapper[f].loc_indices.values())) + loc_indices_str = str(list(loc_indices)) if loc_indices else "" + fstrings.append("%s%s" % (f.name, loc_indices_str)) + + return ",".join(fstrings) + + def __repr__(self): + return "<%s(%s)>" % (self.__class__.__name__, self.__reprfuncs__()) + def __eq__(self, other): return (isinstance(other, HaloScheme) and self._mapper == other._mapper and diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 204fcc2cae..030dfacfad 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -12,7 +12,7 @@ from devito.mpi.reduction_scheme import DistReduce from devito.mpi.routines import HaloExchangeBuilder, ReductionBuilder from devito.passes.iet.engine import iet_pass -from devito.tools import generator, frozendict +from devito.tools import generator __all__ = ['mpiize'] @@ -94,7 +94,6 @@ def _hoist_invariant(iet): continue for f, v in hs1.fmapper.items(): - if f not in hs0.functions: continue @@ -114,7 +113,7 @@ def _hoist_invariant(iet): else: raw_loc_indices[d] = v - hse = hse._rebuild(loc_indices=frozendict(raw_loc_indices)) + hse = hse._rebuild(loc_indices=raw_loc_indices) hs1.halo_scheme.fmapper[f] = hse hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) @@ -348,20 +347,27 @@ def _filter_iter_mapper(iet): Given an IET, return a mapper from Iterations to the HaloSpots. Additionally, filter out Iterations that are not of interest. """ - iter_mapper = MapNodes(Iteration, HaloSpot, 'immediate').visit(iet) - iter_mapper = {k: [hs for hs in v if not hs.halo_scheme.is_void] - for k, v in iter_mapper.items()} - iter_mapper = {k: v for k, v in iter_mapper.items() if k is not None} - iter_mapper = {k: v for k, v in iter_mapper.items() if len(v) > 1} + iter_mapper = {} + for k, v in MapNodes(Iteration, HaloSpot, 'immediate').visit(iet).items(): + filtered_hs = [hs for hs in v if not hs.halo_scheme.is_void] + if k is not None and len(filtered_hs) > 1: + iter_mapper[k] = filtered_hs return iter_mapper def _make_cond_mapper(iet): - cond_mapper = MapHaloSpots().visit(iet) - return {hs: {i for i in v if i.is_Conditional and - not isinstance(i.condition, GuardFactorEq)} - for hs, v in cond_mapper.items()} + + cond_mapper = {} + for hs, v in MapHaloSpots().visit(iet).items(): + conditionals = set() + for i in v: + if i.is_Conditional and not isinstance(i.condition, GuardFactorEq): + conditionals.add(i) + + cond_mapper[hs] = conditionals + + return cond_mapper def _check_control_flow(hs0, hs1, cond_mapper): diff --git a/examples/seismic/tutorials/09_viscoelastic.ipynb b/examples/seismic/tutorials/09_viscoelastic.ipynb index 5d7cabdd75..0d1d524613 100644 --- a/examples/seismic/tutorials/09_viscoelastic.ipynb +++ b/examples/seismic/tutorials/09_viscoelastic.ipynb @@ -457,7 +457,7 @@ "source": [ "# References\n", "\n", - "[1] Johan O. A. Roberston, *et.al.* (1994). \"Viscoelatic finite-difference modeling\" GEOPHYSICS, 59(9), 1444-1456.\n", + "[1] Johan O. A. Roberston, *et.al.* (1994). \"Viscoelastic finite-difference modeling\" GEOPHYSICS, 59(9), 1444-1456.\n", "\n", "\n", "[2] https://janth.home.xs4all.nl/Software/fdelmodcManual.pdf" diff --git a/examples/seismic/viscoelastic/operators.py b/examples/seismic/viscoelastic/operators.py index fdf1110004..9e0269665a 100644 --- a/examples/seismic/viscoelastic/operators.py +++ b/examples/seismic/viscoelastic/operators.py @@ -64,4 +64,4 @@ def ForwardOperator(model, geometry, space_order=4, save=False, **kwargs): # Substitute spacing terms to reduce flops return Operator([u_v, u_r, u_t] + src_rec_expr, subs=model.spacing_map, - name='ViscoElForward', **kwargs) + name='ViscoIsoElasticForward', **kwargs) diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 7d28dfe7bc..05a5b4b82d 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1008,183 +1008,6 @@ def test_avoid_haloupdate_if_distr_but_sequential(self, mode): calls = FindNodes(Call).visit(op) assert len(calls) == 0 - @pytest.fixture - def setup(self): - shape = (2,) - so = 2 - tn = 30 - - grid = Grid(shape=shape) - - # Velocity and pressure fields - v = TimeFunction(name='v', grid=grid, space_order=so) - tau = TimeFunction(name='tau', grid=grid, space_order=so) - - # First order elastic-like dependencies equations - pde_v = v.dt - (tau.dx) - pde_tau = tau.dt - ((v.forward).dx) - u_v = Eq(v.forward, solve(pde_v, v.forward)) - u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) - - # Receiver - rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=tn) - rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) - - return grid, v, tau, u_v, u_tau, rec - - @pytest.mark.parallel(mode=1) - def test_issue_2448_I(self, mode, setup): - _, v, tau, u_v, u_tau, rec = setup - - rec_term0 = rec.interpolate(expr=v) - - op0 = Operator([u_v, u_tau, rec_term0]) - - calls = [i for i in FindNodes(Call).visit(op0) if isinstance(i, HaloUpdateCall)] - - assert len(calls) == 3 - assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 - assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 - assert calls[0].arguments[0] is v - assert calls[1].arguments[0] is tau - assert calls[2].arguments[0] is v - - @pytest.mark.parallel(mode=1) - def test_issue_2448_II(self, mode, setup): - _, v, tau, u_v, u_tau, rec = setup - - rec_term1 = rec.interpolate(expr=v.forward) - - op1 = Operator([u_v, u_tau, rec_term1]) - - calls = [i for i in FindNodes(Call).visit(op1) if isinstance(i, HaloUpdateCall)] - - assert len(calls) == 3 - assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[0])) == 0 - assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[1])) == 3 - assert calls[0].arguments[0] is tau - assert calls[1].arguments[0] is v - assert calls[2].arguments[0] is v - - @pytest.mark.parallel(mode=1) - def test_issue_2448_III(self, mode, setup): - grid, v, tau, u_v, u_tau, rec = setup - - # Additional velocity and pressure fields - v2 = TimeFunction(name='v2', grid=grid, space_order=2) - tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) - - # First order elastic-like dependencies equations - pde_v2 = v2.dt - (tau2.dx) - pde_tau2 = tau2.dt - ((v2.forward).dx) - u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) - u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) - - # Receiver - rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) - rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) - - rec_term0 = rec.interpolate(expr=v) - rec_term2 = rec2.interpolate(expr=v2) - - op2 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term2]) - - calls = [i for i in FindNodes(Call).visit(op2) if isinstance(i, HaloUpdateCall)] - - assert len(calls) == 5 - assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[0])) == 2 - assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[1])) == 3 - assert calls[0].arguments[0] is v - assert calls[1].arguments[0] is v2 - assert calls[2].arguments[0] is tau - assert calls[2].arguments[1] is tau2 - assert calls[3].arguments[0] is v - assert calls[4].arguments[0] is v2 - - @pytest.mark.parallel(mode=1) - def test_issue_2448_IV(self, mode, setup): - grid, v, tau, u_v, u_tau, rec = setup - - # Additional velocity and pressure fields - v2 = TimeFunction(name='v2', grid=grid, space_order=2) - tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) - - # First order elastic-like dependencies equations - pde_v2 = v2.dt - (tau2.dx) - pde_tau2 = tau2.dt - ((v2.forward).dx) - u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) - u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) - - # Receiver - rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) - rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) - - rec_term0 = rec.interpolate(expr=v) - rec_term3 = rec2.interpolate(expr=v2.forward) - - op3 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term3]) - - calls = [i for i in FindNodes(Call).visit(op3) if isinstance(i, HaloUpdateCall)] - - assert len(calls) == 5 - assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[0])) == 1 - assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[1])) == 4 - assert calls[0].arguments[0] is v - assert calls[1].arguments[0] is tau - assert calls[1].arguments[1] is tau2 - assert calls[2].arguments[0] is v - assert calls[3].arguments[0] is v2 - assert calls[4].arguments[0] is v2 - - @pytest.mark.parallel(mode=1) - def test_issue_2448_backward(self, mode): - ''' - Similar to test_issue_2448, but with backward instead of forward - so that the hoisted halo has different starting point - ''' - shape = (2,) - so = 2 - - grid = Grid(shape=shape) - t = grid.stepping_dim - - tn = 7 - - # Velocity and pressure fields - v = TimeFunction(name='v', grid=grid, space_order=so) - v.data_with_halo[0, :] = 1. - v.data_with_halo[1, :] = 3. - - tau = TimeFunction(name='tau', grid=grid, space_order=so) - tau.data_with_halo[:] = 1. - - # First order elastic-like dependencies equations - pde_v = v.dt - (tau.dx) - pde_tau = tau.dt - ((v.backward).dx) - - u_v = Eq(v.backward, solve(pde_v, v)) - u_tau = Eq(tau.backward, solve(pde_tau, tau)) - - # Test two variants of receiver interpolation - nrec = 1 - rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) - rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) - - # Test receiver interpolation 0, here we have a halo exchange hoisted - op0 = Operator([u_v] + [u_tau] + rec.interpolate(expr=v)) - - calls = [i for i in FindNodes(Call).visit(op0) - if isinstance(i, HaloUpdateCall)] - - # The correct we want - assert len(calls) == 3 - assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 - assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 - assert calls[0].arguments[0] is v - assert calls[0].arguments[3].args[0] is t.symbolic_max - assert calls[1].arguments[0] is tau - assert calls[2].arguments[0] is v - @pytest.mark.parallel(mode=1) def test_avoid_haloupdate_with_subdims(self, mode): grid = Grid(shape=(4,)) @@ -1400,7 +1223,7 @@ def test_avoid_fullmode_if_crossloop_dep(self, mode): assert np.all(f.data[:] == 2.) @pytest.mark.parallel(mode=2) - def test_avoid_halopudate_if_flowdep_along_other_dim(self, mode): + def test_avoid_haloupdate_if_flowdep_along_other_dim(self, mode): grid = Grid(shape=(10,)) x = grid.dimensions[0] t = grid.stepping_dim @@ -1513,8 +1336,10 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): * the second and third Eqs cannot be fused in the same loop - In the IET we end up with *one* HaloSpots, placed right before the - second Eq. The third Eq will seamlessy find its halo up-to-date. + In the IET we end up with *two* HaloSpots, one placed before the + time loop, and one placed before the second Eq. The third Eq, + reading from f[t0], will seamlessy find its halo up-to-date, + due to the f[t1] being updated in the previous time iteration. """ grid = Grid(shape=(10,)) x = grid.dimensions[0] @@ -1545,6 +1370,7 @@ def test_merge_haloupdate_if_diff_locindices_v1(self, mode): assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[2])) == 0 op.apply(time_M=1) + glb_pos_map = f.grid.distributor.glb_pos_map R = 1e-07 # Can't use np.all due to rounding error at the tails if LEFT in glb_pos_map[x]: @@ -2911,7 +2737,7 @@ def test_adjoint_F_no_omp(self, mode): self.run_adjoint_F(3) -class TestElastic: +class TestElasticLike: @pytest.mark.parallel(mode=[(1, 'diag')]) def test_elastic_structure(self, mode): @@ -2927,7 +2753,6 @@ def test_elastic_structure(self, mode): mu = Function(name='mu', grid=grid) ro = Function(name='b', grid=grid) - # The receiver rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=10) rec_term = rec.interpolate(expr=v[0] + v[1]) @@ -2945,7 +2770,6 @@ def test_elastic_structure(self, mode): calls = [i for i in FindNodes(Call).visit(op) if isinstance(i, HaloUpdateCall)] - # The correct we want assert len(calls) == 5 assert len(FindNodes(HaloUpdateCall).visit(op.body.body[1].body[1].body[0])) == 1 @@ -2960,12 +2784,188 @@ def test_elastic_structure(self, mode): assert calls[4].arguments[0] is v[0] assert calls[4].arguments[1] is v[1] + @pytest.fixture + def setup(self): + """ + This fixture sets up the grid, fields, elastic-like + equations and receivers for test_issue_2448_*. + """ + shape = (2,) + so = 2 + tn = 30 + + grid = Grid(shape=shape) + + # Velocity and pressure fields + v = TimeFunction(name='v', grid=grid, space_order=so) + tau = TimeFunction(name='tau', grid=grid, space_order=so) + + # First order elastic-like dependencies equations + pde_v = v.dt - (tau.dx) + pde_tau = tau.dt - ((v.forward).dx) + u_v = Eq(v.forward, solve(pde_v, v.forward)) + u_tau = Eq(tau.forward, solve(pde_tau, tau.forward)) + + rec = SparseTimeFunction(name="rec", grid=grid, npoint=1, nt=tn) + rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=1) + + return grid, v, tau, u_v, u_tau, rec + + @pytest.mark.parallel(mode=1) + def test_issue_2448_v0(self, mode, setup): + _, v, tau, u_v, u_tau, rec = setup + + rec_term0 = rec.interpolate(expr=v) + + op0 = Operator([u_v, u_tau, rec_term0]) + + calls = [i for i in FindNodes(Call).visit(op0) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 3 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is tau + assert calls[2].arguments[0] is v + + @pytest.mark.parallel(mode=1) + def test_issue_2448_v1(self, mode, setup): + _, v, tau, u_v, u_tau, rec = setup + + rec_term1 = rec.interpolate(expr=v.forward) + + op1 = Operator([u_v, u_tau, rec_term1]) + + calls = [i for i in FindNodes(Call).visit(op1) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 3 + assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[0])) == 0 + assert len(FindNodes(HaloUpdateCall).visit(op1.body.body[1].body[1])) == 3 + assert calls[0].arguments[0] is tau + assert calls[1].arguments[0] is v + assert calls[2].arguments[0] is v + + @pytest.mark.parallel(mode=1) + def test_issue_2448_v2(self, mode, setup): + grid, v, tau, u_v, u_tau, rec = setup + + # Additional velocity and pressure fields + v2 = TimeFunction(name='v2', grid=grid, space_order=2) + tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) + + # First order elastic-like dependencies equations + pde_v2 = v2.dt - (tau2.dx) + pde_tau2 = tau2.dt - ((v2.forward).dx) + u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) + u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) + + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) + rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) + + rec_term0 = rec.interpolate(expr=v) + rec_term2 = rec2.interpolate(expr=v2) + + op2 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term2]) + + calls = [i for i in FindNodes(Call).visit(op2) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 5 + assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[0])) == 2 + assert len(FindNodes(HaloUpdateCall).visit(op2.body.body[1].body[1].body[1])) == 3 + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is v2 + assert calls[2].arguments[0] is tau + assert calls[2].arguments[1] is tau2 + assert calls[3].arguments[0] is v + assert calls[4].arguments[0] is v2 + + @pytest.mark.parallel(mode=1) + def test_issue_2448_v3(self, mode, setup): + grid, v, tau, u_v, u_tau, rec = setup + + # Additional velocity and pressure fields + v2 = TimeFunction(name='v2', grid=grid, space_order=2) + tau2 = TimeFunction(name='tau2', grid=grid, space_order=2) + + # First order elastic-like dependencies equations + pde_v2 = v2.dt - (tau2.dx) + pde_tau2 = tau2.dt - ((v2.forward).dx) + u_v2 = Eq(v2.forward, solve(pde_v2, v2.forward)) + u_tau2 = Eq(tau2.forward, solve(pde_tau2, tau2.forward)) + + rec2 = SparseTimeFunction(name="rec2", grid=grid, npoint=1, nt=30) + rec2.coordinates.data[:, 0] = np.linspace(0., grid.shape[0], num=1) + + rec_term0 = rec.interpolate(expr=v) + rec_term3 = rec2.interpolate(expr=v2.forward) + + op3 = Operator([u_v, u_v2, u_tau, u_tau2, rec_term0, rec_term3]) + + calls = [i for i in FindNodes(Call).visit(op3) if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 5 + assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op3.body.body[1].body[1].body[1])) == 4 + assert calls[0].arguments[0] is v + assert calls[1].arguments[0] is tau + assert calls[1].arguments[1] is tau2 + assert calls[2].arguments[0] is v + assert calls[3].arguments[0] is v2 + assert calls[4].arguments[0] is v2 + + @pytest.mark.parallel(mode=1) + def test_issue_2448_backward(self, mode): + ''' + Similar to test_issue_2448, but with backward instead of forward + so that the hoisted halo has different starting point + ''' + shape = (2,) + so = 2 + + grid = Grid(shape=shape) + t = grid.stepping_dim + + tn = 7 + + # Velocity and pressure fields + v = TimeFunction(name='v', grid=grid, space_order=so) + v.data_with_halo[0, :] = 1. + v.data_with_halo[1, :] = 3. + + tau = TimeFunction(name='tau', grid=grid, space_order=so) + tau.data_with_halo[:] = 1. + + # First order elastic-like dependencies equations + pde_v = v.dt - (tau.dx) + pde_tau = tau.dt - ((v.backward).dx) + + u_v = Eq(v.backward, solve(pde_v, v)) + u_tau = Eq(tau.backward, solve(pde_tau, tau)) + + # Test two variants of receiver interpolation + nrec = 1 + rec = SparseTimeFunction(name="rec", grid=grid, npoint=nrec, nt=tn) + rec.coordinates.data[:, 0] = np.linspace(0., shape[0], num=nrec) + + # Test receiver interpolation 0, here we have a halo exchange hoisted + op0 = Operator([u_v] + [u_tau] + rec.interpolate(expr=v)) + + calls = [i for i in FindNodes(Call).visit(op0) + if isinstance(i, HaloUpdateCall)] + + assert len(calls) == 3 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[0])) == 1 + assert len(FindNodes(HaloUpdateCall).visit(op0.body.body[1].body[1].body[1])) == 2 + assert calls[0].arguments[0] is v + assert calls[0].arguments[3].args[0] is t.symbolic_max + assert calls[1].arguments[0] is tau + assert calls[2].arguments[0] is v + class TestTTIOp: @pytest.mark.parallel(mode=1) def test_halo_structure(self, mode): - solver = TestTTI().tti_operator(opt='advanced', space_order=8) op = solver.op_fwd(save=False) From 018ba542519a0f860e99f551aa674b41f0041157 Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 10 Dec 2024 15:23:15 +0000 Subject: [PATCH 15/16] compiler: Revert __repr__, add more minor fixes --- devito/ir/iet/nodes.py | 9 ++++----- devito/mpi/halo_scheme.py | 18 ++++-------------- devito/passes/iet/mpi.py | 20 ++++++++------------ tests/test_mpi.py | 9 +++++---- 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 23f293ad1e..4aa3df1f2b 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1437,7 +1437,6 @@ def DummyExpr(*args, init=False): # Nodes required for distributed-memory halo exchange - class HaloSpot(Node): """ @@ -1463,6 +1462,10 @@ def __init__(self, body, halo_scheme): self._halo_scheme = halo_scheme + def __repr__(self): + functions = "(%s)" % ",".join(i.name for i in self.functions) + return "<%s%s>" % (self.__class__.__name__, functions) + @property def halo_scheme(self): return self._halo_scheme @@ -1495,10 +1498,6 @@ def body(self): def functions(self): return tuple(self.fmapper) - def __repr__(self): - funcs = self.halo_scheme.__reprfuncs__() - return "<%s(%s)>" % (self.__class__.__name__, funcs) - # Utility classes diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index d80127fae2..3819caccb4 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -11,7 +11,7 @@ from devito.ir.support import Forward, Scope from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, - frozendict, is_integer, filter_sorted, OrderedSet) + frozendict, is_integer, filter_sorted) from devito.types import Grid __all__ = ['HaloScheme', 'HaloSchemeEntry', 'HaloSchemeException', 'HaloTouch'] @@ -62,7 +62,7 @@ def __repr__(self): OMapper = namedtuple('OMapper', 'core owned') -class HaloScheme(): +class HaloScheme: """ A HaloScheme describes a set of halo exchanges through a mapper: @@ -120,17 +120,9 @@ def __init__(self, exprs, ispace): self._honored[i.root] = frozenset([(ltk, rtk)]) self._honored = frozendict(self._honored) - def __reprfuncs__(self): - fstrings = [] - for f in self.fmapper.keys(): - loc_indices = OrderedSet(*(self.fmapper[f].loc_indices.values())) - loc_indices_str = str(list(loc_indices)) if loc_indices else "" - fstrings.append("%s%s" % (f.name, loc_indices_str)) - - return ",".join(fstrings) - def __repr__(self): - return "<%s(%s)>" % (self.__class__.__name__, self.__reprfuncs__()) + fnames = ",".join(i.name for i in set(self._mapper)) + return "HaloScheme<%s>" % fnames def __eq__(self, other): return (isinstance(other, HaloScheme) and @@ -545,8 +537,6 @@ def classify(exprs, ispace): loc_indices, loc_dirs = process_loc_indices(raw_loc_indices, ispace.directions) - halos = frozenset(halos) - dims = frozenset(dims) mapper[f] = HaloSchemeEntry(loc_indices, loc_dirs, halos, dims) diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 030dfacfad..3bc6a61344 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -101,19 +101,18 @@ def _hoist_invariant(iet): if not any(r(dep, hs1, v.loc_indices) for r in rules): break else: - # hs1 can be hoisted out of `it`, but we need to infer valid + # `hs1`` can be hoisted out of `it`, but we need to infer valid # loc_indices hse = hs1.halo_scheme.fmapper[f] - raw_loc_indices = {} + loc_indices = {} for d, v in hse.loc_indices.items(): if v in it.uindices: - v_sub = it.start - raw_loc_indices[d] = v.symbolic_min.subs(it.dim, v_sub) + loc_indices[d] = v.symbolic_min.subs(it.dim, it.start) else: - raw_loc_indices[d] = v + loc_indices[d] = v - hse = hse._rebuild(loc_indices=raw_loc_indices) + hse = hse._rebuild(loc_indices=loc_indices) hs1.halo_scheme.fmapper[f] = hse hsmapper[hs1] = hsmapper.get(hs1, hs1.halo_scheme).drop(f) @@ -357,14 +356,11 @@ def _filter_iter_mapper(iet): def _make_cond_mapper(iet): - + "Return a mapper from HaloSpots to the Conditionals that contain them." cond_mapper = {} for hs, v in MapHaloSpots().visit(iet).items(): - conditionals = set() - for i in v: - if i.is_Conditional and not isinstance(i.condition, GuardFactorEq): - conditionals.add(i) - + conditionals = {i for i in v if i.is_Conditional and + not isinstance(i.condition, GuardFactorEq)} cond_mapper[hs] = conditionals return cond_mapper diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 05a5b4b82d..9d2032c472 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1299,7 +1299,7 @@ def test_unmerge_haloupdate_if_no_locindices(self, mode): assert np.allclose(g.data_ro_domain[0, 5:], [16., 16., 14., 13., 6.], rtol=R) @pytest.mark.parallel(mode=1) - def test_merge_haloupdate_if_diff_locindices_v0(self, mode): + def test_merge_haloupdate_if_diff_locindices(self, mode): grid = Grid(shape=(101, 101)) x, y = grid.dimensions t = grid.stepping_dim @@ -1320,11 +1320,12 @@ def test_merge_haloupdate_if_diff_locindices_v0(self, mode): op.cfunction @pytest.mark.parallel(mode=2) - def test_merge_haloupdate_if_diff_locindices_v1(self, mode): + def test_merge_and_hoist_haloupdate_if_diff_locindices(self, mode): """ This test is a revisited, more complex version of - `test_merge_haloupdate_if_diff_locindices_v0`. And in addition to - checking the generated code, it also checks the numerical output. + `test_merge_haloupdate_if_diff_locindices`, also checking hoisting. + And in addition to checking the generated code, + it also checks the numerical output. In the Operator there are three Eqs: From 2e36b6b16bc5d8d02110dbf904de87cda621ddec Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Mon, 16 Dec 2024 14:58:12 +0000 Subject: [PATCH 16/16] compiler: class HaloSchemeEntry(EnrichedTuple) --- devito/ir/iet/nodes.py | 3 +++ devito/mpi/halo_scheme.py | 22 +++++----------------- devito/passes/iet/mpi.py | 4 +++- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 4aa3df1f2b..57becab5ff 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -1437,6 +1437,8 @@ def DummyExpr(*args, init=False): # Nodes required for distributed-memory halo exchange + + class HaloSpot(Node): """ @@ -1498,6 +1500,7 @@ def body(self): def functions(self): return tuple(self.fmapper) + # Utility classes diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py index 3819caccb4..09cf98ac6a 100644 --- a/devito/mpi/halo_scheme.py +++ b/devito/mpi/halo_scheme.py @@ -11,7 +11,7 @@ from devito.ir.support import Forward, Scope from devito.symbolics.manipulation import _uxreplace_registry from devito.tools import (Reconstructable, Tag, as_tuple, filter_ordered, flatten, - frozendict, is_integer, filter_sorted) + frozendict, is_integer, filter_sorted, EnrichedTuple) from devito.types import Grid __all__ = ['HaloScheme', 'HaloSchemeEntry', 'HaloSchemeException', 'HaloTouch'] @@ -28,34 +28,22 @@ class HaloLabel(Tag): STENCIL = HaloLabel('stencil') -class HaloSchemeEntry(Reconstructable): +class HaloSchemeEntry(EnrichedTuple): __rargs__ = ('loc_indices', 'loc_dirs', 'halos', 'dims') - def __init__(self, loc_indices, loc_dirs, halos, dims): + def __init__(self, loc_indices, loc_dirs, halos, dims, getters=None): self.loc_indices = frozendict(loc_indices) self.loc_dirs = frozendict(loc_dirs) self.halos = frozenset(halos) self.dims = frozenset(dims) - def __eq__(self, other): - if not isinstance(other, HaloSchemeEntry): - return False - return (self.loc_indices == other.loc_indices and - self.loc_dirs == other.loc_dirs and - self.halos == other.halos and - self.dims == other.dims) - def __hash__(self): - return hash((tuple(self.loc_indices.items()), - tuple(self.loc_dirs.items()), + return hash((self.loc_indices, + self.loc_dirs, self.halos, self.dims)) - def __repr__(self): - return (f"HaloSchemeEntry(loc_indices={self.loc_indices}, " - f"loc_dirs={self.loc_dirs}, halos={self.halos}, dims={self.dims})") - Halo = namedtuple('Halo', 'dim side') diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py index 3bc6a61344..930cc108b2 100644 --- a/devito/passes/iet/mpi.py +++ b/devito/passes/iet/mpi.py @@ -356,7 +356,9 @@ def _filter_iter_mapper(iet): def _make_cond_mapper(iet): - "Return a mapper from HaloSpots to the Conditionals that contain them." + """ + Return a mapper from HaloSpots to the Conditionals that contain them. + """ cond_mapper = {} for hs, v in MapHaloSpots().visit(iet).items(): conditionals = {i for i in v if i.is_Conditional and