From 7aec61552e0dea65f012f2f4e5afae7c6ec9036f Mon Sep 17 00:00:00 2001
From: Fabio Luporini
Date: Thu, 26 Sep 2024 10:59:13 +0000
Subject: [PATCH] compiler: Fix toposort to account for fences

---
 devito/ir/clusters/cluster.py  | 26 +++++++++++++++-----------
 devito/passes/clusters/misc.py | 12 ++++++++----
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index a4a26dadbb..629ebdde4a 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -14,7 +14,7 @@
 from devito.mpi.reduction_scheme import DistReduce
 from devito.symbolics import estimate_cost
 from devito.tools import as_tuple, flatten, infer_dtype
-from devito.types import WeakFence, CriticalRegion
+from devito.types import Fence, WeakFence, CriticalRegion

 __all__ = ["Cluster", "ClusterGroup"]

@@ -239,34 +239,38 @@ def is_sparse(self):
         """
         return any(a.is_irregular for a in self.scope.accesses)

-    @property
+    @cached_property
     def is_wild(self):
         """
         True if encoding a non-mathematical operation, False otherwise.
         """
-        return self.is_halo_touch or self.is_dist_reduce or self.is_fence
+        return (self.is_halo_touch or
+                self.is_dist_reduce or
+                self.is_weak_fence or
+                self.is_critical_region)

-    @property
+    @cached_property
     def is_halo_touch(self):
         return self.exprs and all(isinstance(e.rhs, HaloTouch) for e in self.exprs)

-    @property
+    @cached_property
     def is_dist_reduce(self):
         return self.exprs and all(isinstance(e.rhs, DistReduce) for e in self.exprs)

-    @property
+    @cached_property
     def is_fence(self):
-        return self.is_weak_fence or self.is_critical_region
+        return (self.exprs and all(isinstance(e.rhs, Fence) for e in self.exprs) or
+                self.is_critical_region)

-    @property
+    @cached_property
     def is_weak_fence(self):
         return self.exprs and all(isinstance(e.rhs, WeakFence) for e in self.exprs)

-    @property
+    @cached_property
     def is_critical_region(self):
         return self.exprs and all(isinstance(e.rhs, CriticalRegion) for e in self.exprs)

-    @property
+    @cached_property
     def is_async(self):
         """
         True if an asynchronous Cluster, False otherwise.
@@ -274,7 +278,7 @@ def is_async(self):
         return any(isinstance(s, (WithLock, PrefetchUpdate))
                    for s in flatten(self.syncs.values()))

-    @property
+    @cached_property
     def is_wait(self):
         """
         True if a Cluster waiting on a lock (that is a special synchronization
diff --git a/devito/passes/clusters/misc.py b/devito/passes/clusters/misc.py
index a8ccb500c1..9a6d7a4d6b 100644
--- a/devito/passes/clusters/misc.py
+++ b/devito/passes/clusters/misc.py
@@ -340,6 +340,8 @@ def is_cross(source, sink):
                 return t0 < v <= t1 or t1 < v <= t0

             for cg1 in cgroups[n+1:]:
+                n1 = cgroups.index(cg1)
+
                 # A Scope to compute all cross-ClusterGroup anti-dependences
                 scope = Scope(exprs=cg0.exprs + cg1.exprs, rules=is_cross)

@@ -355,14 +357,16 @@ def is_cross(source, sink):
                     break

                 # Any anti- and iaw-dependences impose that `cg1` follows `cg0`
-                # and forbid any sort of fusion
-                elif any(scope.d_anti_gen()) or\
-                        any(i.is_iaw for i in scope.d_output_gen()):
+                # and forbid any sort of fusion. Fences have the same effect
+                elif (any(scope.d_anti_gen()) or
+                      any(i.is_iaw for i in scope.d_output_gen()) or
+                      any(c.is_fence for c in flatten(cgroups[n:n1+1]))):
                     dag.add_edge(cg0, cg1)

                 # Any flow-dependences along an inner Dimension (i.e., a Dimension
                 # that doesn't appear in `prefix`) impose that `cg1` follows `cg0`
-                elif any(not (i.cause and i.cause & prefix) for i in scope.d_flow_gen()):
+                elif any(not (i.cause and i.cause & prefix)
+                         for i in scope.d_flow_gen()):
                     dag.add_edge(cg0, cg1)

                 # Clearly, output dependences must be honored