From 25ddf55b3df184c979a3e36540ff139d2a1660fc Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 15 May 2023 10:20:31 +0000
Subject: [PATCH 01/90] api: Revamp PrecomputedSparseFunction

---
 devito/operations/interpolators.py |  40 ++--------
 devito/types/sparse.py             | 121 ++++++++++++++++++++---------
 2 files changed, 93 insertions(+), 68 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 2322ad2a7a..3bc1b3627f 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -4,11 +4,10 @@
 import numpy as np
 from cached_property import cached_property
 
-from devito.logger import warning
 from devito.symbolics import retrieve_function_carriers, indexify, INT
 from devito.tools import as_tuple, powerset, flatten, prod
-from devito.types import (ConditionalDimension, Dimension, DefaultDimension, Eq, Inc,
-                          Evaluable, Symbol, SubFunction)
+from devito.types import (ConditionalDimension, DefaultDimension, Eq, Inc,
+                          Evaluable, Symbol)
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
 
@@ -316,37 +315,12 @@ def callback():
 
 class PrecomputedInterpolator(GenericInterpolator):
 
-    def __init__(self, obj, r, gridpoints_data, coefficients_data):
-        if not isinstance(r, int):
-            raise TypeError('Need `r` int argument')
-        if r <= 0:
-            raise ValueError('`r` must be > 0')
-        self.r = r
+    def __init__(self, obj):
         self.obj = obj
-        self._npoint = obj._npoint
-        gridpoints = SubFunction(name="%s_gridpoints" % self.obj.name, dtype=np.int32,
-                                 dimensions=(self.obj.indices[-1], Dimension(name='d')),
-                                 shape=(self._npoint, self.obj.grid.dim), space_order=0,
-                                 parent=self.obj)
-
-        assert(gridpoints_data is not None)
-        gridpoints.data[:] = gridpoints_data[:]
-        self.obj._gridpoints = gridpoints
-
-        interpolation_coeffs = SubFunction(name="%s_interpolation_coeffs" % self.obj.name,
-                                           dimensions=(self.obj.indices[-1],
-                                                       Dimension(name='d'),
-                                                       Dimension(name='i')),
-                                           shape=(self.obj.npoint, self.obj.grid.dim,
-                                                  self.r),
-                                           dtype=self.obj.dtype, space_order=0,
-                                           parent=self.obj)
-        assert(coefficients_data is not None)
-        interpolation_coeffs.data[:] = coefficients_data[:]
-        self.obj._interpolation_coeffs = interpolation_coeffs
-        warning("Ensure that the provided interpolation coefficient and grid point " +
-                "values are computed on the final grid that will be used for other " +
-                "computations.")
+
+    @property
+    def r(self):
+        return self.obj._r
 
     def interpolate(self, expr, offset=0, increment=False, self_subs={}):
         """
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 7ce74e0586..f3171deaee 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1,4 +1,4 @@
-from collections import OrderedDict
+from collections import Iterable, OrderedDict
 from itertools import product
 
 import sympy
@@ -7,6 +7,7 @@
 
 from devito.finite_differences import generate_fd_shortcuts
 from devito.finite_differences.elementary import floor
+from devito.logger import warning
 from devito.mpi import MPI, SparseDistributor
 from devito.operations import LinearInterpolator, PrecomputedInterpolator
 from devito.symbolics import (INT, cast_mapper, indexify,
@@ -925,31 +926,32 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         The computational domain from which the sparse points are sampled.
     r : int
         Number of gridpoints in each dimension to interpolate a single sparse
-        point to. E.g. ``r=2`` for linear interpolation.
+        point to. E.g. `r=2` for linear interpolation.
     gridpoints : np.ndarray, optional
-        An array carrying the *reference* grid point corresponding to each sparse point.
-        Of all the gridpoints that one sparse point would be interpolated to, this is the
-        grid point closest to the origin, i.e. the one with the lowest value of each
-        coordinate dimension. Must be a two-dimensional array of shape
-        ``(npoint, grid.ndim)``.
+        An array carrying the *reference* grid point corresponding to each
+        sparse point.  Of all the gridpoints that one sparse point would be
+        interpolated to, this is the grid point closest to the origin, i.e. the
+        one with the lowest value of each coordinate dimension. Must be a
+        two-dimensional array of shape `(npoint, grid.ndim)`.
     interpolation_coeffs : np.ndarray, optional
-        An array containing the coefficient for each of the r^2 (2D) or r^3 (3D)
-        gridpoints that each sparse point will be interpolated to. The coefficient is
-        split across the n dimensions such that the contribution of the point (i, j, k)
-        will be multiplied by ``interpolation_coeffs[..., i]*interpolation_coeffs[...,
-        j]*interpolation_coeffs[...,k]``. So for ``r=6``, we will store 18
-        coefficients per sparse point (instead of potentially 216).
-        Must be a three-dimensional array of shape ``(npoint, grid.ndim, r)``.
+        An array containing the coefficient for each of the r^2 (2D) or r^3
+        (3D) gridpoints that each sparse point will be interpolated to. The
+        coefficient is split across the n dimensions such that the contribution
+        of the point (i, j, k) will be multiplied by
+        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
+        So for `r=6`, we will store 18 coefficients per sparse point (instead of
+        potentially 216).  Must be a three-dimensional array of shape
+        `(npoint, grid.ndim, r)`.
     space_order : int, optional
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
-        Shape of the object. Defaults to ``(npoint,)``.
+        Shape of the object. Defaults to `(npoint,)`.
     dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
         Any object that can be interpreted as a numpy data type. Defaults
-        to ``np.float32``.
+        to `np.float32`.
     initializer : callable or any object exposing the buffer interface, optional
         Data initializer. If a callable is provided, data is allocated lazily.
     allocator : MemoryAllocator, optional
@@ -960,7 +962,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     Notes
     -----
     The parameters must always be given as keyword arguments, since SymPy
-    uses ``*args`` to (re-)create the dimension arguments of the symbolic object.
+    uses `*args` to (re-)create the dimension arguments of the symbolic object.
     """
 
     is_PrecomputedSparseFunction = True
@@ -968,15 +970,63 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     _sub_functions = ('gridpoints', 'interpolation_coeffs')
 
     def __init_finalize__(self, *args, **kwargs):
-        super(PrecomputedSparseFunction, self).__init_finalize__(*args, **kwargs)
+        super().__init_finalize__(*args, **kwargs)
 
         # Grid points per sparse point (2 in the case of bilinear and trilinear)
         r = kwargs.get('r')
+        if not is_integer(r):
+            raise TypeError('Need `r` int argument')
+        if r <= 0:
+            raise ValueError('`r` must be > 0')
+        self._r = r
+
         gridpoints = kwargs.get('gridpoints')
+        if isinstance(gridpoints, SubFunction):
+            self._gridpoints = gridpoints
+        elif isinstance(gridpoints, Iterable):
+            gridpoints_data = gridpoints
+            dimensions = (self.indices[-1], Dimension(name='d'))
+            shape = (self._npoint, self.grid.dim)
+            gridpoints = SubFunction(name="%s_gridpoints" % self.name,
+                                     dtype=np.int32, dimensions=dimensions,
+                                     shape=shape, space_order=0, parent=self)
+            gridpoints.data[:] = gridpoints_data
+            self._gridpoints = gridpoints
+        else:
+            raise ValueError("`gridpoints` must be either SubFunction or iterable "
+                             "(e.g., list, np.ndnarray)")
+
         interpolation_coeffs = kwargs.get('interpolation_coeffs')
+        if isinstance(interpolation_coeffs, SubFunction):
+            self._interpolation_coeffs = interpolation_coeffs
+        elif isinstance(interpolation_coeffs, Iterable):
+            interpolation_coeffs_data = interpolation_coeffs
+            dimensions = (self.indices[-1], Dimension(name='d'),
+                          Dimension(name='i'))
+            shape = (self.npoint, self.grid.dim, r)
+            interpolation_coeffs = SubFunction(name="%s_interp_coeffs" % self.name,
+                                               dimensions=dimensions, shape=shape,
+                                               dtype=self.dtype, space_order=0,
+                                               parent=self)
+            interpolation_coeffs.data[:] = interpolation_coeffs_data
+            self._interpolation_coeffs = interpolation_coeffs
+        else:
+            raise ValueError("`interpolation_coeffs` must be either SubFunction "
+                             "or iterable (e.g., list, np.ndarray)")
+
+        warning("Ensure that the provided interpolation coefficient and grid "
+                "point values are computed on the final grid that will be used "
+                "for other computations.")
 
-        self.interpolator = PrecomputedInterpolator(self, r, gridpoints,
-                                                    interpolation_coeffs)
+        self.interpolator = PrecomputedInterpolator(self)
+
+    @property
+    def _radius(self):
+        return self.r
+
+    @property
+    def r(self):
+        return self._r
 
     @property
     def gridpoints(self):
@@ -1034,33 +1084,34 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         The computational domain from which the sparse points are sampled.
     r : int
         Number of gridpoints in each dimension to interpolate a single sparse
-        point to. E.g. ``r=2`` for linear interpolation.
+        point to. E.g. `r=2` for linear interpolation.
     gridpoints : np.ndarray, optional
-        An array carrying the *reference* grid point corresponding to each sparse point.
-        Of all the gridpoints that one sparse point would be interpolated to, this is the
-        grid point closest to the origin, i.e. the one with the lowest value of each
-        coordinate dimension. Must be a two-dimensional array of shape
-        ``(npoint, grid.ndim)``.
+        An array carrying the *reference* grid point corresponding to each
+        sparse point.  Of all the gridpoints that one sparse point would be
+        interpolated to, this is the grid point closest to the origin, i.e. the
+        one with the lowest value of each coordinate dimension. Must be a
+        two-dimensional array of shape `(npoint, grid.ndim)`.
     interpolation_coeffs : np.ndarray, optional
-        An array containing the coefficient for each of the r^2 (2D) or r^3 (3D)
-        gridpoints that each sparse point will be interpolated to. The coefficient is
-        split across the n dimensions such that the contribution of the point (i, j, k)
-        will be multiplied by ``interpolation_coeffs[..., i]*interpolation_coeffs[...,
-        j]*interpolation_coeffs[...,k]``. So for ``r=6``, we will store 18 coefficients
-        per sparse point (instead of potentially 216). Must be a three-dimensional array
-        of shape ``(npoint, grid.ndim, r)``.
+        An array containing the coefficient for each of the r^2 (2D) or r^3
+        (3D) gridpoints that each sparse point will be interpolated to. The
+        coefficient is split across the n dimensions such that the contribution
+        of the point (i, j, k) will be multiplied by
+        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
+        So for `r=6`, we will store 18 coefficients per sparse point (instead of
+        potentially 216).  Must be a three-dimensional array of shape
+        `(npoint, grid.ndim, r)`.
     space_order : int, optional
         Discretisation order for space derivatives. Defaults to 0.
     time_order : int, optional
         Discretisation order for time derivatives. Default to 1.
     shape : tuple of ints, optional
-        Shape of the object. Defaults to ``(npoint,)``.
+        Shape of the object. Defaults to `(npoint,)`.
     dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
         Any object that can be interpreted as a numpy data type. Defaults
-        to ``np.float32``.
+        to `np.float32`.
     initializer : callable or any object exposing the buffer interface, optional
         Data initializer. If a callable is provided, data is allocated lazily.
     allocator : MemoryAllocator, optional

From d572cfe1ef1703ffa6fe057b7c9650decb798356 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 15 May 2023 10:48:08 +0000
Subject: [PATCH 02/90] api: Add coordinates to PrecomputedSparseFunction

---
 devito/types/sparse.py | 52 ++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index f3171deaee..67d647e1b1 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -927,6 +927,8 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     r : int
         Number of gridpoints in each dimension to interpolate a single sparse
         point to. E.g. `r=2` for linear interpolation.
+    coordinates : np.ndarray, optional
+        The coordinates of each sparse point.
     gridpoints : np.ndarray, optional
         An array carrying the *reference* grid point corresponding to each
         sparse point.  Of all the gridpoints that one sparse point would be
@@ -980,18 +982,39 @@ def __init_finalize__(self, *args, **kwargs):
             raise ValueError('`r` must be > 0')
         self._r = r
 
+        coordinates = kwargs.get('coordinates')
+        if isinstance(coordinates, SubFunction):
+            self._coordinates = coordinates
+        elif isinstance(coordinates, Iterable):
+            coordinates_data = coordinates
+            dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
+            shape = (self.npoint, self.grid.dim)
+            self._coordinates = SubFunction(
+                name='%s_coords' % self.name, parent=self, dtype=self.dtype,
+                dimensions=dimensions, shape=shape, space_order=0,
+                initializer=coordinates_data, alias=self.alias,
+                distributor=self._distributor
+            )
+        elif coordinates is None:
+            # Unlike `gridpoints` or `interpolation_coefficients`, not
+            # strictly necessary
+            pass
+        else:
+            raise ValueError("`coordinates` must be either SubFunction or iterable "
+                             "(e.g., list, np.ndnarray)")
+
         gridpoints = kwargs.get('gridpoints')
         if isinstance(gridpoints, SubFunction):
             self._gridpoints = gridpoints
         elif isinstance(gridpoints, Iterable):
             gridpoints_data = gridpoints
-            dimensions = (self.indices[-1], Dimension(name='d'))
+            dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
             shape = (self._npoint, self.grid.dim)
-            gridpoints = SubFunction(name="%s_gridpoints" % self.name,
-                                     dtype=np.int32, dimensions=dimensions,
-                                     shape=shape, space_order=0, parent=self)
-            gridpoints.data[:] = gridpoints_data
-            self._gridpoints = gridpoints
+            self._gridpoints = SubFunction(
+                name="%s_gridpoints" % self.name, parent=self, dtype=np.int32,
+                dimensions=dimensions, shape=shape, space_order=0,
+                initializer=gridpoints_data, distributor=self._distributor
+            )
         else:
             raise ValueError("`gridpoints` must be either SubFunction or iterable "
                              "(e.g., list, np.ndnarray)")
@@ -1001,15 +1024,14 @@ def __init_finalize__(self, *args, **kwargs):
             self._interpolation_coeffs = interpolation_coeffs
         elif isinstance(interpolation_coeffs, Iterable):
             interpolation_coeffs_data = interpolation_coeffs
-            dimensions = (self.indices[-1], Dimension(name='d'),
-                          Dimension(name='i'))
+            dimensions = (self.indices[self._sparse_position],
+                          Dimension(name='d'), Dimension(name='i'))
             shape = (self.npoint, self.grid.dim, r)
-            interpolation_coeffs = SubFunction(name="%s_interp_coeffs" % self.name,
-                                               dimensions=dimensions, shape=shape,
-                                               dtype=self.dtype, space_order=0,
-                                               parent=self)
-            interpolation_coeffs.data[:] = interpolation_coeffs_data
-            self._interpolation_coeffs = interpolation_coeffs
+            self._interpolation_coeffs = SubFunction(
+                name="%s_interp_coeffs" % self.name, parent=self, dtype=self.dtype,
+                dimensions=dimensions, shape=shape, space_order=0,
+                initializer=interpolation_coeffs_data
+            )
         else:
             raise ValueError("`interpolation_coeffs` must be either SubFunction "
                              "or iterable (e.g., list, np.ndarray)")
@@ -1085,6 +1107,8 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     r : int
         Number of gridpoints in each dimension to interpolate a single sparse
         point to. E.g. `r=2` for linear interpolation.
+    coordinates : np.ndarray, optional
+        The coordinates of each sparse point.
     gridpoints : np.ndarray, optional
         An array carrying the *reference* grid point corresponding to each
         sparse point.  Of all the gridpoints that one sparse point would be

From 1381a687938adad24fe2d43c7c575e09485c9f26 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 15 May 2023 13:24:06 +0000
Subject: [PATCH 03/90] api: Fix pickling for PrecomputedSparseFunction

---
 devito/types/sparse.py | 73 ++++++++++++++++++++++++++++--------------
 tests/test_pickle.py   | 41 ++++++++++++++++++++++--
 2 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 67d647e1b1..d7be54a6d9 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -83,6 +83,17 @@ def __shape_setup__(cls, **kwargs):
             shape = (glb_npoint[grid.distributor.myrank],)
         return shape
 
+    def __distributor_setup__(self, **kwargs):
+        """
+        A `SparseDistributor` handles the SparseFunction decomposition based on
+        physical ownership, and allows to convert between global and local indices.
+        """
+        return SparseDistributor(
+            kwargs.get('npoint', kwargs.get('npoint_global')),
+            self._sparse_dim,
+            kwargs['grid'].distributor
+        )
+
     def _halo_exchange(self):
         # no-op for SparseFunctions
         return
@@ -475,6 +486,7 @@ class SparseFunction(AbstractSparseFunction):
     def __init_finalize__(self, *args, **kwargs):
         super(SparseFunction, self).__init_finalize__(*args, **kwargs)
         self.interpolator = LinearInterpolator(self)
+
         # Set up sparse point coordinates
         coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
         if isinstance(coordinates, Function):
@@ -497,17 +509,6 @@ def __init_finalize__(self, *args, **kwargs):
                 # case ``self._data is None``
                 self.coordinates.data
 
-    def __distributor_setup__(self, **kwargs):
-        """
-        A `SparseDistributor` handles the SparseFunction decomposition based on
-        physical ownership, and allows to convert between global and local indices.
-        """
-        return SparseDistributor(
-            kwargs.get('npoint', kwargs.get('npoint_global')),
-            self._sparse_dim,
-            kwargs['grid'].distributor
-        )
-
     @property
     def coordinates(self):
         """The SparseFunction coordinates."""
@@ -940,7 +941,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         (3D) gridpoints that each sparse point will be interpolated to. The
         coefficient is split across the n dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
-        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
+        `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
         potentially 216).  Must be a three-dimensional array of shape
         `(npoint, grid.ndim, r)`.
@@ -967,10 +968,12 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     uses `*args` to (re-)create the dimension arguments of the symbolic object.
     """
 
-    is_PrecomputedSparseFunction = True
-
     _sub_functions = ('gridpoints', 'interpolation_coeffs')
 
+    __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
+                   ('r', 'coordinates_data', 'gridpoints_data',
+                    'interpolation_coeffs_data'))
+
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
 
@@ -982,7 +985,7 @@ def __init_finalize__(self, *args, **kwargs):
             raise ValueError('`r` must be > 0')
         self._r = r
 
-        coordinates = kwargs.get('coordinates')
+        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
         if isinstance(coordinates, SubFunction):
             self._coordinates = coordinates
         elif isinstance(coordinates, Iterable):
@@ -996,30 +999,32 @@ def __init_finalize__(self, *args, **kwargs):
                 distributor=self._distributor
             )
         elif coordinates is None:
-            # Unlike `gridpoints` or `interpolation_coefficients`, not
-            # strictly necessary
-            pass
+            # Unlike `gridpoints` or `interpolation_coeffs`, not strictly necessary
+            self._coordinates = None
         else:
             raise ValueError("`coordinates` must be either SubFunction or iterable "
                              "(e.g., list, np.ndnarray)")
 
         gridpoints = kwargs.get('gridpoints')
+        gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data'))
         if isinstance(gridpoints, SubFunction):
             self._gridpoints = gridpoints
         elif isinstance(gridpoints, Iterable):
             gridpoints_data = gridpoints
             dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
-            shape = (self._npoint, self.grid.dim)
+            shape = (self.npoint, self.grid.dim)
             self._gridpoints = SubFunction(
                 name="%s_gridpoints" % self.name, parent=self, dtype=np.int32,
                 dimensions=dimensions, shape=shape, space_order=0,
-                initializer=gridpoints_data, distributor=self._distributor
+                initializer=gridpoints_data, alias=self.alias,
+                distributor=self._distributor,
             )
         else:
             raise ValueError("`gridpoints` must be either SubFunction or iterable "
                              "(e.g., list, np.ndnarray)")
 
-        interpolation_coeffs = kwargs.get('interpolation_coeffs')
+        interpolation_coeffs = kwargs.get('interpolation_coeffs',
+                                          kwargs.get('interpolation_coeffs_data'))
         if isinstance(interpolation_coeffs, SubFunction):
             self._interpolation_coeffs = interpolation_coeffs
         elif isinstance(interpolation_coeffs, Iterable):
@@ -1030,7 +1035,7 @@ def __init_finalize__(self, *args, **kwargs):
             self._interpolation_coeffs = SubFunction(
                 name="%s_interp_coeffs" % self.name, parent=self, dtype=self.dtype,
                 dimensions=dimensions, shape=shape, space_order=0,
-                initializer=interpolation_coeffs_data
+                initializer=interpolation_coeffs_data, alias=self.alias
             )
         else:
             raise ValueError("`interpolation_coeffs` must be either SubFunction "
@@ -1050,6 +1055,10 @@ def _radius(self):
     def r(self):
         return self._r
 
+    @property
+    def coordinates(self):
+        return self._coordinates
+
     @property
     def gridpoints(self):
         return self._gridpoints
@@ -1059,6 +1068,21 @@ def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
         return self._interpolation_coeffs
 
+    @property
+    def coordinates_data(self):
+        try:
+            return self.coordinates.data.view(np.ndarray)
+        except AttributeError:
+            return None
+
+    @property
+    def gridpoints_data(self):
+        return self.gridpoints.data.view(np.ndarray)
+
+    @property
+    def interpolation_coeffs_data(self):
+        return self.interpolation_coeffs.data.view(np.ndarray)
+
     def _dist_scatter(self, data=None):
         data = data if data is not None else self.data
         distributor = self.grid.distributor
@@ -1120,7 +1144,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         (3D) gridpoints that each sparse point will be interpolated to. The
         coefficient is split across the n dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
-        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
+        `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
         potentially 216).  Must be a three-dimensional array of shape
         `(npoint, grid.ndim, r)`.
@@ -1149,7 +1173,8 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     uses ``*args`` to (re-)create the dimension arguments of the symbolic object.
     """
 
-    is_PrecomputedSparseTimeFunction = True
+    __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ +
+                                       PrecomputedSparseFunction.__rkwargs__))
 
     def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
         """
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 27e9f80c7a..e87b1507cf 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -8,7 +8,8 @@
 from conftest import skipif
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
-                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve)
+                    TimeDimension, SteppingDimension, Operator, MPI, Min,
+                    PrecomputedSparseTimeFunction)
 from devito.ir import GuardFactor
 from devito.data import LEFT, OWNED
 from devito.mpi.halo_scheme import Halo
@@ -259,7 +260,43 @@ def test_shared_data(self, pickle):
         assert sdata.cfields == new_sdata.cfields
         assert sdata.ncfields == new_sdata.ncfields
 
-        ffp = FieldFromPointer(sdata._field_flag, sdata.symbolic_base)
+def test_precomputed_sparse_function():
+    grid = Grid(shape=(10, 10))
+
+    sf = PrecomputedSparseTimeFunction(
+        name='sf', grid=grid, r=2, npoint=3, nt=5,
+        coordinates=[(0., 0.), (1., 1.), (2., 2.)],
+        gridpoints=[(5, 90), (1, 80), (7, 84)],
+        interpolation_coeffs=np.ndarray(shape=(3, 2, 2)),
+    )
+    sf.data[2, 1] = 5.
+
+    pkl_sf = pickle.dumps(sf)
+    new_sf = pickle.loads(pkl_sf)
+
+    # .data is initialized, so it should have been pickled too
+    assert new_sf.data[2, 1] == 5.
+
+    # gridpoints and interpolation coefficients must have been pickled
+    assert np.all(sf.gridpoints.data == new_sf.gridpoints.data)
+    assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data)
+
+    # coordinates, since they were given, should also have been pickled
+    assert np.all(sf.coordinates.data == new_sf.coordinates.data)
+
+    assert sf._radius == new_sf._radius == 2
+    assert sf.space_order == new_sf.space_order
+    assert sf.time_order == new_sf.time_order
+    assert sf.dtype == new_sf.dtype
+    assert sf.npoint == new_sf.npoint == 3
+
+
+def test_internal_symbols():
+    s = dSymbol(name='s', dtype=np.float32)
+    pkl_s = pickle.dumps(s)
+    new_s = pickle.loads(pkl_s)
+    assert new_s.name == s.name
+    assert new_s.dtype is np.float32
 
         pkl_ffp = pickle.dumps(ffp)
         new_ffp = pickle.loads(pkl_ffp)

From c7b9c22f86b8a018682ec6ac2f8afcd1ca1d15dd Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 15 May 2023 11:30:40 -0400
Subject: [PATCH 04/90] types: setup_coordinates for all sparse functions

---
 devito/operations/interpolators.py |   2 +-
 devito/types/sparse.py             | 150 ++++++++++++-----------------
 tests/test_pickle.py               |   4 +-
 3 files changed, 62 insertions(+), 94 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 3bc1b3627f..61b12641e3 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -320,7 +320,7 @@ def __init__(self, obj):
 
     @property
     def r(self):
-        return self.obj._r
+        return self.obj.r
 
     def interpolate(self, expr, offset=0, increment=False, self_subs={}):
         """
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index d7be54a6d9..e1b7e3538c 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1,4 +1,9 @@
-from collections import Iterable, OrderedDict
+from collections import OrderedDict
+try:
+    from collections import Iterable
+except ImportError:
+    # Python >= 3.10: the ABC aliases were removed from `collections`
+    from collections.abc import Iterable
 from itertools import product
 
 import sympy
@@ -94,6 +99,45 @@ def __distributor_setup__(self, **kwargs):
             kwargs['grid'].distributor
         )
 
+    def __subfunc_setup__(self, key='coordinates', ndim=2, allow_empty=False, **kwargs):
+        """
+        Setup SubFunction for a SparseFunction.
+        """
+        coordinates = kwargs.get(key, kwargs.get('%s_data' % key))
+        # In case only number of points is specified
+        npoint = kwargs.get('npoint', None)
+        if npoint is not None and coordinates is None:
+            coordinates = np.zeros((npoint, self.grid.dim))
+        # Check if already a pre-setup SubFunction
+        if isinstance(coordinates, Function):
+            setattr(self, '_%s' % key, coordinates)
+        # Setup the subfunction
+        elif isinstance(coordinates, Iterable):
+            dimensions = (self.indices[self._sparse_position], Dimension(name='d'),
+                          Dimension(name='i'))[:ndim]
+            shape = (self.npoint, self.grid.dim, self.r)[:ndim]
+            # Only retain the local data region
+            if coordinates is not None:
+                coordinates = np.array(coordinates)
+            coords = SubFunction(
+                name='%s_%s' % (self.name, key[:5]), parent=self, dtype=self.dtype,
+                dimensions=dimensions, shape=shape,
+                space_order=0, initializer=coordinates, alias=self.alias,
+                distributor=self._distributor
+            )
+            if self.npoint == 0:
+                # This is a corner case -- we might get here, for example, when
+                # running with MPI and some processes get 0-size arrays after
+                # domain decomposition. We "touch" the data anyway to avoid the
+                # case ``self._data is None``
+                coords.data
+            setattr(self, '_%s' % key, coords)
+        elif allow_empty:
+            setattr(self, '_%s' % key, None)
+        else:
+            raise ValueError("`%s` must be either SubFunction "
+                             "or iterable (e.g., list, np.ndarray)" % key)
+
     def _halo_exchange(self):
         # no-op for SparseFunctions
         return
@@ -118,6 +162,10 @@ def space_order(self):
         """The space order."""
         return self._space_order
 
+    @property
+    def r(self):
+        return self._radius
+
     @property
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
@@ -488,26 +536,7 @@ def __init_finalize__(self, *args, **kwargs):
         self.interpolator = LinearInterpolator(self)
 
         # Set up sparse point coordinates
-        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
-        if isinstance(coordinates, Function):
-            self._coordinates = coordinates
-        else:
-            dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
-            # Only retain the local data region
-            if coordinates is not None:
-                coordinates = np.array(coordinates)
-            self._coordinates = SubFunction(
-                name='%s_coords' % self.name, parent=self, dtype=self.dtype,
-                dimensions=dimensions, shape=(self.npoint, self.grid.dim),
-                space_order=0, initializer=coordinates, alias=self.alias,
-                distributor=self._distributor
-            )
-            if self.npoint == 0:
-                # This is a corner case -- we might get here, for example, when
-                # running with MPI and some processes get 0-size arrays after
-                # domain decomposition. We "touch" the data anyway to avoid the
-                # case ``self._data is None``
-                self.coordinates.data
+        self.__subfunc_setup__(**kwargs)
 
     @property
     def coordinates(self):
@@ -983,63 +1012,13 @@ def __init_finalize__(self, *args, **kwargs):
             raise TypeError('Need `r` int argument')
         if r <= 0:
             raise ValueError('`r` must be > 0')
-        self._r = r
+        self._radius = r
 
-        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
-        if isinstance(coordinates, SubFunction):
-            self._coordinates = coordinates
-        elif isinstance(coordinates, Iterable):
-            coordinates_data = coordinates
-            dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
-            shape = (self.npoint, self.grid.dim)
-            self._coordinates = SubFunction(
-                name='%s_coords' % self.name, parent=self, dtype=self.dtype,
-                dimensions=dimensions, shape=shape, space_order=0,
-                initializer=coordinates_data, alias=self.alias,
-                distributor=self._distributor
-            )
-        elif coordinates is None:
-            # Unlike `gridpoints` or `interpolation_coeffs`, not strictly necessary
-            self._coordinates = None
-        else:
-            raise ValueError("`coordinates` must be either SubFunction or iterable "
-                             "(e.g., list, np.ndnarray)")
-
-        gridpoints = kwargs.get('gridpoints')
-        gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data'))
-        if isinstance(gridpoints, SubFunction):
-            self._gridpoints = gridpoints
-        elif isinstance(gridpoints, Iterable):
-            gridpoints_data = gridpoints
-            dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
-            shape = (self.npoint, self.grid.dim)
-            self._gridpoints = SubFunction(
-                name="%s_gridpoints" % self.name, parent=self, dtype=np.int32,
-                dimensions=dimensions, shape=shape, space_order=0,
-                initializer=gridpoints_data, alias=self.alias,
-                distributor=self._distributor,
-            )
-        else:
-            raise ValueError("`gridpoints` must be either SubFunction or iterable "
-                             "(e.g., list, np.ndnarray)")
-
-        interpolation_coeffs = kwargs.get('interpolation_coeffs',
-                                          kwargs.get('interpolation_coeffs_data'))
-        if isinstance(interpolation_coeffs, SubFunction):
-            self._interpolation_coeffs = interpolation_coeffs
-        elif isinstance(interpolation_coeffs, Iterable):
-            interpolation_coeffs_data = interpolation_coeffs
-            dimensions = (self.indices[self._sparse_position],
-                          Dimension(name='d'), Dimension(name='i'))
-            shape = (self.npoint, self.grid.dim, r)
-            self._interpolation_coeffs = SubFunction(
-                name="%s_interp_coeffs" % self.name, parent=self, dtype=self.dtype,
-                dimensions=dimensions, shape=shape, space_order=0,
-                initializer=interpolation_coeffs_data, alias=self.alias
-            )
-        else:
-            raise ValueError("`interpolation_coeffs` must be either SubFunction "
-                             "or iterable (e.g., list, np.ndarray)")
+        self.__subfunc_setup__(allow_empty=True, **kwargs)
+        if self.coordinates is None:
+            self.__subfunc_setup__(key='gridpoints', **kwargs)
+
+        self.__subfunc_setup__(key='interpolation_coeffs', ndim=3, **kwargs)
 
         warning("Ensure that the provided interpolation coefficient and grid "
                 "point values are computed on the final grid that will be used "
@@ -1047,14 +1026,6 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
-    @property
-    def _radius(self):
-        return self.r
-
-    @property
-    def r(self):
-        return self._r
-
     @property
     def coordinates(self):
         return self._coordinates
@@ -1077,7 +1048,10 @@ def coordinates_data(self):
 
     @property
     def gridpoints_data(self):
-        return self.gridpoints.data.view(np.ndarray)
+        try:
+            return self.gridpoints.data.view(np.ndarray)
+        except AttributeError:
+            return None
 
     @property
     def interpolation_coeffs_data(self):
@@ -1540,10 +1514,6 @@ def _sub_functions(self):
                 'mrow', 'mcol', 'mval', 'par_dim_to_nnz_map',
                 'par_dim_to_nnz_m', 'par_dim_to_nnz_M')
 
-    @property
-    def r(self):
-        return self._radius
-
     def interpolate(self, expr, offset=0, u_t=None, p_t=None):
         """Creates a :class:`sympy.Eq` equation for the interpolation
         of an expression onto this sparse point collection.
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index e87b1507cf..b48f47face 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -8,7 +8,7 @@
 from conftest import skipif
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
-                    TimeDimension, SteppingDimension, Operator, MPI, Min,
+                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve,
                     PrecomputedSparseTimeFunction)
 from devito.ir import GuardFactor
 from devito.data import LEFT, OWNED
@@ -266,7 +266,6 @@ def test_precomputed_sparse_function():
     sf = PrecomputedSparseTimeFunction(
         name='sf', grid=grid, r=2, npoint=3, nt=5,
         coordinates=[(0., 0.), (1., 1.), (2., 2.)],
-        gridpoints=[(5, 90), (1, 80), (7, 84)],
         interpolation_coeffs=np.ndarray(shape=(3, 2, 2)),
     )
     sf.data[2, 1] = 5.
@@ -278,7 +277,6 @@ def test_precomputed_sparse_function():
     assert new_sf.data[2, 1] == 5.
 
     # gridpoints and interpolation coefficients must have been pickled
-    assert np.all(sf.gridpoints.data == new_sf.gridpoints.data)
     assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data)
 
     # coordinates, since they were given, should also have been pickled

From cf4ab5160aa94c6a2c1987a994696b5857fd1598 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 16 May 2023 09:22:59 +0000
Subject: [PATCH 05/90] api: Rework AbstractSparseFunction.__subfunc_setup__

---
 devito/types/sparse.py | 122 ++++++++++++++++++++++++++---------------
 1 file changed, 79 insertions(+), 43 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index e1b7e3538c..a5d528af42 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -99,45 +99,56 @@ def __distributor_setup__(self, **kwargs):
             kwargs['grid'].distributor
         )
 
-    def __subfunc_setup__(self, key='coordinates', ndim=2, allow_empty=False, **kwargs):
-        """
-        Setup SubFunction for a SparseFunction.
-        """
-        coordinates = kwargs.get(key, kwargs.get('%s_data' % key))
-        # In case only number of points is specified
-        npoint = kwargs.get('npoint', None)
-        if npoint is not None and coordinates is None:
-            coordinates = np.zeros((npoint, self.grid.dim))
-        # Check if already a pre-setup SubFunction
-        if isinstance(coordinates, Function):
-            setattr(self, '_%s' % key, coordinates)
-        # Setup the subfunction
-        elif isinstance(coordinates, Iterable):
-            dimensions = (self.indices[self._sparse_position], Dimension(name='d'),
-                          Dimension(name='i'))[:ndim]
-            shape = (self.npoint, self.grid.dim, self.r)[:ndim]
-            # Only retain the local data region
-            if coordinates is not None:
-                coordinates = np.array(coordinates)
-            coords = SubFunction(
-                name='%s_%s' % (self.name, key[:5]), parent=self, dtype=self.dtype,
-                dimensions=dimensions, shape=shape,
-                space_order=0, initializer=coordinates, alias=self.alias,
-                distributor=self._distributor
-            )
-            if self.npoint == 0:
-                # This is a corner case -- we might get here, for example, when
-                # running with MPI and some processes get 0-size arrays after
-                # domain decomposition. We "touch" the data anyway to avoid the
-                # case ``self._data is None``
-                coords.data
-            setattr(self, '_%s' % key, coords)
-        elif allow_empty:
-            setattr(self, '_%s' % key, None)
-        else:
+    def __subfunc_setup__(self, key, suffix):
+        if isinstance(key, SubFunction):
+            return key
+        elif key is not None and not isinstance(key, Iterable):
             raise ValueError("`%s` must be either SubFunction "
                              "or iterable (e.g., list, np.ndarray)" % key)
 
+        name = '%s_%s' % (self.name, suffix)
+        dimensions = (self.indices[self._sparse_position],
+                      Dimension(name='d'),
+                      Dimension(name='i'))
+        shape = (self.npoint, self.grid.dim, self.r)
+
+        if key is None:
+            # Fallback to default behaviour
+            n = 2  # (Sparse points, Grid Dimensions)
+            dtype = self.dtype
+        else:
+            if not isinstance(key, np.ndarray):
+                key = np.array(key)
+
+            n = key.ndim
+            if shape[:n] != key.shape:
+                raise ValueError("Incompatible shape `%s`; expected `%s`" %
+                                 (shape[:n], key.shape))
+
+            # Infer dtype
+            if np.issubdtype(key.dtype.type, np.integer):
+                dtype = np.int32
+            else:
+                dtype = self.dtype
+
+        dimensions = dimensions[:n]
+        shape = shape[:n]
+
+        sf = SubFunction(
+            name=name, parent=self, dtype=dtype, dimensions=dimensions,
+            shape=shape, space_order=0, initializer=key, alias=self.alias,
+            distributor=self._distributor
+        )
+
+        if self.npoint == 0:
+            # This is a corner case -- we might get here, for example, when
+            # running with MPI and some processes get 0-size arrays after
+            # domain decomposition. We "touch" the data anyway to avoid the
+            # case ``self._data is None``
+            sf.data
+
+        return sf
+
     def _halo_exchange(self):
         # no-op for SparseFunctions
         return
@@ -313,7 +324,11 @@ def _dist_scatter(self):
     def _arg_defaults(self, alias=None):
         key = alias or self
         mapper = {self: key}
-        mapper.update({getattr(self, i): getattr(key, i) for i in self._sub_functions})
+        for i in self._sub_functions:
+            f = getattr(key, i)
+            if f is not None:
+                mapper[getattr(self, i)] = f
+
         args = ReducerMap()
 
         # Add in the sparse data (as well as any SubFunction data) belonging to
@@ -536,7 +551,8 @@ def __init_finalize__(self, *args, **kwargs):
         self.interpolator = LinearInterpolator(self)
 
         # Set up sparse point coordinates
-        self.__subfunc_setup__(**kwargs)
+        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
+        self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
 
     @property
     def coordinates(self):
@@ -997,7 +1013,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     uses `*args` to (re-)create the dimension arguments of the symbolic object.
     """
 
-    _sub_functions = ('gridpoints', 'interpolation_coeffs')
+    _sub_functions = ('coordinates', 'gridpoints', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
                    ('r', 'coordinates_data', 'gridpoints_data',
@@ -1014,11 +1030,31 @@ def __init_finalize__(self, *args, **kwargs):
             raise ValueError('`r` must be > 0')
         self._radius = r
 
-        self.__subfunc_setup__(allow_empty=True, **kwargs)
-        if self.coordinates is None:
-            self.__subfunc_setup__(key='gridpoints', **kwargs)
+        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
+        gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data'))
+        if coordinates is not None and gridpoints is not None:
+            raise ValueError("Either `coordinates` or `gridpoints` must be "
+                             "provided, but not both")
 
-        self.__subfunc_setup__(key='interpolation_coeffs', ndim=3, **kwargs)
+        # Specifying only `npoint` is acceptable; this will require the user
+        # to set up the coordinates data later on
+        npoint = kwargs.get('npoint', None)
+        if self.npoint and coordinates is None and gridpoints is None:
+            coordinates = np.zeros((npoint, self.grid.dim))
+
+        if coordinates is not None:
+            self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
+            self._gridpoints = None
+        else:
+            assert gridpoints is not None
+            self._coordinates = None
+            self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
+
+        # Setup the interpolation coefficients. These are compulsory
+        interpolation_coeffs = kwargs.get('interpolation_coeffs',
+                                          kwargs.get('interpolation_coeffs_data'))
+        self._interpolation_coeffs = \
+            self.__subfunc_setup__(interpolation_coeffs, 'interp_coeffs')
 
         warning("Ensure that the provided interpolation coefficient and grid "
                 "point values are computed on the final grid that will be used "

From 60f0fdc6c0e6b59dad7d9f2bb7c5811c878cd627 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 17 May 2023 15:38:33 -0400
Subject: [PATCH 06/90] types: add MPI support to PrecomputedSparseFunction

---
 devito/types/grid.py   |  11 +-
 devito/types/sparse.py | 353 ++++++++++++++++++++++-------------------
 2 files changed, 200 insertions(+), 164 deletions(-)

diff --git a/devito/types/grid.py b/devito/types/grid.py
index b916ab4dbf..7596ac5325 100644
--- a/devito/types/grid.py
+++ b/devito/types/grid.py
@@ -222,11 +222,16 @@ def origin_map(self):
         return dict(zip(self.origin_symbols, self.origin))
 
     @property
-    def origin_offset(self):
-        """Offset of the local (per-process) origin from the domain origin."""
+    def origin_ioffset(self):
+        """Offset index of the local (per-process) origin from the domain origin."""
         grid_origin = [min(i) for i in self.distributor.glb_numb]
         assert len(grid_origin) == len(self.spacing)
-        return tuple(i*h for i, h in zip(grid_origin, self.spacing))
+        return grid_origin
+
+    @property
+    def origin_offset(self):
+        """Physical offset of the local (per-process) origin from the domain origin."""
+        return tuple(i*h for i, h in zip(self.origin_ioffset, self.spacing))
 
     @property
     def time_dim(self):
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index a5d528af42..41f6febf1d 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -19,7 +19,7 @@
                               retrieve_function_carriers)
 from devito.tools import (ReducerMap, as_tuple, flatten, prod, filter_ordered,
                           memoized_meth, is_integer)
-from devito.types.dense import DiscreteFunction, Function, SubFunction
+from devito.types.dense import DiscreteFunction, SubFunction
 from devito.types.dimension import (Dimension, ConditionalDimension, DefaultDimension,
                                     DynamicDimension)
 from devito.types.basic import Symbol
@@ -121,9 +121,9 @@ def __subfunc_setup__(self, key, suffix):
                 key = np.array(key)
 
             n = key.ndim
-            if shape[:n] != key.shape:
-                raise ValueError("Incompatible shape `%s`; expected `%s`" %
-                                 (shape[:n], key.shape))
+            # if shape[:n] != key.shape:
+            #     raise ValueError("Incompatible shape `%s`; expected `%s`" %
+            #                      (shape[:n], key.shape))
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
@@ -140,12 +140,12 @@ def __subfunc_setup__(self, key, suffix):
             distributor=self._distributor
         )
 
-        if self.npoint == 0:
-            # This is a corner case -- we might get here, for example, when
-            # running with MPI and some processes get 0-size arrays after
-            # domain decomposition. We "touch" the data anyway to avoid the
-            # case ``self._data is None``
-            sf.data
+        # Always "touch" the data, rather than only `if self.npoint == 0`.
+        # The zero-point case is a corner case -- we might get here, for
+        # example, when running with MPI and some processes get 0-size arrays
+        # after domain decomposition. Touching the data avoids the case
+        # ``self._data is None``
+        sf.data
 
         return sf
 
@@ -181,6 +181,18 @@ def r(self):
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
 
+    @property
+    def _mpitype(self):
+        return MPI._typedict[np.dtype(self.dtype).char]
+
+    @property
+    def comm(self):
+        return self.grid.distributor.comm
+
+    @property
+    def distributor(self):
+        return self.grid.distributor
+
     @property
     def gridpoints(self):
         """
@@ -226,7 +238,7 @@ def _dist_datamap(self):
         """
         Mapper ``M : MPI rank -> required sparse data``.
         """
-        return self.grid.distributor.glb_to_rank(self._support) or {}
+        return self.distributor.glb_to_rank(self._support) or {}
 
     def _dist_scatter_mask(self, dmap=None):
         """
@@ -306,20 +318,148 @@ def _dist_alltoall(self, dmap=None):
 
         return sshape, scount, sdisp, rshape, rcount, rdisp
 
-    def _dist_subfunc_alltoall(self, dmap=None):
+    def _dist_subfunc_alltoall(self, subfunc, dmap=None):
         """
         The metadata necessary to perform an ``MPI_Alltoallv`` distributing
         self's SubFunction values across the MPI ranks needing them.
         """
-        raise NotImplementedError
+        dmap = dmap or self._dist_datamap
+        ssparse, rsparse = self._dist_count(dmap=dmap)
+
+        # Per-rank shape of send/recv `subfunc` values
+        shape = subfunc.shape[1:]
+        sshape = [(i, *shape) for i in ssparse]
+        rshape = [(i, *shape) for i in rsparse]
+
+        # Per-rank count of send/recv `subfunc` values
+        scount = [prod(i) for i in sshape]
+        rcount = [prod(i) for i in rshape]
+
+        # Per-rank displacement of send/recv `subfunc` values (it's actually all
+        # contiguous, but the Alltoallv needs this information anyway)
+        sdisp = np.concatenate([[0], np.cumsum(scount)[:-1]])
+        rdisp = np.concatenate([[0], tuple(np.cumsum(rcount))[:-1]])
+
+        # Total shape of send/recv `subfunc` values
+        sshape = list(subfunc.shape)
+        sshape[0] = sum(ssparse)
+        rshape = list(subfunc.shape)
+        rshape[0] = sum(rsparse)
+
+        return sshape, scount, sdisp, rshape, rcount, rdisp
 
-    def _dist_scatter(self):
+    def _dist_data_scatter(self, data=None):
         """
         A ``numpy.ndarray`` containing up-to-date data values belonging
         to the calling MPI rank. A data value belongs to a given MPI rank R
         if its coordinates fall within R's local domain.
         """
-        raise NotImplementedError
+        data = data if data is not None else self.data._local
+
+        # If not using MPI, don't waste time
+        if self.distributor.nprocs == 1:
+            return data
+
+        # Compute dist map only once
+        dmap = self._dist_datamap
+        mask = self._dist_scatter_mask(dmap=dmap)
+
+        # Pack sparse data values so that they can be sent out via an Alltoallv
+        data = data[mask]
+        data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
+
+        # Send out the sparse point values
+        _, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap)
+        scattered = np.empty(shape=rshape, dtype=self.dtype)
+        self.comm.Alltoallv([data, scount, sdisp, self._mpitype],
+                            [scattered, rcount, rdisp, self._mpitype])
+
+        # Unpack data values so that they follow the expected storage layout
+        return np.ascontiguousarray(np.transpose(scattered, self._dist_reorder_mask))
+
+    def _dist_subfunc_scatter(self, subfunc):
+        # If not using MPI, don't waste time
+        if self.distributor.nprocs == 1:
+            return {subfunc: subfunc.data}
+
+        # Compute dist map only once
+        dmap = self._dist_datamap
+        mask = self._dist_scatter_mask(dmap=dmap)
+
+        # Pack the SubFunction values to be sent out via an Alltoallv
+        sfuncd = subfunc.data._local[mask[self._sparse_position]]
+
+        # Send out the sparse point SubFunction values
+        _, scount, sdisp, rshape, rcount, rdisp = \
+            self._dist_subfunc_alltoall(subfunc, dmap=dmap)
+        scattered = np.empty(shape=rshape, dtype=subfunc.dtype)
+        self.comm.Alltoallv([sfuncd, scount, sdisp, self._mpitype],
+                            [scattered, rcount, rdisp, self._mpitype])
+        sfuncd = scattered
+
+        # Translate global values into local ones if an origin is set for `subfunc`
+        if self.dist_origin[subfunc] is not None:
+            sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=self.dtype)
+
+        return {subfunc: sfuncd}
+
+    def _dist_data_gather(self, data):
+        # If not using MPI, don't waste time
+        if self.distributor.nprocs == 1:
+            return
+
+        # Compute dist map only once
+        dmap = self._dist_datamap
+        mask = self._dist_scatter_mask(dmap=dmap)
+
+        # Pack sparse data values so that they can be sent out via an Alltoallv
+        data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
+        # Send back the sparse point values
+        sshape, scount, sdisp, _, rcount, rdisp = self._dist_alltoall(dmap=dmap)
+        gathered = np.empty(shape=sshape, dtype=self.dtype)
+        self.comm.Alltoallv([data, rcount, rdisp, self._mpitype],
+                            [gathered, scount, sdisp, self._mpitype])
+        # Unpack data values so that they follow the expected storage layout
+        gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask))
+        self.data
+        self._data[mask] = gathered[:]
+
+    def _dist_subfunc_gather(self, sfuncd, sfunc):
+        if np.sum([sfuncd._obj.size[i] for i in range(self.ndim)]) > 0:
+            sfuncd = sfunc._C_as_ndarray(sfuncd)
+        # If not using MPI, don't waste time
+        if self.distributor.nprocs == 1:
+            return
+
+        # Compute dist map only once
+        dmap = self._dist_datamap
+        mask = self._dist_scatter_mask(dmap=dmap)
+        # Translate local values back into global ones if an origin is set for `sfunc`
+        if self.dist_origin[sfunc] is not None:
+            sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=self.dtype)
+        # Send back the sparse point SubFunction values
+        sshape, scount, sdisp, _, rcount, rdisp = \
+            self._dist_subfunc_alltoall(sfunc, dmap=dmap)
+        gathered = np.empty(shape=sshape, dtype=sfunc.dtype)
+        self.comm.Alltoallv([sfuncd, rcount, rdisp, self._mpitype],
+                            [gathered, scount, sdisp, self._mpitype])
+        sfunc.data._local[mask[self._sparse_position]] = gathered[:]
+
+        # Note: this method "mirrors" `_dist_subfunc_scatter`: a sparse point that
+        # is sent in `_dist_subfunc_scatter` is here received; a sparse point that
+        # is received in `_dist_subfunc_scatter` is here sent.
+
+    def _dist_scatter(self, data=None):
+        mapper = {self: self._dist_data_scatter(data=data)}
+        for i in self._sub_functions:
+            if getattr(self, i) is not None:
+                mapper.update(self._dist_subfunc_scatter(getattr(self, i)))
+        return mapper
+
+    def _dist_gather(self, data, *subfunc):
+        self._dist_data_gather(data)
+        for (sg, s) in zip(subfunc, self._sub_functions):
+            self._dist_subfunc_gather(sg, getattr(self, s))
 
     def _arg_defaults(self, alias=None):
         key = alias or self
@@ -364,16 +504,11 @@ def _arg_values(self, **kwargs):
 
         return values
 
-    def _arg_apply(self, dataobj, coordsobj, alias=None):
+    def _arg_apply(self, dataobj, *subfunc, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
-            # Coords may be None if the coordinates are not used in the Operator
-            if coordsobj is None:
-                pass
-            elif np.sum([coordsobj._obj.size[i] for i in range(self.ndim)]) > 0:
-                coordsobj = self.coordinates._C_as_ndarray(coordsobj)
-            key._dist_gather(self._C_as_ndarray(dataobj), coordsobj)
+            key._dist_gather(self._C_as_ndarray(dataobj), *subfunc)
         elif self.grid.distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
@@ -553,6 +688,7 @@ def __init_finalize__(self, *args, **kwargs):
         # Set up sparse point coordinates
         coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
         self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
+        self._dist_origin = {self._coordinates: self.grid.origin_offset}
 
     @property
     def coordinates(self):
@@ -566,6 +702,10 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
+    @property
+    def dist_origin(self):
+        return self._dist_origin
+
     @cached_property
     def _point_symbols(self):
         """Symbol for coordinate value in each dimension of the point."""
@@ -700,116 +840,6 @@ def _decomposition(self):
         mapper = {self._sparse_dim: self._distributor.decomposition[self._sparse_dim]}
         return tuple(mapper.get(d) for d in self.dimensions)
 
-    def _dist_subfunc_alltoall(self, dmap=None):
-        dmap = dmap or self._dist_datamap
-        ssparse, rsparse = self._dist_count(dmap=dmap)
-
-        # Per-rank shape of send/recv `coordinates`
-        sshape = [(i, self.grid.dim) for i in ssparse]
-        rshape = [(i, self.grid.dim) for i in rsparse]
-
-        # Per-rank count of send/recv `coordinates`
-        scount = [prod(i) for i in sshape]
-        rcount = [prod(i) for i in rshape]
-
-        # Per-rank displacement of send/recv `coordinates` (it's actually all
-        # contiguous, but the Alltoallv needs this information anyway)
-        sdisp = np.concatenate([[0], np.cumsum(scount)[:-1]])
-        rdisp = np.concatenate([[0], tuple(np.cumsum(rcount))[:-1]])
-
-        # Total shape of send/recv `coordinates`
-        sshape = list(self.coordinates.shape)
-        sshape[0] = sum(ssparse)
-        rshape = list(self.coordinates.shape)
-        rshape[0] = sum(rsparse)
-
-        return sshape, scount, sdisp, rshape, rcount, rdisp
-
-    def _dist_scatter(self, data=None):
-        data = data if data is not None else self.data._local
-        distributor = self.grid.distributor
-
-        # If not using MPI, don't waste time
-        if distributor.nprocs == 1:
-            return {self: data, self.coordinates: self.coordinates.data}
-
-        comm = distributor.comm
-        mpitype = MPI._typedict[np.dtype(self.dtype).char]
-
-        # Compute dist map only once
-        dmap = self._dist_datamap
-        mask = self._dist_scatter_mask(dmap=dmap)
-
-        # Pack sparse data values so that they can be sent out via an Alltoallv
-        data = data[mask]
-        data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
-
-        # Send out the sparse point values
-        _, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap)
-        scattered = np.empty(shape=rshape, dtype=self.dtype)
-        comm.Alltoallv([data, scount, sdisp, mpitype],
-                       [scattered, rcount, rdisp, mpitype])
-        data = scattered
-
-        # Unpack data values so that they follow the expected storage layout
-        data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
-
-        # Pack (reordered) coordinates so that they can be sent out via an Alltoallv
-        coords = self.coordinates.data._local[mask[self._sparse_position]]
-
-        # Send out the sparse point coordinates
-        _, scount, sdisp, rshape, rcount, rdisp = self._dist_subfunc_alltoall(dmap=dmap)
-        scattered = np.empty(shape=rshape, dtype=self.coordinates.dtype)
-        comm.Alltoallv([coords, scount, sdisp, mpitype],
-                       [scattered, rcount, rdisp, mpitype])
-        coords = scattered
-
-        # Translate global coordinates into local coordinates
-        coords = coords - np.array(self.grid.origin_offset, dtype=self.dtype)
-
-        return {self: data, self.coordinates: coords}
-
-    def _dist_gather(self, data, coords):
-        distributor = self.grid.distributor
-
-        # If not using MPI, don't waste time
-        if distributor.nprocs == 1:
-            return
-
-        comm = distributor.comm
-
-        # Compute dist map only once
-        dmap = self._dist_datamap
-        mask = self._dist_scatter_mask(dmap=dmap)
-
-        # Pack sparse data values so that they can be sent out via an Alltoallv
-        data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
-        # Send back the sparse point values
-        sshape, scount, sdisp, _, rcount, rdisp = self._dist_alltoall(dmap=dmap)
-        gathered = np.empty(shape=sshape, dtype=self.dtype)
-        mpitype = MPI._typedict[np.dtype(self.dtype).char]
-        comm.Alltoallv([data, rcount, rdisp, mpitype],
-                       [gathered, scount, sdisp, mpitype])
-        # Unpack data values so that they follow the expected storage layout
-        gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask))
-        self._data[mask] = gathered[:]
-
-        if coords is not None:
-            # Pack (reordered) coordinates so that they can be sent out via an Alltoallv
-            coords = coords + np.array(self.grid.origin_offset, dtype=self.dtype)
-            # Send out the sparse point coordinates
-            sshape, scount, sdisp, _, rcount, rdisp = \
-                self._dist_subfunc_alltoall(dmap=dmap)
-            gathered = np.empty(shape=sshape, dtype=self.coordinates.dtype)
-            mpitype = MPI._typedict[np.dtype(self.coordinates.dtype).char]
-            comm.Alltoallv([coords, rcount, rdisp, mpitype],
-                           [gathered, scount, sdisp, mpitype])
-            self._coordinates.data._local[mask[self._sparse_position]] = gathered[:]
-
-        # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent
-        # in `_dist_scatter` is here received; a sparse point that is received in
-        # `_dist_scatter` is here sent.
-
 
 class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
     """
@@ -1028,6 +1058,7 @@ def __init_finalize__(self, *args, **kwargs):
             raise TypeError('Need `r` int argument')
         if r <= 0:
             raise ValueError('`r` must be > 0')
+
         self._radius = r
 
         coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
@@ -1045,16 +1076,24 @@ def __init_finalize__(self, *args, **kwargs):
         if coordinates is not None:
             self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
             self._gridpoints = None
+            self._dist_origin = {self._coordinates: self.grid.origin_offset}
         else:
             assert gridpoints is not None
             self._coordinates = None
             self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
+            self._dist_origin = {self._coordinates: self.grid.origin_ioffset}
 
         # Setup the interpolation coefficients. These are compulsory
         interpolation_coeffs = kwargs.get('interpolation_coeffs',
                                           kwargs.get('interpolation_coeffs_data'))
         self._interpolation_coeffs = \
             self.__subfunc_setup__(interpolation_coeffs, 'interp_coeffs')
+        self._dist_origin.update({self._interpolation_coeffs: None})
+        # Make sure it matches the radius
+        if self._interpolation_coeffs.shape[-1] != r:
+            nr = self._interpolation_coeffs.shape[-1]
+            raise ValueError("Interpolation coefficients shape %d do "
+                             "not match specified radius %d" % (r, nr))
 
         warning("Ensure that the provided interpolation coefficient and grid "
                 "point values are computed on the final grid that will be used "
@@ -1068,8 +1107,15 @@ def coordinates(self):
 
     @property
     def gridpoints(self):
+        if self._gripoints is None:
+            coord = self.coordinates.data._local - self.grid.origin
+            return (np.floor(coord) / self.grid.spacing).astype(int)
         return self._gridpoints
 
+    @property
+    def dist_origin(self):
+        return self._dist_origin
+
     @property
     def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
@@ -1089,39 +1135,24 @@ def gridpoints_data(self):
         except AttributeError:
             return None
 
+    @cached_property
+    def coords_or_points(self):
+        if self.gridpoints is None:
+            return self.coordinates
+        else:
+            return self.gridpoints
+
+    @cached_property
+    def coord_origin(self):
+        if self.gridpoints is None:
+            return self.grid.origin_offset
+        else:
+            return self.grid.grid_origin
+
     @property
     def interpolation_coeffs_data(self):
         return self.interpolation_coeffs.data.view(np.ndarray)
 
-    def _dist_scatter(self, data=None):
-        data = data if data is not None else self.data
-        distributor = self.grid.distributor
-
-        # If not using MPI, don't waste time
-        if distributor.nprocs == 1:
-            return {self: data, self.gridpoints: self.gridpoints.data,
-                    self._interpolation_coeffs: self._interpolation_coeffs.data}
-
-        raise NotImplementedError
-
-    def _dist_gather(self, data):
-        distributor = self.grid.distributor
-
-        # If not using MPI, don't waste time
-        if distributor.nprocs == 1:
-            return
-
-        raise NotImplementedError
-
-    def _arg_apply(self, *args, **kwargs):
-        distributor = self.grid.distributor
-
-        # If not using MPI, don't waste time
-        if distributor.nprocs == 1:
-            return
-
-        raise NotImplementedError
-
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
                                     PrecomputedSparseFunction):

From ca5b20fa4aca7dbaf69e883286e6941d8864db11 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 18 May 2023 11:00:39 -0400
Subject: [PATCH 07/90] mpi: rework interpolation to work with precomputed and
 mpi

---
 devito/operations/interpolators.py | 184 +++++++++++++++++++---------
 devito/types/sparse.py             | 190 +++++++++++++++++------------
 2 files changed, 234 insertions(+), 140 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 61b12641e3..ba7106c468 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -211,6 +211,18 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
 
         return idx_subs, temps
 
+    def subs_coords(self, _expr, *idx_subs):
+        return [_expr.xreplace(v_sub) * b.xreplace(v_sub)
+                for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]
+
+    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
+        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
+                    implicit_dims=implicit_dims)
+                for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
+
+    def implicit_dims(self, implicit_dims):
+        return as_tuple(implicit_dims) + self.sfunction.dimensions
+
     def interpolate(self, expr, offset=0, increment=False, self_subs={},
                     implicit_dims=None):
         """
@@ -229,7 +241,7 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={},
             interpolation expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = as_tuple(implicit_dims) + self.sfunction.dimensions
+        implicit_dims = self.implicit_dims(implicit_dims)
 
         def callback():
             # Derivatives must be evaluated before the introduction of indirect accesses
@@ -250,8 +262,7 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            args = [_expr.xreplace(v_sub) * b.xreplace(v_sub)
-                    for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]
+            args = self.subs_coords(_expr, *idx_subs)
 
             # Accumulate point-wise contributions into a temporary
             rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
@@ -284,7 +295,7 @@ def inject(self, field, expr, offset=0, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = as_tuple(implicit_dims) + self.sfunction.dimensions
+        implicit_dims = self.implicit_dims(implicit_dims)
 
         def callback():
             # Derivatives must be evaluated before the introduction of indirect accesses
@@ -304,81 +315,132 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
-                        implicit_dims=implicit_dims)
-                    for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
+            eqns = self.subs_coords_eq(field, _expr, *idx_subs, implicit_dims=implicit_dims)
 
             return temps + eqns
 
         return Injection(field, expr, offset, self, callback)
 
 
-class PrecomputedInterpolator(GenericInterpolator):
+class PrecomputedInterpolator(LinearInterpolator):
 
     def __init__(self, obj):
-        self.obj = obj
+        self.sfunction = obj
 
     @property
     def r(self):
         return self.obj.r
 
-    def interpolate(self, expr, offset=0, increment=False, self_subs={}):
-        """
-        Generate equations interpolating an arbitrary expression into ``self``.
 
-        Parameters
-        ----------
-        expr : expr-like
-            Input expression to interpolate.
-        offset : int, optional
-            Additional offset from the boundary.
-        increment: bool, optional
-            If True, generate increments (Inc) rather than assignments (Eq).
+    def _interpolation_indices(self, variables, offset=0, field_offset=0,
+                               implicit_dims=None):
         """
-        def callback():
-            _expr = indexify(expr)
+        Generate interpolation indices for the DiscreteFunctions in ``variables``.
+        """
+        if self.sfunction.gridpoints is None:
+            return super()._interpolation_indices(variables, offset=offset,
+                                                  field_offset=field_offset,
+                                 
+                                                  implicit_dims=implicit_dims)
 
-            p, _, _ = self.obj.interpolation_coeffs.indices
-            dim_subs = []
-            coeffs = []
-            for i, d in enumerate(self.obj.grid.dimensions):
-                rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
-                dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
-                coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
-            # Apply optional time symbol substitutions to lhs of assignment
-            lhs = self.obj.subs(self_subs)
-            rhs = prod(coeffs) * _expr.subs(dim_subs)
+        index_matrix, points = self.sfunction._index_matrix(offset)
 
-            return [Inc(lhs, rhs)]
+        idx_subs = []
+        for i, idx in enumerate(index_matrix):
+            # Introduce ConditionalDimension so that we don't go OOB
+            mapper = {}
+            for j, d in zip(idx, self.grid.dimensions):
+                p = points[j]
+                lb = sympy.And(p >= d.symbolic_min - self.sfunction._radius,
+                               evaluate=False)
+                ub = sympy.And(p\
+                <= d.symbolic_max + self.sfunction._radius,
+                               evaluate=False)
+                condition = sympy.And(lb, ub, evaluate=False)
+                mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
+                                                 condition=condition, indirect=True)
 
-        return Interpolation(expr, offset, increment, self_subs, self, callback)
+            # Track Indexed substitutions
+            idx_subs.append(mapper)
 
-    def inject(self, field, expr, offset=0):
-        """
-        Generate equations injecting an arbitrary expression into a field.
+        # Temporaries for the indirection dimensions
+        temps = [Eq(v, k, implicit_dims=implicit_dims) for k, v in points.items()]
 
-        Parameters
-        ----------
-        field : Function
-            Input field into which the injection is performed.
-        expr : expr-like
-            Injected expression.
-        offset : int, optional
-            Additional offset from the boundary.
-        """
-        def callback():
-            _expr = indexify(expr)
-            _field = indexify(field)
-
-            p, _ = self.obj.gridpoints.indices
-            dim_subs = []
-            coeffs = []
-            for i, d in enumerate(self.obj.grid.dimensions):
-                rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
-                dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
-                coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
-            rhs = prod(coeffs) * _expr
-            _field = _field.subs(dim_subs)
-            return [Inc(_field, rhs.subs(dim_subs))]
+        return idx_subs, temps
 
-        return Injection(field, expr, offset, self, callback)
+    def implicit_dims(self, implicit_dims):
+        return as_tuple(implicit_dims) + self.sfunction.interpolation_coeffs.dimensions
+
+    @property
+    def _interpolation_coeffs(self):
+        return self.sfunction.interpolation_coeffs
+
+    def subs_coords(self, _expr, *idx_subs):
+        b = self._interpolation_coeffs
+        return [_expr.xreplace(v_sub) * b for v_sub in idx_subs]
+
+    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
+        b = self._interpolation_coeffs
+        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
+                    implicit_dims=implicit_dims) for vsub in idx_subs]
+    
+    # def interpolate(self, expr, offset=0, increment=False, self_subs={}):
+    #     """
+    #     Generate equations interpolating an arbitrary expression into ``self``.
+
+    #     Parameters
+    #     ----------
+    #     expr : expr-like
+    #         Input expression to interpolate.
+    #     offset : int, optional
+    #         Additional offset from the boundary.
+    #     increment: bool, optional
+    #         If True, generate increments (Inc) rather than assignments (Eq).
+    #     """
+    #     def callback():
+    #         _expr = indexify(expr)
+
+    #         p, _, _ = self.obj.interpolation_coeffs.indices
+    #         dim_subs = []
+    #         coeffs = []
+    #         for i, d in enumerate(self.obj.grid.dimensions):
+    #             rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
+    #             dim_subs.append((d, INT(rd + self.obj.gridpoints_data[p, i])))
+    #             coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
+    #         # Apply optional time symbol substitutions to lhs of assignment
+    #         lhs = self.obj.subs(self_subs)
+    #         rhs = prod(coeffs) * _expr.subs(dim_subs)
+
+    #         return [Inc(lhs, rhs)]
+
+    #     return Interpolation(expr, offset, increment, self_subs, self, callback)
+
+    # def inject(self, field, expr, offset=0):
+    #     """
+    #     Generate equations injecting an arbitrary expression into a field.
+
+    #     Parameters
+    #     ----------
+    #     field : Function
+    #         Input field into which the injection is performed.
+    #     expr : expr-like
+    #         Injected expression.
+    #     offset : int, optional
+    #         Additional offset from the boundary.
+    #     """
+    #     def callback():
+    #         _expr = indexify(expr)
+    #         _field = indexify(field)
+
+    #         p, _ = self.obj.gridpoints.indices
+    #         dim_subs = []
+    #         coeffs = []
+    #         for i, d in enumerate(self.obj.grid.dimensions):
+    #             rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
+    #             dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
+    #             coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
+    #         rhs = prod(coeffs) * _expr
+    #         _field = _field.subs(dim_subs)
+    #         return [Inc(_field, rhs.subs(dim_subs))]
+
+    #     return Injection(field, expr, offset, self, callback)
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 41f6febf1d..111472d01f 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -121,9 +121,9 @@ def __subfunc_setup__(self, key, suffix):
                 key = np.array(key)
 
             n = key.ndim
-            # if shape[:n] != key.shape:
-            #     raise ValueError("Incompatible shape `%s`; expected `%s`" %
-            #                      (shape[:n], key.shape))
+            if shape[:n] != key.shape:
+                raise ValueError("Incompatible shape `%s`; expected `%s`" %
+                                 (shape[:n], key.shape))
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
@@ -140,12 +140,12 @@ def __subfunc_setup__(self, key, suffix):
             distributor=self._distributor
         )
 
-        # if self.npoint == 0:
-        #     # This is a corner case -- we might get here, for example, when
-        #     # running with MPI and some processes get 0-size arrays after
-        #     # domain decomposition. We "touch" the data anyway to avoid the
-        #     # case ``self._data is None``
-        sf.data
+        if self.npoint == 0:
+            # This is a corner case -- we might get here, for example, when
+            # running with MPI and some processes get 0-size arrays after
+            # domain decomposition. We "touch" the data anyway to avoid the
+            # case ``self._data is None``
+            sf.data
 
         return sf
 
@@ -193,6 +193,56 @@ def comm(self):
     def distributor(self):
         return self.grid.distributor
 
+    @cached_property
+    def _point_symbols(self):
+        """Symbol for coordinate value in each dimension of the point."""
+        return tuple(Symbol(name='p%s' % d, dtype=self.dtype)
+                     for d in self.grid.dimensions)
+
+    @cached_property
+    def _position_map(self):
+        """
+        Symbols map for the position of the sparse points relative to the grid
+        origin.
+
+        Notes
+        -----
+        The expression `(coord - origin)/spacing` could also be computed in the
+        mathematically equivalent expanded form `coord/spacing -
+        origin/spacing`. This particular form is problematic when a sparse
+        point is in close proximity of the grid origin, since due to a larger
+        machine precision error it may cause a +-1 error in the computation of
+        the position. We mitigate this problem by computing the positions
+        individually (hence the need for a position map).
+        """
+        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
+                   for d in self.grid.dimensions]
+        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
+                                                          self._coordinate_symbols,
+                                                          self.grid.origin_symbols)])
+
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each dimension for each point symbol."""
+        return tuple(product(range(self.r+1), repeat=self.grid.dim))
+
+    @cached_property
+    def _coordinate_indices(self):
+        """Symbol for each grid index according to the coordinates."""
+        return tuple([INT(floor((c - o) / i.spacing))
+                      for c, o, i in zip(self._coordinate_symbols,
+                                         self.grid.origin_symbols,
+                                         self.grid.dimensions[:self.grid.dim])])
+
+    def _coordinate_bases(self, field_offset):
+        """Symbol for the base coordinates of the reference grid point."""
+        return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
+                      for c, o, idx, i, of in zip(self._coordinate_symbols,
+                                                  self.grid.origin_symbols,
+                                                  self._coordinate_indices,
+                                                  self.grid.dimensions[:self.grid.dim],
+                                                  field_offset)])
+
     @property
     def gridpoints(self):
         """
@@ -425,7 +475,9 @@ def _dist_data_gather(self, data):
         self._data[mask] = gathered[:]
 
     def _dist_subfunc_gather(self, sfuncd, sfunc):
-        if np.sum([sfuncd._obj.size[i] for i in range(self.ndim)]) > 0:
+        if sfuncd is None:
+            pass
+        elif np.sum([sfuncd._obj.size[i] for i in range(self.ndim)]) > 0:
             sfuncd = sfunc._C_as_ndarray(sfuncd)
         # If not using MPI, don't waste time
         if self.distributor.nprocs == 1:
@@ -702,43 +754,6 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
-    @property
-    def dist_origin(self):
-        return self._dist_origin
-
-    @cached_property
-    def _point_symbols(self):
-        """Symbol for coordinate value in each dimension of the point."""
-        return tuple(Symbol(name='p%s' % d, dtype=self.dtype)
-                     for d in self.grid.dimensions)
-
-    @cached_property
-    def _position_map(self):
-        """
-        Symbols map for the position of the sparse points relative to the grid
-        origin.
-
-        Notes
-        -----
-        The expression `(coord - origin)/spacing` could also be computed in the
-        mathematically equivalent expanded form `coord/spacing -
-        origin/spacing`. This particular form is problematic when a sparse
-        point is in close proximity of the grid origin, since due to a larger
-        machine precision error it may cause a +-1 error in the computation of
-        the position. We mitigate this problem by computing the positions
-        individually (hence the need for a position map).
-        """
-        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
-                   for d in self.grid.dimensions]
-        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
-                                                          self._coordinate_symbols,
-                                                          self.grid.origin_symbols)])
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(2), repeat=self.grid.dim))
-
     @cached_property
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each dimension."""
@@ -746,23 +761,6 @@ def _coordinate_symbols(self):
         return tuple([self.coordinates.indexify((p_dim, i))
                       for i in range(self.grid.dim)])
 
-    @cached_property
-    def _coordinate_indices(self):
-        """Symbol for each grid index according to the coordinates."""
-        return tuple([INT(floor((c - o) / i.spacing))
-                      for c, o, i in zip(self._coordinate_symbols,
-                                         self.grid.origin_symbols,
-                                         self.grid.dimensions[:self.grid.dim])])
-
-    def _coordinate_bases(self, field_offset):
-        """Symbol for the base coordinates of the reference grid point."""
-        return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
-                      for c, o, idx, i, of in zip(self._coordinate_symbols,
-                                                  self.grid.origin_symbols,
-                                                  self._coordinate_indices,
-                                                  self.grid.dimensions[:self.grid.dim],
-                                                  field_offset)])
-
     @memoized_meth
     def _index_matrix(self, offset):
         # Note about the use of *memoization*
@@ -782,9 +780,14 @@ def _index_matrix(self, offset):
 
         return index_matrix, points
 
+    @property
+    def dist_origin(self):
+        return self._dist_origin
+
+
     @property
     def gridpoints(self):
-        if self.coordinates._data is None:
+        if self.coordinates.data is None:
             raise ValueError("No coordinates attached to this SparseFunction")
         return (
             np.floor(self.coordinates.data._local - self.grid.origin) / self.grid.spacing
@@ -1101,15 +1104,17 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each dimension for each point symbol."""
+        return tuple(product(range(-self.r//2+1, self.r//2+1), repeat=self.grid.dim))
+
     @property
     def coordinates(self):
         return self._coordinates
 
     @property
     def gridpoints(self):
-        if self._gripoints is None:
-            coord = self.coordinates.data._local - self.grid.origin
-            return (np.floor(coord) / self.grid.spacing).astype(int)
         return self._gridpoints
 
     @property
@@ -1130,10 +1135,10 @@ def coordinates_data(self):
 
     @property
     def gridpoints_data(self):
-        try:
-            return self.gridpoints.data.view(np.ndarray)
-        except AttributeError:
-            return None
+        if self._gridpoints is None:
+            coord = self.coordinates.data._local - self.grid.origin
+            return (np.floor(coord) / self.grid.spacing).astype(int)
+        return self._gridpoints.data.view(np.ndarray)
 
     @cached_property
     def coords_or_points(self):
@@ -1142,17 +1147,44 @@ def coords_or_points(self):
         else:
             return self.gridpoints
 
-    @cached_property
-    def coord_origin(self):
-        if self.gridpoints is None:
-            return self.grid.origin_offset
-        else:
-            return self.grid.grid_origin
-
     @property
     def interpolation_coeffs_data(self):
         return self.interpolation_coeffs.data.view(np.ndarray)
 
+    @cached_property
+    def _coordinate_symbols(self):
+        """Symbol representing the coordinate values in each dimension."""
+        p_dim = self.indices[self._sparse_position]
+        if self._gridpoints is None:
+            return tuple([self.coordinates.indexify((p_dim, i))
+                        for i in range(self.grid.dim)])
+        else:
+            return tuple([self.gridpoints.indexify((p_dim, i)) * d
+                          for (i, d) in enumerate(self.grid.spacing_symbols)])
+
+    @memoized_meth
+    def _index_matrix(self, offset):
+        # Note about the use of *memoization*
+        # Since this method is called by `_interpolation_indices`, using
+        # memoization avoids a proliferation of symbolically identical
+        # ConditionalDimensions for a given set of indirection indices
+
+        # List of indirection indices for all adjacent grid points
+        if self._gridpoints is None:
+            index_matrix = [tuple(idx + ii + offset for ii, idx
+                                in zip(inc, self._coordinate_indices))
+                            for inc in self._point_increments]
+        else:
+            index_matrix = [tuple(self._gridpoints + ii + offset for ii in inc)
+                            for inc in self._point_increments]
+
+        # A unique symbol for each indirection index
+        indices = filter_ordered(flatten(index_matrix))
+        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
+                              for i, p in enumerate(indices)])
+    
+        return index_matrix, points
+
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
                                     PrecomputedSparseFunction):

From f66806ed3e54199ce13014ef33ce2196934b82fe Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 18 May 2023 14:15:14 -0400
Subject: [PATCH 08/90] api: precomputed sparse function temps

---
 devito/operations/interpolators.py | 101 ++++++-----------------------
 devito/types/sparse.py             |  24 +++----
 2 files changed, 34 insertions(+), 91 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index ba7106c468..5ff0fdafc8 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -4,10 +4,9 @@
 import numpy as np
 from cached_property import cached_property
 
-from devito.symbolics import retrieve_function_carriers, indexify, INT
+from devito.symbolics import retrieve_function_carriers
 from devito.tools import as_tuple, powerset, flatten, prod
-from devito.types import (ConditionalDimension, DefaultDimension, Eq, Inc,
-                          Evaluable, Symbol)
+from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol)
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
 
@@ -315,7 +314,8 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = self.subs_coords_eq(field, _expr, *idx_subs, implicit_dims=implicit_dims)
+            eqns = self.subs_coords_eq(field, _expr, *idx_subs,
+                                       implicit_dims=implicit_dims)
 
             return temps + eqns
 
@@ -331,7 +331,6 @@ def __init__(self, obj):
     def r(self):
         return self.obj.r
 
-
     def _interpolation_indices(self, variables, offset=0, field_offset=0,
                                implicit_dims=None):
         """
@@ -340,26 +339,26 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         if self.sfunction.gridpoints is None:
             return super()._interpolation_indices(variables, offset=offset,
                                                   field_offset=field_offset,
-                                 
                                                   implicit_dims=implicit_dims)
 
-        index_matrix, points = self.sfunction._index_matrix(offset)
+        index_matrix, points, shifts = self.sfunction._index_matrix(offset)
 
         idx_subs = []
+        coeffs = self._interpolation_coeffs
+        dt, it = coeffs.dimensions[1:]
         for i, idx in enumerate(index_matrix):
             # Introduce ConditionalDimension so that we don't go OOB
             mapper = {}
-            for j, d in zip(idx, self.grid.dimensions):
+            for j, (di, d) in zip(idx, enumerate(self.grid.dimensions)):
                 p = points[j]
-                lb = sympy.And(p >= d.symbolic_min - self.sfunction._radius,
+                lb = sympy.And(p >= d.symbolic_min - self.sfunction.r // 2,
                                evaluate=False)
-                ub = sympy.And(p\
-                <= d.symbolic_max + self.sfunction._radius,
+                ub = sympy.And(p <= d.symbolic_max + self.sfunction.r // 2,
                                evaluate=False)
                 condition = sympy.And(lb, ub, evaluate=False)
                 mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
                                                  condition=condition, indirect=True)
-
+                mapper[coeffs._subs(dt, di)] = coeffs.subs({dt: di, it: shifts[i][di]})
             # Track Indexed substitutions
             idx_subs.append(mapper)
 
@@ -368,79 +367,21 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
 
         return idx_subs, temps
 
-    def implicit_dims(self, implicit_dims):
-        return as_tuple(implicit_dims) + self.sfunction.interpolation_coeffs.dimensions
-
     @property
     def _interpolation_coeffs(self):
         return self.sfunction.interpolation_coeffs
 
+    @property
+    def _interpolation_coeffsp(self):
+        d = self.sfunction.interpolation_coeffs.dimensions[1]
+        return prod([self.sfunction.interpolation_coeffs._subs(d, i)
+                     for (i, _) in enumerate(self.sfunction.grid.dimensions)])
+
     def subs_coords(self, _expr, *idx_subs):
-        b = self._interpolation_coeffs
-        return [_expr.xreplace(v_sub) * b for v_sub in idx_subs]
+        b = self._interpolation_coeffsp
+        return [_expr.xreplace(v_sub) * b.xreplace(v_sub) for v_sub in idx_subs]
 
     def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
-        b = self._interpolation_coeffs
-        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
+        b = self._interpolation_coeffsp
+        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b.xreplace(vsub),
                     implicit_dims=implicit_dims) for vsub in idx_subs]
-    
-    # def interpolate(self, expr, offset=0, increment=False, self_subs={}):
-    #     """
-    #     Generate equations interpolating an arbitrary expression into ``self``.
-
-    #     Parameters
-    #     ----------
-    #     expr : expr-like
-    #         Input expression to interpolate.
-    #     offset : int, optional
-    #         Additional offset from the boundary.
-    #     increment: bool, optional
-    #         If True, generate increments (Inc) rather than assignments (Eq).
-    #     """
-    #     def callback():
-    #         _expr = indexify(expr)
-
-    #         p, _, _ = self.obj.interpolation_coeffs.indices
-    #         dim_subs = []
-    #         coeffs = []
-    #         for i, d in enumerate(self.obj.grid.dimensions):
-    #             rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
-    #             dim_subs.append((d, INT(rd + self.obj.gridpoints_data[p, i])))
-    #             coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
-    #         # Apply optional time symbol substitutions to lhs of assignment
-    #         lhs = self.obj.subs(self_subs)
-    #         rhs = prod(coeffs) * _expr.subs(dim_subs)
-
-    #         return [Inc(lhs, rhs)]
-
-    #     return Interpolation(expr, offset, increment, self_subs, self, callback)
-
-    # def inject(self, field, expr, offset=0):
-    #     """
-    #     Generate equations injecting an arbitrary expression into a field.
-
-    #     Parameters
-    #     ----------
-    #     field : Function
-    #         Input field into which the injection is performed.
-    #     expr : expr-like
-    #         Injected expression.
-    #     offset : int, optional
-    #         Additional offset from the boundary.
-    #     """
-    #     def callback():
-    #         _expr = indexify(expr)
-    #         _field = indexify(field)
-
-    #         p, _ = self.obj.gridpoints.indices
-    #         dim_subs = []
-    #         coeffs = []
-    #         for i, d in enumerate(self.obj.grid.dimensions):
-    #             rd = DefaultDimension(name="r%s" % d.name, default_value=self.r)
-    #             dim_subs.append((d, INT(rd + self.obj.gridpoints[p, i])))
-    #             coeffs.append(self.obj.interpolation_coeffs[p, i, rd])
-    #         rhs = prod(coeffs) * _expr
-    #         _field = _field.subs(dim_subs)
-    #         return [Inc(_field, rhs.subs(dim_subs))]
-
-    #     return Injection(field, expr, offset, self, callback)
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 111472d01f..cfd5e5c46d 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -121,9 +121,10 @@ def __subfunc_setup__(self, key, suffix):
                 key = np.array(key)
 
             n = key.ndim
-            if shape[:n] != key.shape:
-                raise ValueError("Incompatible shape `%s`; expected `%s`" %
-                                 (shape[:n], key.shape))
+            # Need to fix this check to get global npoint, global_shape broken
+            # if shape[:n] != key.shape and self.distributor.nprocs == 1:
+            #     raise ValueError("Incompatible shape `%s`; expected `%s`" %
+            #                      (shape[:n], key.shape))
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
@@ -784,7 +785,6 @@ def _index_matrix(self, offset):
     def dist_origin(self):
         return self._dist_origin
 
-
     @property
     def gridpoints(self):
         if self.coordinates.data is None:
@@ -1157,7 +1157,7 @@ def _coordinate_symbols(self):
         p_dim = self.indices[self._sparse_position]
         if self._gridpoints is None:
             return tuple([self.coordinates.indexify((p_dim, i))
-                        for i in range(self.grid.dim)])
+                          for i in range(self.grid.dim)])
         else:
             return tuple([self.gridpoints.indexify((p_dim, i)) * d
                           for (i, d) in enumerate(self.grid.spacing_symbols)])
@@ -1171,19 +1171,21 @@ def _index_matrix(self, offset):
 
         # List of indirection indices for all adjacent grid points
         if self._gridpoints is None:
-            index_matrix = [tuple(idx + ii + offset for ii, idx
-                                in zip(inc, self._coordinate_indices))
+            index_matrix = [tuple(idx + ii + offset
+                                  for ii, idx in zip(inc, self._coordinate_indices))
                             for inc in self._point_increments]
         else:
-            index_matrix = [tuple(self._gridpoints + ii + offset for ii in inc)
+            ddim = self._gridpoints.dimensions[1]
+            index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
+                                  for (ii, d) in zip(inc, range(self.grid.dim)))
                             for inc in self._point_increments]
-
+        shifts = [tuple(ii + offset for ii in inc)
+                  for inc in self._point_increments]
         # A unique symbol for each indirection index
         indices = filter_ordered(flatten(index_matrix))
         points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                               for i, p in enumerate(indices)])
-    
-        return index_matrix, points
+        return index_matrix, points, shifts
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,

From 3f096e8c1ff2f042bfa9795bbbcfe0fe9c978ec6 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 19 May 2023 08:39:20 -0400
Subject: [PATCH 09/90] tests: update test according to new sparse setup

---
 devito/types/sparse.py | 18 ++++++++++--------
 tests/test_caching.py  |  1 +
 tests/test_dle.py      | 11 +++++------
 tests/test_msparse.py  |  4 ++++
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index cfd5e5c46d..19f6cd20c1 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -119,8 +119,10 @@ def __subfunc_setup__(self, key, suffix):
         else:
             if not isinstance(key, np.ndarray):
                 key = np.array(key)
-
-            n = key.ndim
+                # Correct for corner case of single coordinate
+                n = max(key.ndim, 2)
+            else:
+                n = key.ndim
             # Need to fix this check to get global npoint, global_shape broken
             # if shape[:n] != key.shape and self.distributor.nprocs == 1:
             #     raise ValueError("Incompatible shape `%s`; expected `%s`" %
@@ -134,7 +136,6 @@ def __subfunc_setup__(self, key, suffix):
 
         dimensions = dimensions[:n]
         shape = shape[:n]
-
         sf = SubFunction(
             name=name, parent=self, dtype=dtype, dimensions=dimensions,
             shape=shape, space_order=0, initializer=key, alias=self.alias,
@@ -1135,10 +1136,10 @@ def coordinates_data(self):
 
     @property
     def gridpoints_data(self):
-        if self._gridpoints is None:
-            coord = self.coordinates.data._local - self.grid.origin
-            return (np.floor(coord) / self.grid.spacing).astype(int)
-        return self._gridpoints.data.view(np.ndarray)
+        try:
+            return self._gridpoints.data.view(np.ndarray)
+        except AttributeError:
+            return None
 
     @cached_property
     def coords_or_points(self):
@@ -1386,7 +1387,6 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Rows are locations, columns are source/receivers
         nloc, npoint = self.matrix.shape
-
         super().__init_finalize__(
             *args, **kwargs, npoint=npoint)
 
@@ -1572,6 +1572,8 @@ def free_data(self):
         self.scatter_result = None
         self.scattered_data = None
 
+    __distributor_setup__ = DiscreteFunction.__distributor_setup__
+
     @property
     def dt(self):
         t = self.time_dim
diff --git a/tests/test_caching.py b/tests/test_caching.py
index 408a49453e..aa2b61ac6b 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -631,6 +631,7 @@ def test_sparse_function(self, operate_on_empty_cache):
         # With the legacy caching model also u, u(inds), u_coords, and
         # u_coords(inds) would have been added to the cache; not anymore!
         ncreated = 4
+
         assert len(_SymbolCache) == cur_cache_size + ncreated
 
         cur_cache_size = len(_SymbolCache)
diff --git a/tests/test_dle.py b/tests/test_dle.py
index cbe19aee7e..9dfa8af671 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -284,8 +284,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rx,ry'],
-                     't,p_s0_blk0,p_s,rx,ry')
+    assert_structure(op, ['t', 't,p_s0_blk0,p_s'], 't,p_s0_blk0,p_s')
 
 
 class TestBlockingParTile(object):
@@ -942,16 +941,16 @@ def test_parallel_prec_inject(self):
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 1}))
         iterations = FindNodes(Iteration).visit(op0)
-        assert all(not i.pragmas for i in iterations[:2])
-        assert 'omp for collapse(2) schedule(dynamic,chunk_size)'\
-            in iterations[2].pragmas[0].value
+        assert not iterations[0].pragmas
+        assert 'omp for collapse(1) schedule(dynamic,chunk_size)'\
+            in iterations[1].pragmas[0].value
 
         op1 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 1,
                                                'par-collapse-work': 1}))
         iterations = FindNodes(Iteration).visit(op1)
         assert not iterations[0].pragmas
-        assert 'omp for collapse(3) schedule(dynamic,chunk_size)'\
+        assert 'omp for collapse(1) schedule(dynamic,chunk_size)'\
             in iterations[1].pragmas[0].value
 
 
diff --git a/tests/test_msparse.py b/tests/test_msparse.py
index b9d65de125..5cbfde848a 100644
--- a/tests/test_msparse.py
+++ b/tests/test_msparse.py
@@ -392,3 +392,7 @@ def test_mpi(self):
 
         if grid.distributor.myrank == 0:
             assert sf.data[0, 0] == -3.0  # 1 * (1 * 1) * 1 + (-1) * (2 * 2) * 1
+
+
+if __name__ == "__main__":
+    TestMatrixSparseTimeFunction().test_mpi()

From 28546b2386b925269a1eff5518669e9bbe208f7e Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 19 May 2023 11:06:34 -0400
Subject: [PATCH 10/90] operator: explicit alias of arg_apply

---
 devito/operator/operator.py |  5 +++--
 devito/types/sparse.py      | 16 +++++++++++++---
 tests/test_mpi.py           |  6 +++---
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 26202a419b..5d8828fe63 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -632,9 +632,10 @@ def _postprocess_arguments(self, args, **kwargs):
         """Process runtime arguments upon returning from ``.apply()``."""
         for p in self.parameters:
             try:
-                p._arg_apply(args[p.name], args[p.coordinates.name], kwargs.get(p.name))
+                subfuncs = (args[s] for s in p._subfunc_names)
+                p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
             except AttributeError:
-                p._arg_apply(args[p.name], kwargs.get(p.name))
+                p._arg_apply(args[p.name], alias=kwargs.get(p.name))
 
     @cached_property
     def _known_arguments(self):
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 19f6cd20c1..9440513905 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -195,6 +195,16 @@ def comm(self):
     def distributor(self):
         return self.grid.distributor
 
+    @property
+    def _subfunc_names(self):
+        names = []
+        for s in self._sub_functions:
+            try:
+                names.append(getattr(self, s).name)
+            except AttributeError:
+                pass
+        return tuple(names)
+
     @cached_property
     def _point_symbols(self):
         """Symbol for coordinate value in each dimension of the point."""
@@ -477,10 +487,10 @@ def _dist_data_gather(self, data):
         self._data[mask] = gathered[:]
 
     def _dist_subfunc_gather(self, sfuncd, sfunc):
-        if sfuncd is None:
-            pass
-        elif np.sum([sfuncd._obj.size[i] for i in range(self.ndim)]) > 0:
+        try:
             sfuncd = sfunc._C_as_ndarray(sfuncd)
+        except AttributeError:
+            pass
         # If not using MPI, don't waste time
         if self.distributor.nprocs == 1:
             return
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 8639a435e4..55ba5e90dd 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2523,10 +2523,10 @@ def test_adjoint_F_no_omp(self):
 
 
 if __name__ == "__main__":
-    configuration['mpi'] = 'overlap'
+    # configuration['mpi'] = 'overlap'
     # TestDecomposition().test_reshape_left_right()
-    TestOperatorSimple().test_trivial_eq_2d()
+    # TestOperatorSimple().test_trivial_eq_2d()
     # TestFunction().test_halo_exchange_bilateral()
     # TestSparseFunction().test_scatter_gather()
     # TestOperatorAdvanced().test_fission_due_to_antidep()
-    # TestIsotropicAcoustic().test_adjoint_F_no_omp()
+    TestIsotropicAcoustic().test_adjoint_F(1)

From 5a0b68243c0006021d458ae20c068493e9e39773 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 19 May 2023 14:24:06 -0400
Subject: [PATCH 11/90] api: enforce gridpoints as subfunc for precomputed

---
 devito/types/grid.py   |   2 +-
 devito/types/sparse.py | 103 +++++++++++++++++------------------------
 tests/test_mpi.py      |  37 +++++++++++++--
 3 files changed, 78 insertions(+), 64 deletions(-)

diff --git a/devito/types/grid.py b/devito/types/grid.py
index 7596ac5325..faecb25236 100644
--- a/devito/types/grid.py
+++ b/devito/types/grid.py
@@ -226,7 +226,7 @@ def origin_ioffset(self):
         """Offset index of the local (per-process) origin from the domain origin."""
         grid_origin = [min(i) for i in self.distributor.glb_numb]
         assert len(grid_origin) == len(self.spacing)
-        return grid_origin
+        return tuple(grid_origin)
 
     @property
     def origin_offset(self):
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 9440513905..0209067f45 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -136,6 +136,7 @@ def __subfunc_setup__(self, key, suffix):
 
         dimensions = dimensions[:n]
         shape = shape[:n]
+
         sf = SubFunction(
             name=name, parent=self, dtype=dtype, dimensions=dimensions,
             shape=shape, space_order=0, initializer=key, alias=self.alias,
@@ -187,6 +188,11 @@ def _sparse_dim(self):
     def _mpitype(self):
         return MPI._typedict[np.dtype(self.dtype).char]
 
+    @property
+    def _smpitype(self):
+        sfuncs = [getattr(self, s) for s in self._sub_functions]
+        return {s: MPI._typedict[np.dtype(s.dtype).char] for s in sfuncs}
+
     @property
     def comm(self):
         return self.grid.distributor.comm
@@ -292,7 +298,7 @@ def _support(self):
         """
         max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
         minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
-        return np.stack([minmax(self.gridpoints + s) for s in self._point_support],
+        return np.stack([minmax(self.gridpoints_data + s) for s in self._point_support],
                         axis=2)
 
     @property
@@ -455,14 +461,13 @@ def _dist_subfunc_scatter(self, subfunc):
         _, scount, sdisp, rshape, rcount, rdisp = \
             self._dist_subfunc_alltoall(subfunc, dmap=dmap)
         scattered = np.empty(shape=rshape, dtype=subfunc.dtype)
-        self.comm.Alltoallv([sfuncd, scount, sdisp, self._mpitype],
-                            [scattered, rcount, rdisp, self._mpitype])
+        self.comm.Alltoallv([sfuncd, scount, sdisp, self._smpitype[subfunc]],
+                            [scattered, rcount, rdisp, self._smpitype[subfunc]])
         sfuncd = scattered
 
         # Translate global coordinates into local coordinates
         if self.dist_origin[subfunc] is not None:
-            sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=self.dtype)
-
+            sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=subfunc.dtype)
         return {subfunc: sfuncd}
 
     def _dist_data_gather(self, data):
@@ -500,13 +505,13 @@ def _dist_subfunc_gather(self, sfuncd, sfunc):
         mask = self._dist_scatter_mask(dmap=dmap)
         # Pack (reordered) coordinates so that they can be sent out via an Alltoallv
         if self.dist_origin[sfunc] is not None:
-            sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=self.dtype)
+            sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=sfunc.dtype)
         # Send out the sparse point coordinates
         sshape, scount, sdisp, _, rcount, rdisp = \
             self._dist_subfunc_alltoall(sfunc, dmap=dmap)
         gathered = np.empty(shape=sshape, dtype=sfunc.dtype)
-        self.comm.Alltoallv([sfuncd, rcount, rdisp, self._mpitype],
-                            [gathered, scount, sdisp, self._mpitype])
+        self.comm.Alltoallv([sfuncd, rcount, rdisp, self._smpitype[sfunc]],
+                            [gathered, scount, sdisp, self._smpitype[sfunc]])
         sfunc.data._local[mask[self._sparse_position]] = gathered[:]
 
         # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent
@@ -523,7 +528,8 @@ def _dist_scatter(self, data=None):
     def _dist_gather(self, data, *subfunc):
         self._dist_data_gather(data)
         for (sg, s) in zip(subfunc, self._sub_functions):
-            self._dist_subfunc_gather(sg, getattr(self, s))
+            if getattr(self, s) is not None:
+                self._dist_subfunc_gather(sg, getattr(self, s))
 
     def _arg_defaults(self, alias=None):
         key = alias or self
@@ -802,7 +808,9 @@ def gridpoints(self):
             raise ValueError("No coordinates attached to this SparseFunction")
         return (
             np.floor(self.coordinates.data._local - self.grid.origin) / self.grid.spacing
-        ).astype(int)
+        ).astype(np.int32)
+
+    gridpoints_data = gridpoints
 
     def guard(self, expr=None, offset=0):
         """
@@ -1057,11 +1065,10 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     uses `*args` to (re-)create the dimension arguments of the symbolic object.
     """
 
-    _sub_functions = ('coordinates', 'gridpoints', 'interpolation_coeffs')
+    _sub_functions = ('gridpoints', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'coordinates_data', 'gridpoints_data',
-                    'interpolation_coeffs_data'))
+                   ('r', 'gridpoints_data', 'interpolation_coeffs_data'))
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
@@ -1083,19 +1090,19 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Specifying only `npoints` is acceptable; this will require the user
         # to setup the coordinates data later on
-        npoint = kwargs.get('npoint', None)
         if self.npoint and coordinates is None and gridpoints is None:
-            coordinates = np.zeros((npoint, self.grid.dim))
+            gridpoints = np.zeros((self.npoint, self.grid.dim))
 
         if coordinates is not None:
-            self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
-            self._gridpoints = None
-            self._dist_origin = {self._coordinates: self.grid.origin_offset}
+            # Convert to gridpoints
+            if isinstance(coordinates, SubFunction):
+                raise ValueError("`coordinates` only accepted as array")
+            loc = np.floor((coordinates - self.grid.origin) / self.grid.spacing)
+            self._gridpoints = self.__subfunc_setup__(loc.astype(int), 'gridpoints')
         else:
             assert gridpoints is not None
-            self._coordinates = None
             self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
-            self._dist_origin = {self._coordinates: self.grid.origin_ioffset}
+        self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
         # Setup the interpolation coefficients. These are compulsory
         interpolation_coeffs = kwargs.get('interpolation_coeffs',
@@ -1120,14 +1127,19 @@ def _point_increments(self):
         """Index increments in each dimension for each point symbol."""
         return tuple(product(range(-self.r//2+1, self.r//2+1), repeat=self.grid.dim))
 
-    @property
-    def coordinates(self):
-        return self._coordinates
+    @cached_property
+    def _point_support(self):
+        return np.array(tuple(product(range(-self.r // 2 + 1, self.r // 2 + 1),
+                                      repeat=self.grid.dim)))
 
     @property
     def gridpoints(self):
         return self._gridpoints
 
+    @property
+    def gridpoints_data(self):
+        return self.gridpoints.data._local
+
     @property
     def dist_origin(self):
         return self._dist_origin
@@ -1137,41 +1149,16 @@ def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
         return self._interpolation_coeffs
 
-    @property
-    def coordinates_data(self):
-        try:
-            return self.coordinates.data.view(np.ndarray)
-        except AttributeError:
-            return None
-
-    @property
-    def gridpoints_data(self):
-        try:
-            return self._gridpoints.data.view(np.ndarray)
-        except AttributeError:
-            return None
-
-    @cached_property
-    def coords_or_points(self):
-        if self.gridpoints is None:
-            return self.coordinates
-        else:
-            return self.gridpoints
-
     @property
     def interpolation_coeffs_data(self):
-        return self.interpolation_coeffs.data.view(np.ndarray)
+        return self.interpolation_coeffs.data._local
 
     @cached_property
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each dimension."""
         p_dim = self.indices[self._sparse_position]
-        if self._gridpoints is None:
-            return tuple([self.coordinates.indexify((p_dim, i))
-                          for i in range(self.grid.dim)])
-        else:
-            return tuple([self.gridpoints.indexify((p_dim, i)) * d
-                          for (i, d) in enumerate(self.grid.spacing_symbols)])
+        return tuple([self.coordinates.indexify((p_dim, i))
+                      for i in range(self.grid.dim)])
 
     @memoized_meth
     def _index_matrix(self, offset):
@@ -1181,21 +1168,17 @@ def _index_matrix(self, offset):
         # ConditionalDimensions for a given set of indirection indices
 
         # List of indirection indices for all adjacent grid points
-        if self._gridpoints is None:
-            index_matrix = [tuple(idx + ii + offset
-                                  for ii, idx in zip(inc, self._coordinate_indices))
-                            for inc in self._point_increments]
-        else:
-            ddim = self._gridpoints.dimensions[1]
-            index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
-                                  for (ii, d) in zip(inc, range(self.grid.dim)))
-                            for inc in self._point_increments]
+        ddim = self._gridpoints.dimensions[1]
+        index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
+                              for (ii, d) in zip(inc, range(self.grid.dim)))
+                        for inc in self._point_increments]
         shifts = [tuple(ii + offset for ii in inc)
                   for inc in self._point_increments]
         # A unique symbol for each indirection index
         indices = filter_ordered(flatten(index_matrix))
         points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                               for i, p in enumerate(indices)])
+
         return index_matrix, points, shifts
 
 
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 55ba5e90dd..6db4a27f48 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -6,7 +6,7 @@
 from devito import (Grid, Constant, Function, TimeFunction, SparseFunction,
                     SparseTimeFunction, Dimension, ConditionalDimension, SubDimension,
                     SubDomain, Eq, Ne, Inc, NODE, Operator, norm, inner, configuration,
-                    switchconfig, generic_derivative)
+                    switchconfig, generic_derivative, PrecomputedSparseFunction)
 from devito.arch.compiler import OneapiCompiler
 from devito.data import LEFT, RIGHT
 from devito.ir.iet import (Call, Conditional, Iteration, FindNodes, FindSymbols,
@@ -556,6 +556,36 @@ def test_sparse_coords_issue1823(self):
 
         assert np.allclose(rec.coordinates.data[:], ref.coordinates.data)
 
+    @pytest.mark.parallel(mode=4)
+    @pytest.mark.parametrize('r', [2])
+    def test_precomputed_sparse(self, r):
+        grid = Grid(shape=(4, 4), extent=(3.0, 3.0))
+
+        coords = np.array([(1.0, 1.0), (2.0, 2.0), (1.0, 2.0), (2.0, 1.0)])
+        points = np.array([(1, 1), (2, 2), (1, 2), (2, 1)])
+        coeffs = np.ones((4, 2, r))
+
+        sf1 = PrecomputedSparseFunction(name="sf1", grid=grid, coordinates=coords,
+                                        npoint=4, interpolation_coeffs=coeffs, r=r)
+        sf2 = PrecomputedSparseFunction(name="sf2", grid=grid, gridpoints=points,
+                                        npoint=4, interpolation_coeffs=coeffs, r=r)
+
+        assert sf1.npoint == 1
+        assert sf2.npoint == 1
+        assert np.all(sf1.gridpoints.data.shape == (1, 2))
+        assert np.all(sf2.gridpoints.data.shape == (1, 2))
+        assert np.all(sf1.gridpoints_data == sf2.gridpoints_data)
+        assert np.all(sf1.interpolation_coeffs.shape == (1, 2, r))
+        assert np.all(sf2.interpolation_coeffs.shape == (1, 2, r))
+
+        u = Function(name="u", grid=grid, space_order=r)
+        u._data_with_outhalo[:] = 1
+        Operator(sf2.interpolate(u))()
+        print(sf2.data)
+        assert np.all(sf2.data == 4)
+        Operator(sf1.interpolate(u))()
+        assert np.all(sf1.data == 4)
+
 
 class TestOperatorSimple(object):
 
@@ -2527,6 +2557,7 @@ def test_adjoint_F_no_omp(self):
     # TestDecomposition().test_reshape_left_right()
     # TestOperatorSimple().test_trivial_eq_2d()
     # TestFunction().test_halo_exchange_bilateral()
-    # TestSparseFunction().test_scatter_gather()
+    # TestSparseFunction().test_sparse_coords()
+    TestSparseFunction().test_precomputed_sparse(2)
     # TestOperatorAdvanced().test_fission_due_to_antidep()
-    TestIsotropicAcoustic().test_adjoint_F(1)
+    # TestIsotropicAcoustic().test_adjoint_F(1)

From be1625dd6f634036d169885e03322e9e2aec2028 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 22 May 2023 12:52:00 -0400
Subject: [PATCH 12/90] api: revamp interpolator for better generalization

---
 devito/operations/interpolators.py | 259 +++++++++++++----------------
 devito/tools/algorithms.py         |   1 +
 devito/types/basic.py              |   3 +-
 devito/types/dense.py              |   7 +
 devito/types/sparse.py             | 159 +++++++++++-------
 tests/test_dimension.py            |   2 +-
 tests/test_dle.py                  |  11 +-
 tests/test_dse.py                  |   2 +-
 tests/test_mpi.py                  |   5 +-
 tests/test_pickle.py               |  18 +-
 10 files changed, 245 insertions(+), 222 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 5ff0fdafc8..411d41bc19 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -1,11 +1,12 @@
 from abc import ABC, abstractmethod
+from itertools import product
 
 import sympy
-import numpy as np
 from cached_property import cached_property
 
-from devito.symbolics import retrieve_function_carriers
-from devito.tools import as_tuple, powerset, flatten, prod
+from devito.finite_differences.elementary import floor
+from devito.symbolics import retrieve_function_carriers, INT
+from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol)
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
@@ -106,15 +107,12 @@ def interpolate(self, *args, **kwargs):
         pass
 
 
-class LinearInterpolator(GenericInterpolator):
+class WeightedInterpolation(GenericInterpolator):
 
     """
-    Concrete implementation of GenericInterpolator implementing a Linear interpolation
-    scheme, i.e. Bilinear for 2D and Trilinear for 3D problems.
-
-    Parameters
-    ----------
-    sfunction: The SparseFunction that this Interpolator operates on.
+    Represent an Interpolation operation on a SparseFunction that is separable
+    in space, meaning the coefficients are defined for each Dimension separately
+    and multiplied at a given point: `w[x, y] = wx[x] * wy[y]`
     """
 
     def __init__(self, sfunction):
@@ -124,91 +122,46 @@ def __init__(self, sfunction):
     def grid(self):
         return self.sfunction.grid
 
-    @cached_property
-    def _interpolation_coeffs(self):
-        """
-        Symbolic expression for the coefficients for sparse point interpolation
-        according to:
+    @property
+    def _weights(self):
+        raise NotImplementedError
 
-            https://en.wikipedia.org/wiki/Bilinear_interpolation.
+    @property
+    def _psym(self):
+        return self.sfunction._point_symbols
 
-        Returns
-        -------
-        Matrix of coefficient expressions.
-        """
-        # Grid indices corresponding to the corners of the cell ie x1, y1, z1
-        indices1 = tuple(sympy.symbols('%s1' % d) for d in self.grid.dimensions)
-        indices2 = tuple(sympy.symbols('%s2' % d) for d in self.grid.dimensions)
-        # 1, x1, y1, z1, x1*y1, ...
-        indices = list(powerset(indices1))
-        indices[0] = (1,)
-        point_sym = list(powerset(self.sfunction._point_symbols))
-        point_sym[0] = (1,)
-        # 1, px. py, pz, px*py, ...
-        A = []
-        ref_A = [np.prod(ind) for ind in indices]
-        # Create the matrix with the same increment order as the point increment
-        for i in self.sfunction._point_increments:
-            # substitute x1 by x2 if increment in that dimension
-            subs = dict((indices1[d], indices2[d] if i[d] == 1 else indices1[d])
-                        for d in range(len(i)))
-            A += [[1] + [a.subs(subs) for a in ref_A[1:]]]
-
-        A = sympy.Matrix(A)
-        # Coordinate values of the sparse point
-        p = sympy.Matrix([[np.prod(ind)] for ind in point_sym])
-
-        # reference cell x1:0, x2:h_x
-        left = dict((a, 0) for a in indices1)
-        right = dict((b, dim.spacing) for b, dim in zip(indices2, self.grid.dimensions))
-        reference_cell = {**left, **right}
-        # Substitute in interpolation matrix
-        A = A.subs(reference_cell)
-        return A.inv().T * p
+    @property
+    def _gdim(self):
+        return self.grid.dimensions
 
-    def _interpolation_indices(self, variables, offset=0, field_offset=0,
-                               implicit_dims=None):
-        """
-        Generate interpolation indices for the DiscreteFunctions in ``variables``.
-        """
-        index_matrix, points = self.sfunction._index_matrix(offset)
+    def implicit_dims(self, implicit_dims):
+        return as_tuple(implicit_dims) + self.sfunction.dimensions
 
-        idx_subs = []
-        for i, idx in enumerate(index_matrix):
-            # Introduce ConditionalDimension so that we don't go OOB
-            mapper = {}
-            for j, d in zip(idx, self.grid.dimensions):
-                p = points[j]
-                lb = sympy.And(p >= d.symbolic_min - self.sfunction._radius,
-                               evaluate=False)
-                ub = sympy.And(p <= d.symbolic_max + self.sfunction._radius,
-                               evaluate=False)
-                condition = sympy.And(lb, ub, evaluate=False)
-                mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                                 condition=condition, indirect=True)
+    @property
+    def r(self):
+        return self.sfunction.r
 
-            # Apply mapper to each variable with origin correction before the
-            # Dimensions get replaced
-            subs = {v: v.subs({k: c - v.origin.get(k, 0) for k, c in mapper.items()})
-                    for v in variables}
+    @property
+    def _interp_points(self):
+        return range(-self.r+1, self.r+1)
 
-            # Track Indexed substitutions
-            idx_subs.append(subs)
+    @property
+    def _nd_points(self):
+        return product(self._interp_points, repeat=self.grid.dim)
 
-        # Temporaries for the position
-        temps = [Eq(v, k, implicit_dims=implicit_dims)
-                 for k, v in self.sfunction._position_map.items()]
-        # Temporaries for the indirection dimensions
-        temps.extend([Eq(v, k.subs(self.sfunction._position_map),
-                         implicit_dims=implicit_dims)
-                      for k, v in points.items()])
-        # Temporaries for the coefficients
-        temps.extend([Eq(p, c.subs(self.sfunction._position_map),
-                         implicit_dims=implicit_dims)
-                      for p, c in zip(self.sfunction._point_symbols,
-                                      self.sfunction._coordinate_bases(field_offset))])
+    @property
+    def _interpolation_coeffs(self):
+        coeffs = {}
+        for p in self._nd_points:
+            coeffs[p] = prod([self._weights[d][i] for (d, i) in zip(self._gdim, p)])
+        return list(coeffs.values())
 
-        return idx_subs, temps
+    def _coeff_temps(self, implicit_dims):
+        return []
+
+    def _positions(self, implicit_dims):
+        return [Eq(v, k, implicit_dims=implicit_dims)
+                for k, v in self.sfunction._position_map.items()]
 
     def subs_coords(self, _expr, *idx_subs):
         return [_expr.xreplace(v_sub) * b.xreplace(v_sub)
@@ -219,8 +172,47 @@ def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
                     implicit_dims=implicit_dims)
                 for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
 
-    def implicit_dims(self, implicit_dims):
-        return as_tuple(implicit_dims) + self.sfunction.dimensions
+    def _interpolation_indices(self, variables, offset=0, field_offset=0,
+                               implicit_dims=None):
+        """
+        Generate interpolation indices for the DiscreteFunctions in ``variables``.
+        """
+        idx_subs = []
+        points = {d: [] for d in self._gdim}
+        mapper = {d: [] for d in self._gdim}
+
+        # Position map and temporaries for it
+        pmap = self.sfunction._coordinate_indices
+
+        # Temporaries for the position
+        temps = self._positions(implicit_dims)
+
+        # Coefficient symbol expression
+        temps.extend(self._coeff_temps(implicit_dims))
+
+        # Create positions and indices temporaries/indirections
+        for ((di, d), pos) in zip(enumerate(self._gdim), pmap):
+            for (ri, r) in enumerate(self._interp_points):
+                p = Symbol(name='ii_%s_%s_%d' % (self.sfunction.name, d.name, ri))
+                points[d].append(p)
+                # Conditionals to avoid OOB
+                lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
+                ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+                condition = sympy.And(lb, ub, evaluate=False)
+                mapper[d].append(ConditionalDimension(p.name, self.sfunction._sparse_dim,
+                                                      condition=condition, indirect=True))
+                temps.extend([Eq(p, pos + r, implicit_dims=implicit_dims)])
+
+        # Substitution mapper
+        for p in self._nd_points:
+            # Apply mapper to each variable with origin correction before the
+            # Dimensions get replaced
+            subs = {v: v.subs({k: c[pi] - v.origin.get(k, 0)
+                              for ((k, c), pi) in zip(mapper.items(), p)})
+                    for v in variables}
+            idx_subs.append(subs)
+
+        return idx_subs, temps
 
     def interpolate(self, expr, offset=0, increment=False, self_subs={},
                     implicit_dims=None):
@@ -262,7 +254,6 @@ def callback():
 
             # Substitute coordinate base symbols into the interpolation coefficients
             args = self.subs_coords(_expr, *idx_subs)
-
             # Accumulate point-wise contributions into a temporary
             rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
@@ -322,66 +313,56 @@ def callback():
         return Injection(field, expr, offset, self, callback)
 
 
-class PrecomputedInterpolator(LinearInterpolator):
+class LinearInterpolator(WeightedInterpolation):
 
-    def __init__(self, obj):
-        self.sfunction = obj
+    """
+    Concrete implementation of GenericInterpolator implementing a Linear interpolation
+    scheme, i.e. Bilinear for 2D and Trilinear for 3D problems.
 
-    @property
-    def r(self):
-        return self.obj.r
+    Parameters
+    ----------
+    sfunction: The SparseFunction that this Interpolator operates on.
+    """
 
-    def _interpolation_indices(self, variables, offset=0, field_offset=0,
-                               implicit_dims=None):
-        """
-        Generate interpolation indices for the DiscreteFunctions in ``variables``.
-        """
-        if self.sfunction.gridpoints is None:
-            return super()._interpolation_indices(variables, offset=offset,
-                                                  field_offset=field_offset,
-                                                  implicit_dims=implicit_dims)
+    @cached_property
+    def _weights(self):
+        return {d: [1 - p/d.spacing, p/d.spacing]
+                for (d, p) in zip(self._gdim, self._psym)}
 
-        index_matrix, points, shifts = self.sfunction._index_matrix(offset)
+    def _coeff_temps(self, implicit_dims):
+        pmap = self.sfunction._position_map.values()
+        return [Eq(self._psym[d], pos - d.spacing*INT(floor(pos/d.spacing)),
+                   implicit_dims=implicit_dims)
+                for (d, pos) in zip(self._gdim, pmap)]
 
-        idx_subs = []
-        coeffs = self._interpolation_coeffs
-        dt, it = coeffs.dimensions[1:]
-        for i, idx in enumerate(index_matrix):
-            # Introduce ConditionalDimension so that we don't go OOB
-            mapper = {}
-            for j, (di, d) in zip(idx, enumerate(self.grid.dimensions)):
-                p = points[j]
-                lb = sympy.And(p >= d.symbolic_min - self.sfunction.r // 2,
-                               evaluate=False)
-                ub = sympy.And(p <= d.symbolic_max + self.sfunction.r // 2,
-                               evaluate=False)
-                condition = sympy.And(lb, ub, evaluate=False)
-                mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                                 condition=condition, indirect=True)
-                mapper[coeffs._subs(dt, di)] = coeffs.subs({dt: di, it: shifts[i][di]})
-            # Track Indexed substitutions
-            idx_subs.append(mapper)
 
-        # Temporaries for the indirection dimensions
-        temps = [Eq(v, k, implicit_dims=implicit_dims) for k, v in points.items()]
+class PrecomputedInterpolator(WeightedInterpolation):
 
-        return idx_subs, temps
+    def _positions(self, implicit_dims):
+        if self.sfunction.gridpoints is None:
+            return [Eq(v, k, implicit_dims=implicit_dims)
+                    for k, v in self.sfunction._position_map.items()]
+        # No position temp as we have directly the gridpoints
+        return []
 
     @property
-    def _interpolation_coeffs(self):
+    def _interp_points(self):
+        return range(-self.r//2 + 1, self.r//2 + 1)
+
+    @property
+    def _icoeffs(self):
         return self.sfunction.interpolation_coeffs
 
     @property
-    def _interpolation_coeffsp(self):
-        d = self.sfunction.interpolation_coeffs.dimensions[1]
-        return prod([self.sfunction.interpolation_coeffs._subs(d, i)
-                     for (i, _) in enumerate(self.sfunction.grid.dimensions)])
+    def _idim(self):
+        return self.sfunction.interpolation_coeffs.dimensions[-1]
 
-    def subs_coords(self, _expr, *idx_subs):
-        b = self._interpolation_coeffsp
-        return [_expr.xreplace(v_sub) * b.xreplace(v_sub) for v_sub in idx_subs]
+    @property
+    def _ddim(self):
+        return self.sfunction.interpolation_coeffs.dimensions[1]
 
-    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
-        b = self._interpolation_coeffsp
-        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b.xreplace(vsub),
-                    implicit_dims=implicit_dims) for vsub in idx_subs]
+    @cached_property
+    def _weights(self):
+        return {d: [self._icoeffs.subs({self._ddim: di, self._idim: k})
+                    for k in self._interp_points]
+                for (di, d) in enumerate(self._gdim)}
diff --git a/devito/tools/algorithms.py b/devito/tools/algorithms.py
index 0021a6f608..f7edde22f5 100644
--- a/devito/tools/algorithms.py
+++ b/devito/tools/algorithms.py
@@ -72,4 +72,5 @@ def toposort(data):
                               if item not in ordered])
     if len(processed) != len(set(flatten(data) + flatten(data.values()))):
         raise ValueError("A cyclic dependency exists amongst %r" % data)
+
     return processed
diff --git a/devito/types/basic.py b/devito/types/basic.py
index d7a422b39e..45d54f51db 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -979,7 +979,7 @@ def origin(self):
         f(x) : origin = 0
         f(x + hx/2) : origin = hx/2
         """
-        return DimensionTuple(*(r-d for d, r in zip(self.dimensions, self.indices_ref)),
+        return DimensionTuple(*(r - d for d, r in zip(self.dimensions, self.indices_ref)),
                               getters=self.dimensions)
 
     @property
@@ -1249,6 +1249,7 @@ def indexify(self, indices=None, subs=None):
                    zip(self.args, self.dimensions, self.origin, subs)]
         indices = [i.xreplace({k: sympy.Integer(k) for k in i.atoms(sympy.Float)})
                    for i in indices]
+
         return self.indexed[indices]
 
     def __getitem__(self, index):
diff --git a/devito/types/dense.py b/devito/types/dense.py
index aec69bc1eb..0ad9b1f3fe 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1466,6 +1466,13 @@ def __padding_setup__(self, **kwargs):
     def _halo_exchange(self):
         return
 
+    @property
+    def origin(self):
+        """
+        SubFunction have zero origin
+        """
+        return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
+
     def _arg_values(self, **kwargs):
         if self.name in kwargs:
             raise RuntimeError("`%s` is a SubFunction, so it can't be assigned "
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 0209067f45..6561207938 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -24,7 +24,7 @@
                                     DynamicDimension)
 from devito.types.basic import Symbol
 from devito.types.equation import Eq, Inc
-from devito.types.utils import IgnoreDimSort
+from devito.types.utils import IgnoreDimSort, DimensionTuple
 
 
 __all__ = ['SparseFunction', 'SparseTimeFunction', 'PrecomputedSparseFunction',
@@ -110,7 +110,7 @@ def __subfunc_setup__(self, key, suffix):
         dimensions = (self.indices[self._sparse_position],
                       Dimension(name='d'),
                       Dimension(name='i'))
-        shape = (self.npoint, self.grid.dim, self.r)
+        shape = (self.npoint, self.grid.dim, self._radius)
 
         if key is None:
             # Fallback to default behaviour
@@ -190,7 +190,8 @@ def _mpitype(self):
 
     @property
     def _smpitype(self):
-        sfuncs = [getattr(self, s) for s in self._sub_functions]
+        sfuncs = [getattr(self, s) for s in self._sub_functions
+                  if getattr(self, s) is not None]
         return {s: MPI._typedict[np.dtype(s.dtype).char] for s in sfuncs}
 
     @property
@@ -214,14 +215,26 @@ def _subfunc_names(self):
     @cached_property
     def _point_symbols(self):
         """Symbol for coordinate value in each dimension of the point."""
-        return tuple(Symbol(name='p%s' % d, dtype=self.dtype)
-                     for d in self.grid.dimensions)
+        return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
+                                for d in self.grid.dimensions),
+                              getters=self.grid.dimensions)
 
     @cached_property
     def _position_map(self):
         """
-        Symbols map for the position of the sparse points relative to the grid
+        Symbols map for the physical position of the sparse points relative to the grid
         origin.
+        """
+        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
+                   for d in self.grid.dimensions]
+        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
+                                                          self._coordinate_symbols,
+                                                          self.grid.origin_symbols)])
+
+    @cached_property
+    def _coordinate_indices(self):
+        """
+        Symbol for each grid index according to the coordinates.
 
         Notes
         -----
@@ -233,24 +246,9 @@ def _position_map(self):
         the position. We mitigate this problem by computing the positions
         individually (hence the need for a position map).
         """
-        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
-                   for d in self.grid.dimensions]
-        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
-                                                          self._coordinate_symbols,
-                                                          self.grid.origin_symbols)])
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(self.r+1), repeat=self.grid.dim))
-
-    @cached_property
-    def _coordinate_indices(self):
-        """Symbol for each grid index according to the coordinates."""
-        return tuple([INT(floor((c - o) / i.spacing))
-                      for c, o, i in zip(self._coordinate_symbols,
-                                         self.grid.origin_symbols,
-                                         self.grid.dimensions[:self.grid.dim])])
+        return tuple([INT(floor(p / i.spacing))
+                      for p, i in zip(self._position_map.values(),
+                                      self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
         """Symbol for the base coordinates of the reference grid point."""
@@ -285,10 +283,14 @@ def inject(self, *args, **kwargs):
         """
         return self.interpolator.inject(*args, **kwargs)
 
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each dimension for each point symbol."""
+        return tuple(product(range(self.r+1), repeat=self.grid.dim))
+
     @cached_property
     def _point_support(self):
-        return np.array(tuple(product(range(-self._radius + 1, self._radius + 1),
-                                      repeat=self.grid.dim)))
+        return np.array(self._point_increments)
 
     @property
     def _support(self):
@@ -298,7 +300,7 @@ def _support(self):
         """
         max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
         minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
-        return np.stack([minmax(self.gridpoints_data + s) for s in self._point_support],
+        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
                         axis=2)
 
     @property
@@ -803,14 +805,14 @@ def dist_origin(self):
         return self._dist_origin
 
     @property
-    def gridpoints(self):
+    def _coords_indices(self):
         if self.coordinates.data is None:
             raise ValueError("No coordinates attached to this SparseFunction")
         return (
             np.floor(self.coordinates.data._local - self.grid.origin) / self.grid.spacing
         ).astype(np.int32)
 
-    gridpoints_data = gridpoints
+    gridpoints = _coords_indices
 
     def guard(self, expr=None, offset=0):
         """
@@ -1065,10 +1067,11 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     uses `*args` to (re-)create the dimension arguments of the symbolic object.
     """
 
-    _sub_functions = ('gridpoints', 'interpolation_coeffs')
+    _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'gridpoints_data', 'interpolation_coeffs_data'))
+                   ('r', 'gridpoints_data', 'coordinates_data',
+                    'interpolation_coeffs_data'))
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
@@ -1090,19 +1093,19 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Specifying only `npoints` is acceptable; this will require the user
         # to setup the coordinates data later on
+        npoint = kwargs.get('npoint', None)
         if self.npoint and coordinates is None and gridpoints is None:
-            gridpoints = np.zeros((self.npoint, self.grid.dim))
+            coordinates = np.zeros((npoint, self.grid.dim))
 
         if coordinates is not None:
-            # Convert to gridpoints
-            if isinstance(coordinates, SubFunction):
-                raise ValueError("`coordinates` only accepted as array")
-            loc = np.floor((coordinates - self.grid.origin) / self.grid.spacing)
-            self._gridpoints = self.__subfunc_setup__(loc.astype(int), 'gridpoints')
+            self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
+            self._gridpoints = None
+            self._dist_origin = {self._coordinates: self.grid.origin_offset}
         else:
             assert gridpoints is not None
+            self._coordinates = None
             self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
-        self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
+            self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
         # Setup the interpolation coefficients. These are compulsory
         interpolation_coeffs = kwargs.get('interpolation_coeffs',
@@ -1122,6 +1125,10 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
+    @property
+    def r(self):
+        return self._radius
+
     @cached_property
     def _point_increments(self):
         """Index increments in each dimension for each point symbol."""
@@ -1129,8 +1136,16 @@ def _point_increments(self):
 
     @cached_property
     def _point_support(self):
-        return np.array(tuple(product(range(-self.r // 2 + 1, self.r // 2 + 1),
-                                      repeat=self.grid.dim)))
+        return np.array(self._point_increments)
+
+    @property
+    def _coords_indices(self):
+        if self.gridpoints is not None:
+            return self.gridpoints.data._local
+        else:
+            return (
+                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
+            ).astype(np.int32)
 
     @property
     def gridpoints(self):
@@ -1138,7 +1153,21 @@ def gridpoints(self):
 
     @property
     def gridpoints_data(self):
-        return self.gridpoints.data._local
+        try:
+            return self.gridpoints.data._local
+        except AttributeError:
+            return None
+
+    @property
+    def coordinates(self):
+        return self._coordinates
+
+    @property
+    def coordinates_data(self):
+        try:
+            return self.coordinates.data._local
+        except AttributeError:
+            return None
 
     @property
     def dist_origin(self):
@@ -1157,29 +1186,35 @@ def interpolation_coeffs_data(self):
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each dimension."""
         p_dim = self.indices[self._sparse_position]
+        if self.gridpoints is not None:
+            return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
+                          for ((di, d), o) in zip(enumerate(self.grid.dimensions),
+                                                  self.grid.origin)])
+
         return tuple([self.coordinates.indexify((p_dim, i))
                       for i in range(self.grid.dim)])
 
-    @memoized_meth
-    def _index_matrix(self, offset):
-        # Note about the use of *memoization*
-        # Since this method is called by `_interpolation_indices`, using
-        # memoization avoids a proliferation of symbolically identical
-        # ConditionalDimensions for a given set of indirection indices
-
-        # List of indirection indices for all adjacent grid points
-        ddim = self._gridpoints.dimensions[1]
-        index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
-                              for (ii, d) in zip(inc, range(self.grid.dim)))
-                        for inc in self._point_increments]
-        shifts = [tuple(ii + offset for ii in inc)
-                  for inc in self._point_increments]
-        # A unique symbol for each indirection index
-        indices = filter_ordered(flatten(index_matrix))
-        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
-                              for i, p in enumerate(indices)])
+    @cached_property
+    def _coordinate_indices(self):
+        """
+        Symbol for each grid index according to the coordinates.
 
-        return index_matrix, points, shifts
+        Notes
+        -----
+        The expression `(coord - origin)/spacing` could also be computed in the
+        mathematically equivalent expanded form `coord/spacing -
+        origin/spacing`. This particular form is problematic when a sparse
+        point is in close proximity of the grid origin, since due to a larger
+        machine precision error it may cause a +-1 error in the computation of
+        the position. We mitigate this problem by computing the positions
+        individually (hence the need for a position map).
+        """
+        if self.gridpoints is not None:
+            ddim = self.gridpoints.dimensions[-1]
+            return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
+        return tuple([INT(floor(p / i.spacing))
+                      for p, i in zip(self._position_map.keys(),
+                                      self.grid.dimensions[:self.grid.dim])])
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
@@ -2175,11 +2210,11 @@ def _dist_scatter(self, data=None):
 
     # The implementation in AbstractSparseFunction now relies on us
     # having a .coordinates property, which we don't have.
-    def _arg_apply(self, dataobj, alias=None):
+    def _arg_apply(self, *dataobj, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
-            key._dist_gather(self._C_as_ndarray(dataobj))
+            key._dist_gather(self._C_as_ndarray(dataobj[0]))
         elif self.grid.distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index 7bf28faa35..150982ef9a 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -1054,7 +1054,7 @@ def test_no_index_sparse(self):
         indices = [(i, i+radius) for i in sf._coordinate_indices]
         bounds = [i.symbolic_size - radius for i in grid.dimensions]
 
-        eqs = []
+        eqs = [Eq(p, v) for (v, p) in sf._position_map.items()]
         for e, i in enumerate(product(*indices)):
             args = [j > 0 for j in i]
             args.extend([j < k for j, k in zip(i, bounds)])
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 9dfa8af671..d3f3b11eaf 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -942,16 +942,7 @@ def test_parallel_prec_inject(self):
                                                'par-collapse-ncores': 1}))
         iterations = FindNodes(Iteration).visit(op0)
         assert not iterations[0].pragmas
-        assert 'omp for collapse(1) schedule(dynamic,chunk_size)'\
-            in iterations[1].pragmas[0].value
-
-        op1 = Operator(eqns, opt=('advanced', {'openmp': True,
-                                               'par-collapse-ncores': 1,
-                                               'par-collapse-work': 1}))
-        iterations = FindNodes(Iteration).visit(op1)
-        assert not iterations[0].pragmas
-        assert 'omp for collapse(1) schedule(dynamic,chunk_size)'\
-            in iterations[1].pragmas[0].value
+        assert 'omp for' in iterations[1].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index b346e00092..0328ecbd65 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2670,7 +2670,7 @@ def test_fullopt(self):
         bns, _ = assert_blocking(op1, {'x0_blk0'})  # due to loop blocking
 
         assert summary0[('section0', None)].ops == 50
-        assert summary0[('section1', None)].ops == 148
+        assert summary0[('section1', None)].ops == 122
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
         assert summary1[('section0', None)].ops == 31
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 6db4a27f48..4aba518d5f 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -572,16 +572,15 @@ def test_precomputed_sparse(self, r):
 
         assert sf1.npoint == 1
         assert sf2.npoint == 1
-        assert np.all(sf1.gridpoints.data.shape == (1, 2))
+        assert np.all(sf1.coordinates.data.shape == (1, 2))
         assert np.all(sf2.gridpoints.data.shape == (1, 2))
-        assert np.all(sf1.gridpoints_data == sf2.gridpoints_data)
+        assert np.all(sf1._coords_indices == sf2.gridpoints_data)
         assert np.all(sf1.interpolation_coeffs.shape == (1, 2, r))
         assert np.all(sf2.interpolation_coeffs.shape == (1, 2, r))
 
         u = Function(name="u", grid=grid, space_order=r)
         u._data_with_outhalo[:] = 1
         Operator(sf2.interpolate(u))()
-        print(sf2.data)
         assert np.all(sf2.data == 4)
         Operator(sf1.interpolate(u))()
         assert np.all(sf1.data == 4)
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index b48f47face..52aa77e1ff 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -260,13 +260,19 @@ def test_shared_data(self, pickle):
         assert sdata.cfields == new_sdata.cfields
         assert sdata.ncfields == new_sdata.ncfields
 
-def test_precomputed_sparse_function():
-    grid = Grid(shape=(10, 10))
+@pytest.mark.parametrize('mode', ['coordinates', 'gridpoints'])
+def test_precomputed_sparse_function(mode):
+    grid = Grid(shape=(11, 11))
+
+    coords = [(0., 0.), (.5, .5), (.7, .2)]
+    gridpoints = [(0, 0), (6, 6), (8, 3)]
+    keys = {'coordinates': coords, 'gridpoints': gridpoints}
+    kw = {mode: keys[mode]}
+    othermode = 'coordinates' if mode == 'gridpoints' else 'gridpoints'
 
     sf = PrecomputedSparseTimeFunction(
         name='sf', grid=grid, r=2, npoint=3, nt=5,
-        coordinates=[(0., 0.), (1., 1.), (2., 2.)],
-        interpolation_coeffs=np.ndarray(shape=(3, 2, 2)),
+        interpolation_coeffs=np.ndarray(shape=(3, 2, 2)), **kw
     )
     sf.data[2, 1] = 5.
 
@@ -280,7 +286,9 @@ def test_precomputed_sparse_function():
     assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data)
 
     # coordinates, since they were given, should also have been pickled
-    assert np.all(sf.coordinates.data == new_sf.coordinates.data)
+    assert np.all(getattr(sf, mode).data == getattr(new_sf, mode).data)
+    assert getattr(sf, othermode) is None
+    assert getattr(new_sf, othermode) is None
 
     assert sf._radius == new_sf._radius == 2
     assert sf.space_order == new_sf.space_order

From a80a06906df2d3690edbd5a5c51b7616098b9644 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 23 May 2023 11:54:51 -0400
Subject: [PATCH 13/90] api: cleanup hierarchy and properties of sparse and
 interpolator

---
 devito/data/data.py                |   2 +-
 devito/operations/interpolators.py |  47 ++--
 devito/tools/dtypes_lowering.py    |   7 +-
 devito/types/sparse.py             | 376 ++++++++++++++---------------
 tests/test_pickle.py               |   2 +-
 5 files changed, 211 insertions(+), 223 deletions(-)

diff --git a/devito/data/data.py b/devito/data/data.py
index 78859cdb7e..1ef35490b8 100644
--- a/devito/data/data.py
+++ b/devito/data/data.py
@@ -140,7 +140,7 @@ def __array_finalize__(self, obj):
     @property
     def _local(self):
         """A view of ``self`` with global indexing disabled."""
-        ret = self.view()
+        ret = self.view(np.ndarray)
         ret._is_distributed = False
         return ret
 
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 411d41bc19..ffa50601fa 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -107,18 +107,18 @@ def interpolate(self, *args, **kwargs):
         pass
 
 
-class WeightedInterpolation(GenericInterpolator):
+class WeightedInterpolator(GenericInterpolator):
 
     """
     Represent an Interpolation operation on a SparseFunction that is separable
-    in space, meaning hte coefficient are defined for each Dimension separately
+    in space, meaning the coefficients are defined for each Dimension separately
     and multiplied at a given point: `w[x, y] = wx[x] * wy[y]`
     """
 
     def __init__(self, sfunction):
         self.sfunction = sfunction
 
-    @property
+    @cached_property
     def grid(self):
         return self.sfunction.grid
 
@@ -126,18 +126,18 @@ def grid(self):
     def _weights(self):
         raise NotImplementedError
 
-    @property
+    @cached_property
     def _psym(self):
         return self.sfunction._point_symbols
 
-    @property
+    @cached_property
     def _gdim(self):
         return self.grid.dimensions
 
     def implicit_dims(self, implicit_dims):
         return as_tuple(implicit_dims) + self.sfunction.dimensions
 
-    @property
+    @cached_property
     def r(self):
         return self.sfunction.r
 
@@ -313,10 +313,9 @@ def callback():
         return Injection(field, expr, offset, self, callback)
 
 
-class LinearInterpolator(WeightedInterpolation):
-
+class LinearInterpolator(WeightedInterpolator):
     """
-    Concrete implementation of GenericInterpolator implementing a Linear interpolation
+    Concrete implementation of WeightedInterpolator implementing a Linear interpolation
     scheme, i.e. Bilinear for 2D and Trilinear for 3D problems.
 
     Parameters
@@ -324,7 +323,7 @@ class LinearInterpolator(WeightedInterpolation):
     sfunction: The SparseFunction that this Interpolator operates on.
     """
 
-    @cached_property
+    @property
     def _weights(self):
         return {d: [1 - p/d.spacing, p/d.spacing]
                 for (d, p) in zip(self._gdim, self._psym)}
@@ -336,7 +335,16 @@ def _coeff_temps(self, implicit_dims):
                 for (d, pos) in zip(self._gdim, pmap)]
 
 
-class PrecomputedInterpolator(WeightedInterpolation):
+class PrecomputedInterpolator(WeightedInterpolator):
+    """
+    Concrete implementation of WeightedInterpolator implementing a Precomputed
+    interpolation scheme, i.e. an interpolation with user provided precomputed
+    weights/coefficients.
+
+    Parameters
+    ----------
+    sfunction: The SparseFunction that this Interpolator operates on.
+    """
 
     def _positions(self, implicit_dims):
         if self.sfunction.gridpoints is None:
@@ -346,23 +354,12 @@ def _positions(self, implicit_dims):
         return []
 
     @property
-    def _interp_points(self):
-        return range(-self.r//2 + 1, self.r//2 + 1)
-
-    @property
-    def _icoeffs(self):
+    def interpolation_coeffs(self):
         return self.sfunction.interpolation_coeffs
 
     @property
-    def _idim(self):
-        return self.sfunction.interpolation_coeffs.dimensions[-1]
-
-    @property
-    def _ddim(self):
-        return self.sfunction.interpolation_coeffs.dimensions[1]
-
-    @cached_property
     def _weights(self):
-        return {d: [self._icoeffs.subs({self._ddim: di, self._idim: k})
+        ddim, cdim = self.interpolation_coeffs.dimensions[1:]
+        return {d: [self.interpolation_coeffs.subs({ddim: di, cdim: k})
                     for k in self._interp_points]
                 for (di, d) in enumerate(self._gdim)}
diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py
index 9793904ac2..ac42a33965 100644
--- a/devito/tools/dtypes_lowering.py
+++ b/devito/tools/dtypes_lowering.py
@@ -8,7 +8,7 @@
 from cgen import dtype_to_ctype as cgen_dtype_to_ctype
 
 __all__ = ['int2', 'int3', 'int4', 'float2', 'float3', 'float4', 'double2',  # noqa
-           'double3', 'double4', 'dtypes_vector_mapper',
+           'double3', 'double4', 'dtypes_vector_mapper', 'dtype_to_mpidtype',
            'dtype_to_cstr', 'dtype_to_ctype', 'dtype_to_mpitype', 'dtype_len',
            'ctypes_to_cstr', 'c_restrict_void_p', 'ctypes_vector_mapper',
            'is_external_ctype', 'infer_dtype']
@@ -128,6 +128,11 @@ def dtype_to_mpitype(dtype):
     }[dtype]
 
 
+def dtype_to_mpidtype(dtype):
+    from devito.mpi import MPI
+    return MPI._typedict[np.dtype(dtype).char]
+
+
 def dtype_len(dtype):
     """
     Number of elements associated with one object of type `dtype`. Thus,
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 6561207938..54bde5b637 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -18,10 +18,11 @@
 from devito.symbolics import (INT, cast_mapper, indexify,
                               retrieve_function_carriers)
 from devito.tools import (ReducerMap, as_tuple, flatten, prod, filter_ordered,
-                          memoized_meth, is_integer)
+                          memoized_meth, is_integer, dtype_to_mpidtype)
 from devito.types.dense import DiscreteFunction, SubFunction
 from devito.types.dimension import (Dimension, ConditionalDimension, DefaultDimension,
                                     DynamicDimension)
+from devito.types.dimension import dimensions as mkdims
 from devito.types.basic import Symbol
 from devito.types.equation import Eq, Inc
 from devito.types.utils import IgnoreDimSort, DimensionTuple
@@ -107,26 +108,19 @@ def __subfunc_setup__(self, key, suffix):
                              "or iterable (e.g., list, np.ndarray)" % key)
 
         name = '%s_%s' % (self.name, suffix)
-        dimensions = (self.indices[self._sparse_position],
-                      Dimension(name='d'),
-                      Dimension(name='i'))
-        shape = (self.npoint, self.grid.dim, self._radius)
+        dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
+        shape = (self.npoint, self.grid.dim)
 
         if key is None:
             # Fallback to default behaviour
-            n = 2  # (Sparse points, Grid Dimensions)
             dtype = self.dtype
         else:
             if not isinstance(key, np.ndarray):
                 key = np.array(key)
-                # Correct for corner case of single coordinate
-                n = max(key.ndim, 2)
-            else:
-                n = key.ndim
             # Need to fix this check to get global npoint, global_shape broken
-            # if shape[:n] != key.shape and self.distributor.nprocs == 1:
-            #     raise ValueError("Incompatible shape `%s`; expected `%s`" %
-            #                      (shape[:n], key.shape))
+            # if shape != key.shape[:2] and self.distributor.nprocs == 1:
+            #     raise ValueError("Incompatible shape for %s, `%s`; expected `%s`" %
+            #                      (suffix, shape, key.shape[:2]))
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
@@ -134,8 +128,10 @@ def __subfunc_setup__(self, key, suffix):
             else:
                 dtype = self.dtype
 
-        dimensions = dimensions[:n]
-        shape = shape[:n]
+        if key is not None and key.ndim > 2:
+            shape = (*shape, *key.shape[2:])
+            # Safely assume there are at most 3 (3D) extra dimensions
+            dimensions = (*dimensions, *mkdims("ijk"[:(key.ndim-2)]))
 
         sf = SubFunction(
             name=name, parent=self, dtype=dtype, dimensions=dimensions,
@@ -184,15 +180,19 @@ def r(self):
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
 
+    @cached_property
+    def dist_origin(self):
+        return self._dist_origin
+
     @property
     def _mpitype(self):
-        return MPI._typedict[np.dtype(self.dtype).char]
+        return dtype_to_mpidtype(self.dtype)
 
     @property
     def _smpitype(self):
         sfuncs = [getattr(self, s) for s in self._sub_functions
                   if getattr(self, s) is not None]
-        return {s: MPI._typedict[np.dtype(s.dtype).char] for s in sfuncs}
+        return {s: dtype_to_mpidtype(s.dtype) for s in sfuncs}
 
     @property
     def comm(self):
@@ -212,9 +212,58 @@ def _subfunc_names(self):
                 pass
         return tuple(names)
 
+    @property
+    def _coords_indices(self):
+        if self.gridpoints_data is not None:
+            return self.gridpoints_data._local
+        else:
+            if self.coordinates_data is None:
+                raise ValueError("No coordinates or gridpoints attached"
+                                 "to this SparseFunction")
+            return (
+                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
+            ).astype(np.int32)
+
+    @property
+    def gridpoints(self):
+        try:
+            return self._gridpoints
+        except AttributeError:
+            return self._coords_indices
+
+    @property
+    def gridpoints_data(self):
+        try:
+            return self._gridpoints.data._local
+        except AttributeError:
+            return None
+
+    @property
+    def coordinates(self):
+        try:
+            return self._coordinates
+        except AttributeError:
+            return None
+
+    @property
+    def coordinates_data(self):
+        try:
+            return self.coordinates.data._local
+        except AttributeError:
+            return None
+
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each Dimension for each point symbol."""
+        return tuple(product(range(-self.r+1, self.r+1), repeat=self.grid.dim))
+
+    @cached_property
+    def _point_support(self):
+        return np.array(self._point_increments)
+
     @cached_property
     def _point_symbols(self):
-        """Symbol for coordinate value in each dimension of the point."""
+        """Symbol for coordinate value in each Dimension of the point."""
         return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
                                 for d in self.grid.dimensions),
                               getters=self.grid.dimensions)
@@ -259,18 +308,6 @@ def _coordinate_bases(self, field_offset):
                                                   self.grid.dimensions[:self.grid.dim],
                                                   field_offset)])
 
-    @property
-    def gridpoints(self):
-        """
-        The *reference* grid point corresponding to each sparse point.
-
-        Notes
-        -----
-        When using MPI, this property refers to the *physically* owned
-        sparse points.
-        """
-        raise NotImplementedError
-
     def interpolate(self, *args, **kwargs):
         """
         Implement an interpolation operation from the grid onto the given sparse points
@@ -283,15 +320,6 @@ def inject(self, *args, **kwargs):
         """
         return self.interpolator.inject(*args, **kwargs)
 
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(self.r+1), repeat=self.grid.dim))
-
-    @cached_property
-    def _point_support(self):
-        return np.array(self._point_increments)
-
     @property
     def _support(self):
         """
@@ -382,7 +410,7 @@ def _dist_alltoall(self, dmap=None):
         rshape[self._sparse_position] = sum(rsparse)
 
         # May have to swap axes, as `MPI_Alltoallv` expects contiguous data, and
-        # the sparse dimension may not be the outermost
+        # the sparse Dimension may not be the outermost
         sshape = tuple(sshape[i] for i in self._dist_reorder_mask)
         rshape = tuple(rshape[i] for i in self._dist_reorder_mask)
 
@@ -456,7 +484,7 @@ def _dist_subfunc_scatter(self, subfunc):
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
 
-        # Pack (reordered) coordinates so that they can be sent out via an Alltoallv
+        # Pack (reordered) subfunc values so that they can be sent out via an Alltoallv
         sfuncd = subfunc.data._local[mask[self._sparse_position]]
 
         # Send out the sparse point coordinates
@@ -467,7 +495,7 @@ def _dist_subfunc_scatter(self, subfunc):
                             [scattered, rcount, rdisp, self._smpitype[subfunc]])
         sfuncd = scattered
 
-        # Translate global coordinates into local coordinates
+        # Translate global subfunc values into local subfunc values
         if self.dist_origin[subfunc] is not None:
             sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=subfunc.dtype)
         return {subfunc: sfuncd}
@@ -505,10 +533,10 @@ def _dist_subfunc_gather(self, sfuncd, sfunc):
         # Compute dist map only once
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
-        # Pack (reordered) coordinates so that they can be sent out via an Alltoallv
+        # Pack (reordered) subfunc values so that they can be sent out via an Alltoallv
         if self.dist_origin[sfunc] is not None:
             sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=sfunc.dtype)
-        # Send out the sparse point coordinates
+        # Send out the sparse point subfunc values
         sshape, scount, sdisp, _, rcount, rdisp = \
             self._dist_subfunc_alltoall(sfunc, dmap=dmap)
         gathered = np.empty(shape=sshape, dtype=sfunc.dtype)
@@ -614,7 +642,7 @@ def __fd_setup__(self):
 
     @property
     def time_dim(self):
-        """The time dimension."""
+        """The time Dimension."""
         return self._time_dim
 
     @classmethod
@@ -684,7 +712,7 @@ class SparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to ``(npoint,)``.
-    dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -728,7 +756,7 @@ class SparseFunction(AbstractSparseFunction):
     Notes
     -----
     The parameters must always be given as keyword arguments, since SymPy
-    uses ``*args`` to (re-)create the dimension arguments of the symbolic object.
+    uses ``*args`` to (re-)create the Dimension arguments of the symbolic object.
     About SparseFunction and MPI. There is a clear difference between:
 
         * Where the sparse points *physically* live, i.e., on which MPI rank. This
@@ -762,21 +790,9 @@ def __init_finalize__(self, *args, **kwargs):
         self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
         self._dist_origin = {self._coordinates: self.grid.origin_offset}
 
-    @property
-    def coordinates(self):
-        """The SparseFunction coordinates."""
-        return self._coordinates
-
-    @property
-    def coordinates_data(self):
-        try:
-            return self.coordinates.data.view(np.ndarray)
-        except AttributeError:
-            return None
-
     @cached_property
     def _coordinate_symbols(self):
-        """Symbol representing the coordinate values in each dimension."""
+        """Symbol representing the coordinate values in each Dimension."""
         p_dim = self.indices[self._sparse_position]
         return tuple([self.coordinates.indexify((p_dim, i))
                       for i in range(self.grid.dim)])
@@ -800,20 +816,6 @@ def _index_matrix(self, offset):
 
         return index_matrix, points
 
-    @property
-    def dist_origin(self):
-        return self._dist_origin
-
-    @property
-    def _coords_indices(self):
-        if self.coordinates.data is None:
-            raise ValueError("No coordinates attached to this SparseFunction")
-        return (
-            np.floor(self.coordinates.data._local - self.grid.origin) / self.grid.spacing
-        ).astype(np.int32)
-
-    gridpoints = _coords_indices
-
     def guard(self, expr=None, offset=0):
         """
         Generate guarded expressions, that is expressions that are evaluated
@@ -852,7 +854,7 @@ def guard(self, expr=None, offset=0):
         # Temporaries for the position
         temps = [Eq(v, k, implicit_dims=self.dimensions)
                  for k, v in self._position_map.items()]
-        # Temporaries for the indirection dimensions
+        # Temporaries for the indirection Dimensions
         temps.extend([Eq(v, k.subs(self._position_map),
                          implicit_dims=self.dimensions)
                       for k, v in points.items() if v in conditions])
@@ -884,7 +886,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
     npoint : int
         Number of sparse points.
     nt : int
-        Number of timesteps along the time dimension.
+        Number of timesteps along the time Dimension.
     grid : Grid
         The computational domain from which the sparse points are sampled.
     coordinates : np.ndarray, optional
@@ -895,7 +897,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
         Discretisation order for time derivatives. Defaults to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to ``(nt, npoint)``.
-    dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -941,7 +943,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
     Notes
     -----
     The parameters must always be given as keyword arguments, since SymPy
-    uses ``*args`` to (re-)create the dimension arguments of the symbolic object.
+    uses ``*args`` to (re-)create the Dimension arguments of the symbolic object.
     """
 
     is_SparseTimeFunction = True
@@ -1025,7 +1027,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     grid : Grid
         The computational domain from which the sparse points are sampled.
     r : int
-        Number of gridpoints in each dimension to interpolate a single sparse
+        Number of gridpoints in each Dimension to interpolate a single sparse
         point to. E.g. `r=2` for linear interpolation.
     coordinates : np.ndarray, optional
         The coordinates of each sparse point.
@@ -1033,12 +1035,12 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         An array carrying the *reference* grid point corresponding to each
         sparse point.  Of all the gridpoints that one sparse point would be
         interpolated to, this is the grid point closest to the origin, i.e. the
-        one with the lowest value of each coordinate dimension. Must be a
+        one with the lowest value of each coordinate Dimension. Must be a
         two-dimensional array of shape `(npoint, grid.ndim)`.
     interpolation_coeffs : np.ndarray, optional
         An array containing the coefficient for each of the r^2 (2D) or r^3
         (3D) gridpoints that each sparse point will be interpolated to. The
-        coefficient is split across the n dimensions such that the contribution
+        coefficient is split across the n Dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
         `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
@@ -1048,7 +1050,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -1064,7 +1066,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     Notes
     -----
     The parameters must always be given as keyword arguments, since SymPy
-    uses `*args` to (re-)create the dimension arguments of the symbolic object.
+    uses `*args` to (re-)create the Dimension arguments of the symbolic object.
     """
 
     _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
@@ -1076,17 +1078,27 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
 
+        # Process kwargs
+        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
+        gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data'))
+        interpolation_coeffs = kwargs.get('interpolation_coeffs',
+                                          kwargs.get('interpolation_coeffs_data'))
         # Grid points per sparse point (2 in the case of bilinear and trilinear)
         r = kwargs.get('r')
         if not is_integer(r):
             raise TypeError('Need `r` int argument')
         if r <= 0:
             raise ValueError('`r` must be > 0')
-
+        # Make sure radius matches the coefficients size
+        nr = interpolation_coeffs.shape[-1]
+        if nr // 2 != r:
+            if nr == r:
+                r = r // 2
+            else:
+                raise ValueError("Interpolation coefficients shape %d do "
+                                 "not match specified radius %d" % (r, nr))
         self._radius = r
 
-        coordinates = kwargs.get('coordinates', kwargs.get('coordinates_data'))
-        gridpoints = kwargs.get('gridpoints', kwargs.get('gridpoints_data'))
         if coordinates is not None and gridpoints is not None:
             raise ValueError("Either `coordinates` or `gridpoints` must be "
                              "provided, but not both")
@@ -1108,16 +1120,9 @@ def __init_finalize__(self, *args, **kwargs):
             self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
         # Setup the interpolation coefficients. These are compulsory
-        interpolation_coeffs = kwargs.get('interpolation_coeffs',
-                                          kwargs.get('interpolation_coeffs_data'))
         self._interpolation_coeffs = \
             self.__subfunc_setup__(interpolation_coeffs, 'interp_coeffs')
         self._dist_origin.update({self._interpolation_coeffs: None})
-        # Make sure it matches the radius
-        if self._interpolation_coeffs.shape[-1] != r:
-            nr = self._interpolation_coeffs.shape[-1]
-            raise ValueError("Interpolation coefficients shape %d do "
-                             "not match specified radius %d" % (r, nr))
 
         warning("Ensure that the provided interpolation coefficient and grid "
                 "point values are computed on the final grid that will be used "
@@ -1125,54 +1130,6 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
-    @property
-    def r(self):
-        return self._radius
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(-self.r//2+1, self.r//2+1), repeat=self.grid.dim))
-
-    @cached_property
-    def _point_support(self):
-        return np.array(self._point_increments)
-
-    @property
-    def _coords_indices(self):
-        if self.gridpoints is not None:
-            return self.gridpoints.data._local
-        else:
-            return (
-                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
-            ).astype(np.int32)
-
-    @property
-    def gridpoints(self):
-        return self._gridpoints
-
-    @property
-    def gridpoints_data(self):
-        try:
-            return self.gridpoints.data._local
-        except AttributeError:
-            return None
-
-    @property
-    def coordinates(self):
-        return self._coordinates
-
-    @property
-    def coordinates_data(self):
-        try:
-            return self.coordinates.data._local
-        except AttributeError:
-            return None
-
-    @property
-    def dist_origin(self):
-        return self._dist_origin
-
     @property
     def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
@@ -1184,15 +1141,15 @@ def interpolation_coeffs_data(self):
 
     @cached_property
     def _coordinate_symbols(self):
-        """Symbol representing the coordinate values in each dimension."""
+        """Symbol representing the coordinate values in each Dimension."""
         p_dim = self.indices[self._sparse_position]
         if self.gridpoints is not None:
             return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
                           for ((di, d), o) in zip(enumerate(self.grid.dimensions),
                                                   self.grid.origin)])
-
-        return tuple([self.coordinates.indexify((p_dim, i))
-                      for i in range(self.grid.dim)])
+        else:
+            return tuple([self.coordinates.indexify((p_dim, i))
+                          for i in range(self.grid.dim)])
 
     @cached_property
     def _coordinate_indices(self):
@@ -1212,9 +1169,10 @@ def _coordinate_indices(self):
         if self.gridpoints is not None:
             ddim = self.gridpoints.dimensions[-1]
             return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
-        return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.keys(),
-                                      self.grid.dimensions[:self.grid.dim])])
+        else:
+            return tuple([INT(floor(p / i.spacing))
+                          for p, i in zip(self._position_map,
+                                          self.grid.dimensions[:self.grid.dim])])
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
@@ -1233,7 +1191,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     grid : Grid
         The computational domain from which the sparse points are sampled.
     r : int
-        Number of gridpoints in each dimension to interpolate a single sparse
+        Number of gridpoints in each Dimension to interpolate a single sparse
         point to. E.g. `r=2` for linear interpolation.
     coordinates : np.ndarray, optional
         The coordinates of each sparse point.
@@ -1241,12 +1199,12 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         An array carrying the *reference* grid point corresponding to each
         sparse point.  Of all the gridpoints that one sparse point would be
         interpolated to, this is the grid point closest to the origin, i.e. the
-        one with the lowest value of each coordinate dimension. Must be a
+        one with the lowest value of each coordinate Dimension. Must be a
         two-dimensional array of shape `(npoint, grid.ndim)`.
     interpolation_coeffs : np.ndarray, optional
         An array containing the coefficient for each of the r^2 (2D) or r^3
         (3D) gridpoints that each sparse point will be interpolated to. The
-        coefficient is split across the n dimensions such that the contribution
+        coefficient is split across the n Dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
         `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
@@ -1258,7 +1216,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         Discretisation order for time derivatives. Default to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -1274,7 +1232,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     Notes
     -----
     The parameters must always be given as keyword arguments, since SymPy
-    uses ``*args`` to (re-)create the dimension arguments of the symbolic object.
+    uses ``*args`` to (re-)create the Dimension arguments of the symbolic object.
     """
 
     __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ +
@@ -1306,9 +1264,37 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
         if p_t is not None:
             subs = {self.time_dim: p_t}
 
-        return super(PrecomputedSparseTimeFunction, self).interpolate(
-            expr, offset=offset, increment=increment, self_subs=subs
-        )
+        return super().interpolate(expr, offset=offset,
+                                   increment=increment, self_subs=subs)
+
+    def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None):
+        """
+        Generate equations injecting an arbitrary expression into a field.
+
+        Parameters
+        ----------
+        field : Function
+            Input field into which the injection is performed.
+        expr : expr-like
+            Injected expression.
+        offset : int, optional
+            Additional offset from the boundary.
+        u_t : expr-like, optional
+            Time index substituted for ``field.time_dim`` in ``field``.
+        p_t : expr-like, optional
+            Time index substituted for this object's time Dimension in ``expr``.
+        implicit_dims : Dimension or list of Dimension, optional
+            An ordered list of Dimensions that do not explicitly appear in the
+            injection expression, but that should be honored when constructing
+            the operator.
+        """
+        # Apply optional time symbol substitutions to field and expr
+        if u_t is not None:
+            field = field.subs({field.time_dim: u_t})
+        if p_t is not None:
+            expr = expr.subs({self.time_dim: p_t})
+
+        return super().inject(field, expr, offset=offset, implicit_dims=implicit_dims)
 
 
 class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
@@ -1330,21 +1316,21 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
         data array.
 
     r: int or Mapping[Dimension, Optional[int]]
-        The number of gridpoints in each dimension used to inject/interpolate
+        The number of gridpoints in each Dimension used to inject/interpolate
         each physical point.  e.g. bi-/tri-linear interplation would use 2 coefficients
-        in each dimension.
+        in each Dimension.
 
         The Mapping version of this parameter allows a different number of grid points
-        in each dimension. If a Dimension maps to None, this has a special
-        interpretation - sources are not localised to coordinates in that dimension.
+        in each Dimension. If a Dimension maps to None, this has a special
+        interpretation - sources are not localised to coordinates in that Dimension.
         This is loosely equivalent to specifying r[dim] = dim_size, and with all
-        gridpoint locations along that dimension equal to zero.
+        gridpoint locations along that Dimension equal to zero.
 
     par_dim: Dimension
-        If set, this is the dimension used to split the sources for parallel
+        If set, this is the Dimension used to split the sources for parallel
         injection. The source injection loop becomes a loop over this spatial
-        dimension, and then a loop over sources which touch that spatial
-        dimension coordinate. This defaults to grid.dimensions[0], and if specified
+        Dimension, and then a loop over sources which touch that spatial
+        Dimension coordinate. This defaults to grid.dimensions[0], and if specified
         must correspond to one of the grid.dimensions.
 
     other parameters as per SparseTimeFunction
@@ -1353,11 +1339,11 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
         msf.gridpoints.data[iloc, idim]: int
             integer, position (in global coordinates)
             of the _minimum_ index that location index
-            `iloc` is interpolated from / injected into, in dimension `idim`
+            `iloc` is interpolated from / injected into, in Dimension `idim`
             where idim is an index into the grid.dimensions
 
         msf.interpolation_coefficients: Dict[Dimension, np.ndarray]
-            For each dimension, there is an array of interpolation coefficients
+            For each Dimension, there is an array of interpolation coefficients
             for each location `iloc`.
 
             This array is of shape (nloc, r), and is also available as
@@ -1390,7 +1376,7 @@ class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
     .. note::
 
         The parameters must always be given as keyword arguments, since
-        SymPy uses `*args` to (re-)create the dimension arguments of the
+        SymPy uses `*args` to (re-)create the Dimension arguments of the
         symbolic function.
     """
 
@@ -1429,7 +1415,7 @@ def __init_finalize__(self, *args, **kwargs):
             # convert to dictionary with same size in all dims
             r = {dim: r for dim in self.grid.dimensions}
 
-        # Validate radius is set correctly for all grid dimensions
+        # Validate radius is set correctly for all grid Dimensions
         for d in self.grid.dimensions:
             if d not in r:
                 raise ValueError("dimension %s not specified in r mapping" % d)
@@ -1441,14 +1427,14 @@ def __init_finalize__(self, *args, **kwargs):
         # TODO is this going to cause some trouble with users of self.r?
         self._radius = r
 
-        # Get the parallelism dimension for injection
+        # Get the parallelism Dimension for injection
         self._par_dim = kwargs.get("par_dim")
         if self._par_dim is not None:
             assert self._par_dim in self.grid.dimensions
         else:
             self._par_dim = self.grid.dimensions[0]
 
-        # This has one value per dimension (e.g. size=3 for 3D)
+        # This has one value per Dimension (e.g. size=3 for 3D)
         # Maybe this should be unique per SparseFunction,
         # but I can't see a need yet.
         ddim = Dimension('d')
@@ -1465,7 +1451,7 @@ def __init_finalize__(self, *args, **kwargs):
             allocator=self._allocator,
             space_order=0, parent=self)
 
-        # There is a coefficient array per grid dimension
+        # There is a coefficient array per grid Dimension
         # I could pack these into one array but that seems less readable?
         self.interpolation_coefficients = {}
         self.interpolation_coefficients_t_bogus = {}
@@ -1540,13 +1526,13 @@ def __init_finalize__(self, *args, **kwargs):
         )
 
         # This loop maintains a map of nnz indices which touch each
-        # coordinate of the parallised injection dimension
+        # coordinate of the parallelised injection Dimension
         # This takes the form of a list of nnz indices, and a start/end
         # position in that list for each index in the parallel dim
         self.par_dim_to_nnz_dim = DynamicDimension('par_dim_to_nnz_%s' % self.name)
 
         # This map acts as an indirect sort of the sources according to their
-        # position along the parallelisation dimension
+        # position along the parallelisation Dimension
         self._par_dim_to_nnz_map = SubFunction(
             name='par_dim_to_nnz_map_%s' % self.name,
             dtype=np.int32,
@@ -1682,7 +1668,7 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None):
             coefficients = self.interpolation_coefficients[d].indexed
 
             # If radius is set to None, then the coefficient array is
-            # actually the full size of the grid dimension itself
+            # actually the full size of the grid Dimension itself
             if self._radius[d] is not None:
                 dim_subs.append((d, rd + gridpoints[row, i]))
             else:
@@ -1731,7 +1717,7 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None):
         dim_subs = [(pdim, mcol[nnz_index])]
         coeffs = [mval[nnz_index]]
 
-        # Devito requires a fixed ordering of dimensions across
+        # Devito requires a fixed ordering of Dimensions across
         # all loops, which means we need to respect that when constructing
         # the loops for this injection.
 
@@ -1749,14 +1735,14 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None):
             # There are four cases here.
             if d is self._par_dim:
                 if self._radius[d] is None:
-                    # If d is the parallelism dimension, AND this dimension is
+                    # If d is the parallelism Dimension, AND this Dimension is
                     # non-local (i.e. all sources touch all indices, and
                     # gridpoint for this dim is ignored)
                     coeffs.append(coefficients[row, d])
                 else:
-                    # d is the parallelism dimension, so the index into
+                    # d is the parallelism Dimension, so the index into
                     # the coefficients array is derived from the value of
-                    # this dimension minus the gridpoint of the point
+                    # this Dimension minus the gridpoint of the point
                     coeffs.append(coefficients[row, d - gridpoints[row, i]])
 
                 # loop dim here is always d
@@ -1766,16 +1752,16 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None):
                 par_dim_seen = True
             else:
                 if self._radius[d] is None:
-                    # d is not the parallelism dimension, AND this dimension
+                    # d is not the parallelism Dimension, AND this Dimension
                     # is non-local (i.e. all sources touch all indices,
                     # and gridpoint for this dim is ignored)
 
-                    # the loop is therefore over the original dimension d
+                    # the loop is therefore over the original Dimension d
                     coeffs.append(coefficients[row, d])
                     loop_dim = d
                 else:
-                    # d is not the parallelism dimension, and it _is_
-                    # local. In this case the loop is over the radius dimension
+                    # d is not the parallelism Dimension, and it _is_
+                    # local. In this case the loop is over the radius Dimension
                     # and we need to substitute d with the offset from the
                     # grid point
                     dim_subs.append((d, rd + gridpoints[row, i]))
@@ -1811,11 +1797,11 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None):
     @classmethod
     def __indices_setup__(cls, *args, **kwargs):
         """
-        Return the default dimension indices for a given data shape.
+        Return the default Dimension indices for a given data shape.
         """
         dimensions = kwargs.get('dimensions')
         if dimensions is None:
             dimensions = (kwargs['grid'].time_dim, Dimension(
                 name='p_%s' % kwargs["name"]))
 
         if args:
@@ -1865,7 +1851,7 @@ def _rank_to_points(self):
         """
         distributor = self.grid.distributor
 
-        # Along each dimension, the coordinate indices are broken into
+        # Along each Dimension, the coordinate indices are broken into
         # 2*decomposition_size+3 groups, numbered starting at 0
 
         # Group 2*i contributes only to rank i-1
@@ -1877,7 +1863,7 @@ def _rank_to_points(self):
         #  (these contributes to rank "decomp_size")
 
         # binned_gridpoints will hold which group the particular
-        # point is along that decomposed dimension.
+        # point is along that decomposed Dimension.
         binned_gridpoints = np.empty_like(self._gridpoints.data)
         dim_group_dim_rank = []
 
@@ -1924,7 +1910,7 @@ def _rank_to_points(self):
             dim_group_dim_rank.append(this_group_rank_map)
 
         # This allows the points to be grouped into non-overlapping sets
-        # based on their bin in each dimension.  For each set we build a list
+        # based on their bin in each Dimension.  For each set we build a list
         # of points.
         bins, inverse, counts = np.unique(
             binned_gridpoints,
@@ -1956,7 +1942,7 @@ def _rank_to_points(self):
 
         from itertools import product
         for bi in bins:
-            # This is a list of sets for the dimension-specific rank
+            # This is a list of sets for the Dimension-specific rank
             dim_rank_sets = [dgdr[bii]
                              for dgdr, bii in zip(dim_group_dim_rank, bi)]
 
@@ -1995,7 +1981,7 @@ def _build_par_dim_to_nnz(self, active_gp, active_mrow):
                 ),
             }
 
-        # Get the radius along the parallel dimension
+        # Get the radius along the parallel Dimension
         r = self._radius[self._par_dim]
 
         # now, the parameters can be devito.Data, which doesn't like fancy indexing
@@ -2121,7 +2107,7 @@ def manual_scatter(self, *, data_all_zero=False):
 
         # now recreate the matrix to only contain points in our
         # local domain.
-        # along each dimension, each point is in one of 5 groups
+        # along each Dimension, each point is in one of 5 groups
         #  0 - completely to the left
         #  1 - to the left, but the injection stencil touches our domain
         #  2 - completely in our domain
@@ -2184,7 +2170,7 @@ def manual_scatter(self, *, data_all_zero=False):
                 scattered_coeffs[idim][mask, -(ir+1)] = 0
 
             # finally, we translate to local coordinates
-            # no need for this in the broadcasted dimensions
+            # no need for this in the broadcasted Dimensions
             if self.r[dim] is not None:
                 scattered_gp[:, idim] -= _left
 
@@ -2210,11 +2196,11 @@ def _dist_scatter(self, data=None):
 
     # The implementation in AbstractSparseFunction now relies on us
     # having a .coordinates property, which we don't have.
-    def _arg_apply(self, *dataobj, alias=None):
+    def _arg_apply(self, dataobj, *subfunc, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
-            key._dist_gather(self._C_as_ndarray(dataobj[0]))
+            key._dist_gather(self._C_as_ndarray(dataobj))
         elif self.grid.distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 52aa77e1ff..60df9ed80e 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -290,7 +290,7 @@ def test_precomputed_sparse_function(mode):
     assert getattr(sf, othermode) is None
     assert getattr(new_sf, othermode) is None
 
-    assert sf._radius == new_sf._radius == 2
+    assert sf._radius == new_sf._radius == 1
     assert sf.space_order == new_sf.space_order
     assert sf.time_order == new_sf.time_order
     assert sf.dtype == new_sf.dtype

From 2e749f701303ee31b97ec35f381a37771918de31 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 23 May 2023 14:53:37 -0400
Subject: [PATCH 14/90] tests: add test for precomputed time injection that was
 missing (and implementation)

---
 devito/types/sparse.py      |  8 ++++----
 tests/test_interpolation.py | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 54bde5b637..fe09103158 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -215,7 +215,7 @@ def _subfunc_names(self):
     @property
     def _coords_indices(self):
         if self.gridpoints_data is not None:
-            return self.gridpoints_data._local
+            return self.gridpoints_data
         else:
             if self.coordinates_data is None:
                 raise ValueError("No coordinates or gridpoints attached"
@@ -234,7 +234,7 @@ def gridpoints(self):
     @property
     def gridpoints_data(self):
         try:
-            return self._gridpoints.data._local
+            return self._gridpoints.data._local.view(np.ndarray)
         except AttributeError:
             return None
 
@@ -248,7 +248,7 @@ def coordinates(self):
     @property
     def coordinates_data(self):
         try:
-            return self.coordinates.data._local
+            return self.coordinates.data._local.view(np.ndarray)
         except AttributeError:
             return None
 
@@ -1137,7 +1137,7 @@ def interpolation_coeffs(self):
 
     @property
     def interpolation_coeffs_data(self):
-        return self.interpolation_coeffs.data._local
+        return self.interpolation_coeffs.data._local.view(np.ndarray)
 
     @cached_property
     def _coordinate_symbols(self):
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index d46cec7992..79a5ebeffc 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -203,6 +203,41 @@ def test_precomputed_injection():
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
 
 
+def test_precomputed_injection_time():
+    """Test injection with PrecomputedSparseFunction which accepts
+       precomputed values for interpolation coefficients
+    """
+    shape = (11, 11)
+    coords = [(.05, .95), (.45, .45)]
+    origin = (0, 0)
+    result = 0.25
+    nt = 20
+
+    # Constant for linear interpolation
+    # because we interpolate across 2 neighbouring points in each dimension
+    r = 2
+
+    m = unit_box_time(shape=shape)
+    m.data[:] = 0.
+
+    gridpoints, interpolation_coeffs = precompute_linear_interpolation(coords,
+                                                                       m.grid, origin)
+
+    sf = PrecomputedSparseTimeFunction(name='s', grid=m.grid, r=r, npoint=len(coords),
+                                       gridpoints=gridpoints, nt=nt,
+                                       interpolation_coeffs=interpolation_coeffs)
+
+    expr = sf.inject(m, Float(1.))
+
+    Operator(expr)()
+    for ti in range(2):
+        indices = [slice(0, 2, 1), slice(9, 11, 1)]
+        assert np.allclose(m.data[ti][indices], nt*result/2, rtol=1.e-5)
+
+        indices = [slice(4, 6, 1) for _ in coords]
+        assert np.allclose(m.data[ti][indices], nt*result/2, rtol=1.e-5)
+
+
 @pytest.mark.parametrize('shape, coords', [
     ((11, 11), [(.05, .9), (.01, .8)]),
     ((11, 11, 11), [(.05, .9), (.01, .8), (0.07, 0.84)])

From 424a5489174a022c07625b74a2b97d792eecf819 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 2 Jun 2023 11:15:31 -0400
Subject: [PATCH 15/90] api: switch interp to r dim

---
 devito/operations/interpolators.py | 117 ++++++++++++++---------------
 1 file changed, 57 insertions(+), 60 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index ffa50601fa..057b38fb05 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from itertools import product
 
 import sympy
 from cached_property import cached_property
@@ -7,7 +6,9 @@
 from devito.finite_differences.elementary import floor
 from devito.symbolics import retrieve_function_carriers, INT
 from devito.tools import as_tuple, flatten, prod
-from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol)
+from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
+                          CustomDimension)
+from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
 
@@ -118,7 +119,7 @@ class WeightedInterpolator(GenericInterpolator):
     def __init__(self, sfunction):
         self.sfunction = sfunction
 
-    @cached_property
+    @property
     def grid(self):
         return self.sfunction.grid
 
@@ -126,35 +127,29 @@ def grid(self):
     def _weights(self):
         raise NotImplementedError
 
-    @cached_property
+    @property
     def _psym(self):
         return self.sfunction._point_symbols
 
-    @cached_property
+    @property
     def _gdim(self):
         return self.grid.dimensions
 
-    def implicit_dims(self, implicit_dims):
-        return as_tuple(implicit_dims) + self.sfunction.dimensions
-
     @cached_property
     def r(self):
         return self.sfunction.r
 
-    @property
-    def _interp_points(self):
-        return range(-self.r+1, self.r+1)
+    @cached_property
+    def _rdim(self):
 
-    @property
-    def _nd_points(self):
-        return product(self._interp_points, repeat=self.grid.dim)
+        dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
+                                -self.r+1, self.r, len(range(-self.r+1, self.r+1)))
+                for d in self._gdim]
 
-    @property
-    def _interpolation_coeffs(self):
-        coeffs = {}
-        for p in self._nd_points:
-            coeffs[p] = prod([self._weights[d][i] for (d, i) in zip(self._gdim, p)])
-        return list(coeffs.values())
+        return DimensionTuple(*dims, getters=self._gdim)
+
+    def implicit_dims(self, implicit_dims):
+        return as_tuple(implicit_dims) + self.sfunction.dimensions
 
     def _coeff_temps(self, implicit_dims):
         return []
@@ -163,15 +158,6 @@ def _positions(self, implicit_dims):
         return [Eq(v, k, implicit_dims=implicit_dims)
                 for k, v in self.sfunction._position_map.items()]
 
-    def subs_coords(self, _expr, *idx_subs):
-        return [_expr.xreplace(v_sub) * b.xreplace(v_sub)
-                for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]
-
-    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
-        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
-                    implicit_dims=implicit_dims)
-                for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
-
     def _interpolation_indices(self, variables, offset=0, field_offset=0,
                                implicit_dims=None):
         """
@@ -190,27 +176,29 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
 
+        try:
+            pdim = self.sfunction.coordinates.dimensions[-1]
+        except AttributeError:
+            pdim = self.sfunction.gridpoints.dimensions[-1]
+
         # Create positions and indices temporaries/indirections
-        for ((di, d), pos) in zip(enumerate(self._gdim), pmap):
-            for (ri, r) in enumerate(self._interp_points):
-                p = Symbol(name='ii_%s_%s_%d' % (self.sfunction.name, d.name, ri))
-                points[d].append(p)
-                # Conditionals to avoid OOB
-                lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
-                ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
-                condition = sympy.And(lb, ub, evaluate=False)
-                mapper[d].append(ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                                      condition=condition, indirect=True))
-                temps.extend([Eq(p, pos + r, implicit_dims=implicit_dims)])
-
-        # Substitution mapper
-        for p in self._nd_points:
-            # Apply mapper to each variable with origin correction before the
-            # Dimensions get replaced
-            subs = {v: v.subs({k: c[pi] - v.origin.get(k, 0)
-                              for ((k, c), pi) in zip(mapper.items(), p)})
+        for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
+            p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
+            temps.extend([Eq(p, pos._subs(pdim, di) + rd,
+                             implicit_dims=implicit_dims)])
+
+            # Add conditional
+            lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+            condition = sympy.And(lb, ub, evaluate=False)
+            mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
+                                             condition=condition, indirect=True)
+            points[d] = p
+
+        # Substitution mapper for variables
+        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0)
+                               for ((k, c), pi) in zip(mapper.items(), points)})
                     for v in variables}
-            idx_subs.append(subs)
 
         return idx_subs, temps
 
@@ -253,7 +241,7 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            args = self.subs_coords(_expr, *idx_subs)
+            args = [_expr.xreplace(idx_subs) * self._weights]
             # Accumulate point-wise contributions into a temporary
             rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
@@ -263,7 +251,7 @@ def callback():
             lhs = self.sfunction.subs(self_subs)
             ecls = Inc if increment else Eq
             last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
-
+            print(temps)
             return temps + summands + last
 
         return Interpolation(expr, offset, increment, self_subs, self, callback)
@@ -305,8 +293,8 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = self.subs_coords_eq(field, _expr, *idx_subs,
-                                       implicit_dims=implicit_dims)
+            eqns = [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * self._weights,
+                    implicit_dims=implicit_dims) for vsub in idx_subs]
 
             return temps + eqns
 
@@ -323,16 +311,26 @@ class LinearInterpolator(WeightedInterpolator):
     sfunction: The SparseFunction that this Interpolator operates on.
     """
 
+    @cached_property
+    def _csym(self):
+        return [Symbol(name='c_%s_%s' % (self.sfunction.name, d.name))
+                for d in self._rdim]
+
     @property
     def _weights(self):
-        return {d: [1 - p/d.spacing, p/d.spacing]
-                for (d, p) in zip(self._gdim, self._psym)}
+        return prod(self._csym)
 
     def _coeff_temps(self, implicit_dims):
+        # Positions
         pmap = self.sfunction._position_map.values()
-        return [Eq(self._psym[d], pos - d.spacing*INT(floor(pos/d.spacing)),
-                   implicit_dims=implicit_dims)
-                for (d, pos) in zip(self._gdim, pmap)]
+        poseq = [Eq(self._psym[d], pos - d.spacing*INT(floor(pos/d.spacing)),
+                    implicit_dims=implicit_dims)
+                 for (d, pos) in zip(self._gdim, pmap)]
+        # Coeffs
+        ceq = [Eq(c, (r - r._symbolic_min)*(1 - p/d.spacing) +
+                  (1 - r - r._symbolic_min)*p/d.spacing)
+               for (c, d, r, p) in zip(self._csym, self._gdim, self._rdim, self._psym)]
+        return poseq + ceq
 
 
 class PrecomputedInterpolator(WeightedInterpolator):
@@ -360,6 +358,5 @@ def interpolation_coeffs(self):
     @property
     def _weights(self):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        return {d: [self.interpolation_coeffs.subs({ddim: di, cdim: k})
-                    for k in self._interp_points]
-                for (di, d) in enumerate(self._gdim)}
+        return prod([self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd._symbolic_min})
+                     for (ri, rd) in enumerate(self._rdim)])

From 30ad1008f3163a363889920d2a6b5133d5e76edc Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 5 Jun 2023 15:10:49 -0400
Subject: [PATCH 16/90] operations: remove unused points and cleanup  weights

---
 devito/ir/equations/algorithms.py  | 12 +++--
 devito/operations/interpolators.py | 47 ++++++++++----------
 devito/tools/algorithms.py         |  1 +
 devito/types/sparse.py             |  2 +-
 tests/test_interpolation.py        | 71 ++++++++++++++++++++++--------
 5 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 22b018cbdd..c50f2a17ab 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -5,7 +5,7 @@
 
 from devito.symbolics import retrieve_indexed, uxreplace
 from devito.tools import PartialOrderTuple, as_tuple, filter_sorted, flatten
-from devito.types import Dimension, IgnoreDimSort
+from devito.types import Dimension, IgnoreDimSort, ConditionalDimension
 from devito.types.basic import AbstractFunction
 
 __all__ = ['dimension_sort', 'lower_exprs']
@@ -33,8 +33,14 @@ def handle_indexed(indexed):
 
                 # Fallback: Just insert all the Dimensions we find, regardless of
                 # what the user is attempting to do
-                relation.extend([d for d in filter_sorted(i.free_symbols)
-                                 if isinstance(d, Dimension)])
+                rels = []
+                for d in filter_sorted(i.free_symbols):
+                    if isinstance(d, ConditionalDimension) and d.indirect:
+                        continue
+                    elif isinstance(d, Dimension):
+                        rels.append(d)
+
+                relation.extend(rels)
 
         # StencilDimensions are lowered subsequently through special compiler
         # passes, so they can be ignored here
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 057b38fb05..927f67f61d 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -7,7 +7,7 @@
 from devito.symbolics import retrieve_function_carriers, INT
 from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
-                          CustomDimension)
+                          CustomDimension, Function)
 from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
@@ -149,6 +149,7 @@ def _rdim(self):
         return DimensionTuple(*dims, getters=self._gdim)
 
     def implicit_dims(self, implicit_dims):
+
         return as_tuple(implicit_dims) + self.sfunction.dimensions
 
     def _coeff_temps(self, implicit_dims):
@@ -164,14 +165,14 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         Generate interpolation indices for the DiscreteFunctions in ``variables``.
         """
         idx_subs = []
-        points = {d: [] for d in self._gdim}
         mapper = {d: [] for d in self._gdim}
 
         # Positon map and temporaries for it
         pmap = self.sfunction._coordinate_indices
 
         # Temporaries for the position
-        temps = self._positions(implicit_dims)
+        # temps = self._positions(implicit_dims)
+        temps = []
 
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
@@ -184,20 +185,17 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         # Create positions and indices temporaries/indirections
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos._subs(pdim, di) + rd,
-                             implicit_dims=implicit_dims)])
+            temps.extend([Eq(p, pos.subs({pdim: di}), implicit_dims=implicit_dims)])
 
             # Add conditional
             lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
             ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True)
-            points[d] = p
+                                             condition=condition, indirect=True) + rd
 
         # Substitution mapper for variables
-        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0)
-                               for ((k, c), pi) in zip(mapper.items(), points)})
+        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
                     for v in variables}
 
         return idx_subs, temps
@@ -239,19 +237,18 @@ def callback():
             idx_subs, temps = self._interpolation_indices(
                 variables, offset, field_offset=field_offset, implicit_dims=implicit_dims
             )
-
-            # Substitute coordinate base symbols into the interpolation coefficients
-            args = [_expr.xreplace(idx_subs) * self._weights]
             # Accumulate point-wise contributions into a temporary
             rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
-            summands.extend([Inc(rhs, i, implicit_dims=implicit_dims) for i in args])
+            # Substitute coordinate base symbols into the interpolation coefficients
+            summands.extend([Inc(rhs, _expr.xreplace(idx_subs) * self._weights,
+                                 implicit_dims=implicit_dims + self._rdim)])
 
             # Write/Incr `self`
             lhs = self.sfunction.subs(self_subs)
             ecls = Inc if increment else Eq
             last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
-            print(temps)
+
             return temps + summands + last
 
         return Interpolation(expr, offset, increment, self_subs, self, callback)
@@ -293,8 +290,9 @@ def callback():
             )
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * self._weights,
-                    implicit_dims=implicit_dims) for vsub in idx_subs]
+            eqns = [Inc(field.xreplace(idx_subs),
+                        _expr.xreplace(idx_subs) * self._weights,
+                        implicit_dims=implicit_dims + self._rdim)]
 
             return temps + eqns
 
@@ -313,8 +311,9 @@ class LinearInterpolator(WeightedInterpolator):
 
     @cached_property
     def _csym(self):
-        return [Symbol(name='c_%s_%s' % (self.sfunction.name, d.name))
-                for d in self._rdim]
+        return [Function(name='c_%s_%s' % (self.sfunction.name, d.name),
+                         dimensions=(r,), shape=(self.r,), space_order=0)
+                for (d, r) in zip(self._gdim, self._rdim)]
 
     @property
     def _weights(self):
@@ -322,14 +321,15 @@ def _weights(self):
 
     def _coeff_temps(self, implicit_dims):
         # Positions
-        pmap = self.sfunction._position_map.values()
+        pmap = self.sfunction._position_map
         poseq = [Eq(self._psym[d], pos - d.spacing*INT(floor(pos/d.spacing)),
                     implicit_dims=implicit_dims)
                  for (d, pos) in zip(self._gdim, pmap)]
         # Coeffs
-        ceq = [Eq(c, (r - r._symbolic_min)*(1 - p/d.spacing) +
-                  (1 - r - r._symbolic_min)*p/d.spacing)
-               for (c, d, r, p) in zip(self._csym, self._gdim, self._rdim, self._psym)]
+        ceq = [eq for (c, d, p) in zip(self._csym, self._gdim, self._psym)
+               for eq in [Eq(c[0], (1 - p/d.spacing), implicit_dims=implicit_dims),
+                          Eq(c[1], p/d.spacing, implicit_dims=implicit_dims)]]
+
         return poseq + ceq
 
 
@@ -346,8 +346,7 @@ class PrecomputedInterpolator(WeightedInterpolator):
 
     def _positions(self, implicit_dims):
         if self.sfunction.gridpoints is None:
-            return [Eq(v, k, implicit_dims=implicit_dims)
-                    for k, v in self.sfunction._position_map.items()]
+            return super()._positions(implicit_dims)
         # No position temp as we have directly the gridpoints
         return []
 
diff --git a/devito/tools/algorithms.py b/devito/tools/algorithms.py
index f7edde22f5..e770d59597 100644
--- a/devito/tools/algorithms.py
+++ b/devito/tools/algorithms.py
@@ -70,6 +70,7 @@ def toposort(data):
             processed = sorted(ordered) + processed
         mapper = OrderedDict([(item, (dep - ordered)) for item, dep in mapper.items()
                               if item not in ordered])
+
     if len(processed) != len(set(flatten(data) + flatten(data.values()))):
         raise ValueError("A cyclic dependency exists amongst %r" % data)
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index fe09103158..6a8e366a8f 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -296,7 +296,7 @@ def _coordinate_indices(self):
         individually (hence the need for a position map).
         """
         return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.values(),
+                      for p, i in zip(self._position_map,
                                       self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 79a5ebeffc..21a3533144 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -130,6 +130,7 @@ def init(data):
                                    interpolation_coeffs=interpolation_coeffs)
     eqn = sf.interpolate(m)
     op = Operator(eqn)
+    print(op)
     op()
     expected_values = [sin(point[0]) + sin(point[1]) for point in points]
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
@@ -163,6 +164,7 @@ def test_precomputed_interpolation_time():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
+    print(op)
     op(time_m=0, time_M=4)
 
     for it in range(5):
@@ -194,8 +196,9 @@ def test_precomputed_injection():
 
     expr = sf.inject(m, Float(1.))
 
-    Operator(expr)()
-
+    op = Operator(expr)
+    print(op)
+    op()
     indices = [slice(0, 2, 1), slice(9, 11, 1)]
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
 
@@ -229,7 +232,9 @@ def test_precomputed_injection_time():
 
     expr = sf.inject(m, Float(1.))
 
-    Operator(expr)()
+    op = Operator(expr)
+    print(op)
+    op()
     for ti in range(2):
         indices = [slice(0, 2, 1), slice(9, 11, 1)]
         assert np.allclose(m.data[ti][indices], nt*result/2, rtol=1.e-5)
@@ -251,7 +256,9 @@ def test_interpolate(shape, coords, npoints=20):
     xcoords = p.coordinates.data[:, 0]
 
     expr = p.interpolate(a)
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
@@ -270,7 +277,9 @@ def test_interpolate_cumm(shape, coords, npoints=20):
 
     p.data[:] = 1.
     expr = p.interpolate(a, increment=True)
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6)
 
@@ -290,20 +299,26 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
 
     p.data[:] = 1.
     expr = p.interpolate(a, u_t=a.indices[0]+1)
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[0, :], xcoords, rtol=1e-6)
 
     p.data[:] = 1.
     expr = p.interpolate(a, p_t=p.indices[0]+1)
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
 
     p.data[:] = 1.
     expr = p.interpolate(a, u_t=a.indices[0]+1,
                          p_t=p.indices[0]+1)
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
 
@@ -321,7 +336,9 @@ def test_interpolate_array(shape, coords, npoints=20):
     xcoords = p.coordinates.data[:, 0]
 
     expr = p.interpolate(a)
-    Operator(expr)(a=a, points=p.data[:])
+    op = Operator(expr)
+    print(op)
+    op(a=a, points=p.data[:])
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
@@ -340,7 +357,9 @@ def test_interpolate_custom(shape, coords, npoints=20):
 
     p.data[:] = 1.
     expr = p.interpolate(a * p.indices[0])
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
     assert np.allclose(p.data[1, :], 1.0 * xcoords, rtol=1e-6)
@@ -357,7 +376,7 @@ def test_interpolation_dx():
     sf1.coordinates.data[0, :] = (0.5, 0.5)
 
     op = Operator(sf1.interpolate(u.dx))
-
+    print(op)
     assert sf1.data.shape == (1,)
     u.data[:] = 0.0
     u.data[5, 5] = 4.0
@@ -384,7 +403,9 @@ def test_interpolate_indexed(shape, coords, npoints=20):
 
     p.data[:] = 1.
     expr = p.interpolate(a[a.grid.dimensions] * p.indices[0])
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
     assert np.allclose(p.data[1, :], 1.0 * xcoords, rtol=1e-6)
@@ -405,7 +426,9 @@ def test_inject(shape, coords, result, npoints=19):
 
     expr = p.inject(a, Float(1.))
 
-    Operator(expr)(a=a)
+    op = Operator(expr)
+    print(op)
+    op(a=a)
 
     indices = [slice(4, 6, 1) for _ in coords]
     indices[0] = slice(1, -1, 1)
@@ -427,7 +450,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
 
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1)
 
-    Operator(expr)(a=a, time=1)
+    op = Operator(expr)
+    print(op)
+    op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
     indices[1] = slice(1, -1, 1)
@@ -436,7 +461,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     a.data[:] = 0.
     expr = p.inject(a, Float(1.), p_t=p.indices[0]+1)
 
-    Operator(expr)(a=a, time=1)
+    op = Operator(expr)
+    print(op)
+    op(a=a, time=1)
 
     indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords]
     indices[1] = slice(1, -1, 1)
@@ -445,7 +472,9 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     a.data[:] = 0.
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1, p_t=p.indices[0]+1)
 
-    Operator(expr)(a=a, time=1)
+    op = Operator(expr)
+    print(op)
+    op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
     indices[1] = slice(1, -1, 1)
@@ -467,7 +496,9 @@ def test_inject_array(shape, coords, result, npoints=19):
     p2.data[:] = 1.
     expr = p.inject(a, p)
 
-    Operator(expr)(a=a, points=p2.data[:])
+    op = Operator(expr)
+    print(op)
+    op(a=a, points=p2.data[:])
 
     indices = [slice(4, 6, 1) for _ in coords]
     indices[0] = slice(1, -1, 1)
@@ -489,7 +520,9 @@ def test_inject_from_field(shape, coords, result, npoints=19):
     p = points(a.grid, ranges=coords, npoints=npoints)
 
     expr = p.inject(field=a, expr=b)
-    Operator(expr)(a=a, b=b)
+    op = Operator(expr)
+    print(op)
+    op(a=a, b=b)
 
     indices = [slice(4, 6, 1) for _ in coords]
     indices[0] = slice(1, -1, 1)
@@ -562,6 +595,7 @@ def test_edge_sparse():
     expr = sf1.interpolate(u)
     subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)}
     op = Operator(expr, subs=subs)
+    print(op)
     op()
     assert sf1.data[0] == 0
 
@@ -656,6 +690,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
+    print(op)
 
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2

From f7e955632141400578b2b3bdb0c2aed13dca2213 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 8 Jun 2023 08:38:56 -0400
Subject: [PATCH 17/90] compiler: fix dimension_sort to avoid missing indirect
 conditionals

---
 devito/ir/equations/algorithms.py  | 27 +++++++-------
 devito/operations/interpolators.py | 57 +++++++++++-------------------
 devito/tools/algorithms.py         |  8 ++---
 devito/types/sparse.py             |  6 ++--
 tests/test_interpolation.py        | 45 ++++++++++++-----------
 5 files changed, 62 insertions(+), 81 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index c50f2a17ab..529cac27d1 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -3,8 +3,8 @@
 
 from sympy import sympify
 
-from devito.symbolics import retrieve_indexed, uxreplace
-from devito.tools import PartialOrderTuple, as_tuple, filter_sorted, flatten
+from devito.symbolics import retrieve_indexed, uxreplace, retrieve_dimensions
+from devito.tools import PartialOrderTuple, as_tuple, filter_ordered, flatten, filter_sorted
 from devito.types import Dimension, IgnoreDimSort, ConditionalDimension
 from devito.types.basic import AbstractFunction
 
@@ -33,14 +33,8 @@ def handle_indexed(indexed):
 
                 # Fallback: Just insert all the Dimensions we find, regardless of
                 # what the user is attempting to do
-                rels = []
-                for d in filter_sorted(i.free_symbols):
-                    if isinstance(d, ConditionalDimension) and d.indirect:
-                        continue
-                    elif isinstance(d, Dimension):
-                        rels.append(d)
-
-                relation.extend(rels)
+                relation.extend(filter_sorted([d for d in i.free_symbols
+                                               if isinstance(d, Dimension)]))
 
         # StencilDimensions are lowered subsequently through special compiler
         # passes, so they can be ignored here
@@ -56,16 +50,18 @@ def handle_indexed(indexed):
     # Add in any implicit dimension (typical of scalar temporaries, or Step)
     relations.add(expr.implicit_dims)
 
-    # Add in leftover free dimensions (not an Indexed' index)
-    extra = set([i for i in expr.free_symbols if isinstance(i, Dimension)])
+    # Add in leftover free dimensions (not an Indexed' index if used purely as expr)
+    extra = set(retrieve_dimensions(expr))
 
     # Add in pure data dimensions (e.g., those accessed only via explicit values,
     # such as A[3])
     indexeds = retrieve_indexed(expr, deep=True)
-    extra.update(set().union(*[set(i.function.dimensions) for i in indexeds]))
+    for i in indexeds:
+        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices) if e.is_integer}
+        extra.update(expl_dims)
 
     # Enforce determinism
-    extra = filter_sorted(extra, key=attrgetter('name'))
+    extra = filter_sorted(extra)
 
     # Add in implicit relations for parent dimensions
     # -----------------------------------------------
@@ -74,11 +70,12 @@ def handle_indexed(indexed):
     # preceed `time`, while `t`, and therefore `time`, *must* appear before `x`,
     # as indicated by the second relation
     implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
+
     # 2) To handle cases such as `((time, xi), (x,))`, where `xi` a SubDimension
     # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
     # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
     # `(x, time, xi)` might be returned instead, which would be non-sense
-    implicit_relations.update({tuple(d.root for d in i) for i in relations})
+    implicit_relations.update({tuple(filter_ordered(d.root for d in i)) for i in relations})
 
     ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations))
 
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 927f67f61d..5757ccac0e 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -141,7 +141,6 @@ def r(self):
 
     @cached_property
     def _rdim(self):
-
         dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
                                 -self.r+1, self.r, len(range(-self.r+1, self.r+1)))
                 for d in self._gdim]
@@ -149,7 +148,6 @@ def _rdim(self):
         return DimensionTuple(*dims, getters=self._gdim)
 
     def implicit_dims(self, implicit_dims):
-
         return as_tuple(implicit_dims) + self.sfunction.dimensions
 
     def _coeff_temps(self, implicit_dims):
@@ -166,33 +164,30 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         """
         idx_subs = []
         mapper = {d: [] for d in self._gdim}
-
+        pdim = self.sfunction._sparse_dim
+    
         # Positon map and temporaries for it
         pmap = self.sfunction._coordinate_indices
 
         # Temporaries for the position
-        # temps = self._positions(implicit_dims)
-        temps = []
-
+        temps = self._positions(implicit_dims)
+    
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-
-        try:
-            pdim = self.sfunction.coordinates.dimensions[-1]
-        except AttributeError:
-            pdim = self.sfunction.gridpoints.dimensions[-1]
-
+    
         # Create positions and indices temporaries/indirections
+        pr = []
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos.subs({pdim: di}), implicit_dims=implicit_dims)])
+            temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
 
             # Add conditional
-            lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+            lb = sympy.And(p >= d.symbolic_min-self.r, evaluate=False)
+            ub = sympy.And(p <= d.symbolic_max+self.r, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True) + rd
+                                             condition=condition, indirect=True)
+            pr.append(rd)
 
         # Substitution mapper for variables
         idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
@@ -231,7 +226,7 @@ def callback():
             variables = list(retrieve_function_carriers(_expr))
 
             # Need to get origin of the field in case it is staggered
-            # TODO: handle each variable staggereing spearately
+            # TODO: handle each variable staggering separately
             field_offset = variables[0].origin
             # List of indirection indices for all adjacent grid points
             idx_subs, temps = self._interpolation_indices(
@@ -242,14 +237,14 @@ def callback():
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
             # Substitute coordinate base symbols into the interpolation coefficients
             summands.extend([Inc(rhs, _expr.xreplace(idx_subs) * self._weights,
-                                 implicit_dims=implicit_dims + self._rdim)])
+                                 implicit_dims=implicit_dims)])
 
             # Write/Incr `self`
             lhs = self.sfunction.subs(self_subs)
             ecls = Inc if increment else Eq
             last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
 
-            return temps + summands + last
+            return [summands[0]] + temps + summands[1:] + last
 
         return Interpolation(expr, offset, increment, self_subs, self, callback)
 
@@ -292,7 +287,7 @@ def callback():
             # Substitute coordinate base symbols into the interpolation coefficients
             eqns = [Inc(field.xreplace(idx_subs),
                         _expr.xreplace(idx_subs) * self._weights,
-                        implicit_dims=implicit_dims + self._rdim)]
+                        implicit_dims=implicit_dims)]
 
             return temps + eqns
 
@@ -308,29 +303,19 @@ class LinearInterpolator(WeightedInterpolator):
     ----------
     sfunction: The SparseFunction that this Interpolator operates on.
     """
-
-    @cached_property
-    def _csym(self):
-        return [Function(name='c_%s_%s' % (self.sfunction.name, d.name),
-                         dimensions=(r,), shape=(self.r,), space_order=0)
-                for (d, r) in zip(self._gdim, self._rdim)]
-
     @property
     def _weights(self):
-        return prod(self._csym)
+        c = [(1 - p) * (1 - rd) + rd * p
+             for (p, d, rd) in zip(self._psym, self._gdim, self._rdim)]
+        return prod(c)
 
     def _coeff_temps(self, implicit_dims):
         # Positions
         pmap = self.sfunction._position_map
-        poseq = [Eq(self._psym[d], pos - d.spacing*INT(floor(pos/d.spacing)),
+        poseq = [Eq(self._psym[d], pos/d.spacing - floor(pos/d.spacing),
                     implicit_dims=implicit_dims)
-                 for (d, pos) in zip(self._gdim, pmap)]
-        # Coeffs
-        ceq = [eq for (c, d, p) in zip(self._csym, self._gdim, self._psym)
-               for eq in [Eq(c[0], (1 - p/d.spacing), implicit_dims=implicit_dims),
-                          Eq(c[1], p/d.spacing, implicit_dims=implicit_dims)]]
-
-        return poseq + ceq
+                 for (d, pos) in zip(self._gdim, pmap.values())]
+        return poseq
 
 
 class PrecomputedInterpolator(WeightedInterpolator):
diff --git a/devito/tools/algorithms.py b/devito/tools/algorithms.py
index e770d59597..ec074455ac 100644
--- a/devito/tools/algorithms.py
+++ b/devito/tools/algorithms.py
@@ -10,10 +10,10 @@
 
 def build_dependence_lists(elements):
     """
-    Given an iterable of dependences, return the dependence lists as a
+    Given an iterable of dependencies, return the dependence lists as a
     mapper suitable for graph-like algorithms. A dependence is an iterable of
-    elements ``[a, b, c, ...]``, meaning that ``a`` preceeds ``b`` and ``c``,
-    ``b`` preceeds ``c``, and so on.
+    elements ``[a, b, c, ...]``, meaning that ``a`` precedes ``b`` and ``c``,
+    ``b`` precedes ``c``, and so on.
     """
     mapper = OrderedDict()
     for element in elements:
@@ -35,7 +35,7 @@ def toposort(data):
           dependent items. The dictionary may contain self-dependencies
           (which are ignored), and dependent items that are not also
           dict keys.
-        * An iterable of dependences as expected by :func:`build_dependence_lists`.
+        * An iterable of dependencies as expected by :func:`build_dependence_lists`.
 
     Readapted from: ::
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 6a8e366a8f..98275388d1 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -108,7 +108,7 @@ def __subfunc_setup__(self, key, suffix):
                              "or iterable (e.g., list, np.ndarray)" % key)
 
         name = '%s_%s' % (self.name, suffix)
-        dimensions = (self.indices[self._sparse_position], Dimension(name='d'))
+        dimensions = (self._sparse_dim, Dimension(name='d'))
         shape = (self.npoint, self.grid.dim)
 
         if key is None:
@@ -296,7 +296,7 @@ def _coordinate_indices(self):
         individually (hence the need for a position map).
         """
         return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map,
+                      for p, i in zip(self._position_map.values(),
                                       self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
@@ -1171,7 +1171,7 @@ def _coordinate_indices(self):
             return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
         else:
             return tuple([INT(floor(p / i.spacing))
-                          for p, i in zip(self._position_map,
+                          for p, i in zip(self._position_map.values(),
                                           self.grid.dimensions[:self.grid.dim])])
 
 
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 21a3533144..5ea40769f9 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -130,7 +130,7 @@ def init(data):
                                    interpolation_coeffs=interpolation_coeffs)
     eqn = sf.interpolate(m)
     op = Operator(eqn)
-    print(op)
+
     op()
     expected_values = [sin(point[0]) + sin(point[1]) for point in points]
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
@@ -164,7 +164,7 @@ def test_precomputed_interpolation_time():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
-    print(op)
+
     op(time_m=0, time_M=4)
 
     for it in range(5):
@@ -197,7 +197,7 @@ def test_precomputed_injection():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
-    print(op)
+
     op()
     indices = [slice(0, 2, 1), slice(9, 11, 1)]
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
@@ -233,7 +233,7 @@ def test_precomputed_injection_time():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
-    print(op)
+
     op()
     for ti in range(2):
         indices = [slice(0, 2, 1), slice(9, 11, 1)]
@@ -257,9 +257,9 @@ def test_interpolate(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
-    print(op)
-    op(a=a)
 
+    op(a=a)
+    from IPython import embed; embed()
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
 
@@ -278,7 +278,7 @@ def test_interpolate_cumm(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, increment=True)
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6)
@@ -300,7 +300,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, u_t=a.indices[0]+1)
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[0, :], xcoords, rtol=1e-6)
@@ -308,7 +308,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, p_t=p.indices[0]+1)
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -317,7 +317,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     expr = p.interpolate(a, u_t=a.indices[0]+1,
                          p_t=p.indices[0]+1)
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -337,7 +337,7 @@ def test_interpolate_array(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
-    print(op)
+
     op(a=a, points=p.data[:])
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
@@ -358,7 +358,7 @@ def test_interpolate_custom(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a * p.indices[0])
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -376,7 +376,7 @@ def test_interpolation_dx():
     sf1.coordinates.data[0, :] = (0.5, 0.5)
 
     op = Operator(sf1.interpolate(u.dx))
-    print(op)
+
     assert sf1.data.shape == (1,)
     u.data[:] = 0.0
     u.data[5, 5] = 4.0
@@ -404,7 +404,7 @@ def test_interpolate_indexed(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a[a.grid.dimensions] * p.indices[0])
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -427,7 +427,7 @@ def test_inject(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.))
 
     op = Operator(expr)
-    print(op)
+
     op(a=a)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -451,7 +451,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1)
 
     op = Operator(expr)
-    print(op)
+
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -462,7 +462,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), p_t=p.indices[0]+1)
 
     op = Operator(expr)
-    print(op)
+
     op(a=a, time=1)
 
     indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -473,7 +473,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1, p_t=p.indices[0]+1)
 
     op = Operator(expr)
-    print(op)
+
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -497,7 +497,7 @@ def test_inject_array(shape, coords, result, npoints=19):
     expr = p.inject(a, p)
 
     op = Operator(expr)
-    print(op)
+
     op(a=a, points=p2.data[:])
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -521,7 +521,7 @@ def test_inject_from_field(shape, coords, result, npoints=19):
 
     expr = p.inject(field=a, expr=b)
     op = Operator(expr)
-    print(op)
+
     op(a=a, b=b)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -595,7 +595,7 @@ def test_edge_sparse():
     expr = sf1.interpolate(u)
     subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)}
     op = Operator(expr, subs=subs)
-    print(op)
+
     op()
     assert sf1.data[0] == 0
 
@@ -634,7 +634,7 @@ def test_msf_interpolate():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
-    print(op)
+
 
     sf.manual_scatter()
     op(time_m=0, time_M=4)
@@ -691,7 +691,6 @@ class SparseFirst(SparseFunction):
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
     print(op)
-
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)

From 845db2361917314491f807a09e01455d9c4d1f1c Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 9 Jun 2023 09:14:11 -0400
Subject: [PATCH 18/90] compiler: fix dtype of aliases

---
 devito/ir/equations/algorithms.py  |  16 ++-
 devito/operations/interpolators.py |  28 ++---
 devito/operator/operator.py        |   2 +-
 devito/passes/clusters/aliases.py  |   9 +-
 devito/symbolics/inspection.py     |  21 +++-
 devito/tools/dtypes_lowering.py    |   1 +
 devito/types/dense.py              |  14 +--
 devito/types/dimension.py          |   9 +-
 devito/types/sparse.py             | 173 +++++++++++++----------------
 tests/test_interpolation.py        |   7 +-
 tests/test_pickle.py               |  85 +++++++-------
 11 files changed, 184 insertions(+), 181 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 529cac27d1..11e778f185 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -1,11 +1,11 @@
 from collections.abc import Iterable
-from operator import attrgetter
 
 from sympy import sympify
 
 from devito.symbolics import retrieve_indexed, uxreplace, retrieve_dimensions
-from devito.tools import PartialOrderTuple, as_tuple, filter_ordered, flatten, filter_sorted
-from devito.types import Dimension, IgnoreDimSort, ConditionalDimension
+from devito.tools import (PartialOrderTuple, as_tuple, flatten,
+                          filter_sorted, filter_ordered)
+from devito.types import Dimension, IgnoreDimSort
 from devito.types.basic import AbstractFunction
 
 __all__ = ['dimension_sort', 'lower_exprs']
@@ -50,15 +50,13 @@ def handle_indexed(indexed):
     # Add in any implicit dimension (typical of scalar temporaries, or Step)
     relations.add(expr.implicit_dims)
 
-    # Add in leftover free dimensions (not an Indexed' index if used purely as expr)
+    # Add in leftover free dimensions (not an Indexed' index)
     extra = set(retrieve_dimensions(expr))
 
     # Add in pure data dimensions (e.g., those accessed only via explicit values,
     # such as A[3])
     indexeds = retrieve_indexed(expr, deep=True)
-    for i in indexeds:
-        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices) if e.is_integer}
-        extra.update(expl_dims)
+    extra.update(set().union(*[set(i.function.dimensions) for i in indexeds]))
 
     # Enforce determinism
     extra = filter_sorted(extra)
@@ -70,12 +68,12 @@ def handle_indexed(indexed):
     # preceed `time`, while `t`, and therefore `time`, *must* appear before `x`,
     # as indicated by the second relation
     implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
-
     # 2) To handle cases such as `((time, xi), (x,))`, where `xi` a SubDimension
     # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
     # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
     # `(x, time, xi)` might be returned instead, which would be non-sense
-    implicit_relations.update({tuple(filter_ordered(d.root for d in i)) for i in relations})
+    implicit_relations.update({tuple(filter_ordered(d.root for d in i))
+                               for i in relations})
 
     ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations))
 
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 5757ccac0e..19bee16068 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -1,13 +1,14 @@
 from abc import ABC, abstractmethod
+from collections import defaultdict
 
 import sympy
 from cached_property import cached_property
 
 from devito.finite_differences.elementary import floor
-from devito.symbolics import retrieve_function_carriers, INT
+from devito.symbolics import retrieve_function_carriers
 from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
-                          CustomDimension, Function)
+                          CustomDimension)
 from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
@@ -135,7 +136,7 @@ def _psym(self):
     def _gdim(self):
         return self.grid.dimensions
 
-    @cached_property
+    @property
     def r(self):
         return self.sfunction.r
 
@@ -147,7 +148,7 @@ def _rdim(self):
 
         return DimensionTuple(*dims, getters=self._gdim)
 
-    def implicit_dims(self, implicit_dims):
+    def _augment_implicit_dims(self, implicit_dims):
         return as_tuple(implicit_dims) + self.sfunction.dimensions
 
     def _coeff_temps(self, implicit_dims):
@@ -163,27 +164,26 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         Generate interpolation indices for the DiscreteFunctions in ``variables``.
         """
         idx_subs = []
-        mapper = {d: [] for d in self._gdim}
-        pdim = self.sfunction._sparse_dim
-    
+        mapper = defaultdict(list)
+
         # Positon map and temporaries for it
         pmap = self.sfunction._coordinate_indices
 
         # Temporaries for the position
         temps = self._positions(implicit_dims)
-    
+
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-    
+
         # Create positions and indices temporaries/indirections
         pr = []
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
             temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
 
-            # Add conditional
-            lb = sympy.And(p >= d.symbolic_min-self.r, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max+self.r, evaluate=False)
+            # Add conditional to avoid OOB
+            lb = sympy.And(p >= d.symbolic_min, evaluate=False)
+            ub = sympy.And(p <= d.symbolic_max, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
                                              condition=condition, indirect=True)
@@ -213,7 +213,7 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={},
             interpolation expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self.implicit_dims(implicit_dims)
+        implicit_dims = self._augment_implicit_dims(implicit_dims)
 
         def callback():
             # Derivatives must be evaluated before the introduction of indirect accesses
@@ -265,7 +265,7 @@ def inject(self, field, expr, offset=0, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self.implicit_dims(implicit_dims)
+        implicit_dims = self._augment_implicit_dims(implicit_dims)
 
         def callback():
             # Derivatives must be evaluated before the introduction of indirect accesses
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 5d8828fe63..61a70272a0 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -632,7 +632,7 @@ def _postprocess_arguments(self, args, **kwargs):
         """Process runtime arguments upon returning from ``.apply()``."""
         for p in self.parameters:
             try:
-                subfuncs = (args[s] for s in p._subfunc_names)
+                subfuncs = (args[s.name] for s in p._sub_functions)
                 p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
             except AttributeError:
                 p._arg_apply(args[p.name], alias=kwargs.get(p.name))
diff --git a/devito/passes/clusters/aliases.py b/devito/passes/clusters/aliases.py
index 1e8626da18..2546e948e3 100644
--- a/devito/passes/clusters/aliases.py
+++ b/devito/passes/clusters/aliases.py
@@ -12,7 +12,8 @@
                        Queue, IntervalGroup, LabeledVector, normalize_properties,
                        relax_properties, sdims_min, sdims_max)
 from devito.symbolics import (Uxmapper, compare_ops, estimate_cost, q_constant,
-                              reuse_if_untouched, retrieve_indexed, search, uxreplace)
+                              reuse_if_untouched, retrieve_indexed, search, uxreplace,
+                              sympy_dtype)
 from devito.tools import (Stamp, as_mapper, as_tuple, flatten, frozendict, generator,
                           split, timed_pass)
 from devito.types import (Array, TempFunction, Eq, Symbol, Temp, ModuloDimension,
@@ -832,7 +833,11 @@ def lower_schedule(schedule, meta, sregistry, ftemps):
     subs = {}
     for pivot, writeto, ispace, aliaseds, indicess, _ in schedule:
         name = sregistry.make_name()
-        dtype = meta.dtype
+        # Infer the dtype for the pivot
+        # This prevents cases such as `floor(a*b)` with `a` and `b` floats
+        # that would create a temporary `int r = b`, leading to erroneous numerical
+        # results. Such cases happen, e.g., with the positions of sparse functions.
+        dtype = sympy_dtype(pivot, meta.dtype) or meta.dtype
 
         if writeto:
             # The Dimensions defining the shape of Array
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index cc05188cfb..f58787ad07 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -10,8 +10,9 @@
                                              DefFunction, ReservedWord)
 from devito.symbolics.queries import q_routine
 from devito.tools import as_tuple, prod
+from devito.tools.dtypes_lowering import infer_dtype
 
-__all__ = ['compare_ops', 'estimate_cost', 'has_integer_args']
+__all__ = ['compare_ops', 'estimate_cost', 'has_integer_args', 'sympy_dtype']
 
 
 def compare_ops(e1, e2):
@@ -260,3 +261,21 @@ def has_integer_args(*args):
         except AttributeError:
             res = res and has_integer_args(a)
     return res
+
+
+def sympy_dtype(expr, default):
+    """
+    Try to infer the data type of the expression.
+    Return the default if none is found.
+    """
+    args = expr.args
+
+    # Symbol/... without argument, check its dtype
+    if len(args) == 0:
+        try:
+            return expr.dtype
+        except AttributeError:
+            return default
+    else:
+        # Infer expression dtype from its arguments
+        return infer_dtype([sympy_dtype(a, default) for a in expr.args])
diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py
index ac42a33965..c8fe8a3fa5 100644
--- a/devito/tools/dtypes_lowering.py
+++ b/devito/tools/dtypes_lowering.py
@@ -104,6 +104,7 @@ def dtype_to_ctype(dtype):
         return ctypes_vector_mapper[dtype]
     except KeyError:
         pass
+
     if issubclass(dtype, ctypes._SimpleCData):
         # Bypass np.ctypeslib's normalization rules such as
         # `np.ctypeslib.as_ctypes_type(ctypes.c_void_p) -> ctypes.c_ulong`
diff --git a/devito/types/dense.py b/devito/types/dense.py
index 0ad9b1f3fe..9fb96ec5cd 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1466,13 +1466,6 @@ def __padding_setup__(self, **kwargs):
     def _halo_exchange(self):
         return
 
-    @property
-    def origin(self):
-        """
-        SubFunction have zero origin
-        """
-        return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
-
     def _arg_values(self, **kwargs):
         if self.name in kwargs:
             raise RuntimeError("`%s` is a SubFunction, so it can't be assigned "
@@ -1484,6 +1477,13 @@ def _arg_values(self, **kwargs):
     def parent(self):
         return self._parent
 
+    @property
+    def origin(self):
+        """
+        SubFunction have zero origin
+        """
+        return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
+
 
 class TempFunction(DiscreteFunction):
 
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 6626f09517..dfb45a50d7 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -1581,9 +1581,12 @@ def _separate_dims(cls, d0, d1, ofs_items):
             return None
 
 
-def dimensions(names):
-    assert type(names) is str
-    return tuple(Dimension(i) for i in names.split())
+def dimensions(names, n=1):
+    if n > 1:
+        return tuple(Dimension('%s%s' % (names, i)) for i in range(n))
+    else:
+        assert type(names) is str
+        return tuple(Dimension(i) for i in names.split())
 
 
 BOTTOM = Dimension(name='⊥')
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 98275388d1..8ca20c670d 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -12,7 +12,6 @@
 
 from devito.finite_differences import generate_fd_shortcuts
 from devito.finite_differences.elementary import floor
-from devito.logger import warning
 from devito.mpi import MPI, SparseDistributor
 from devito.operations import LinearInterpolator, PrecomputedInterpolator
 from devito.symbolics import (INT, cast_mapper, indexify,
@@ -57,12 +56,6 @@ def __init_finalize__(self, *args, **kwargs):
         # Dynamically add derivative short-cuts
         self._fd = self.__fd_setup__()
 
-    def __fd_setup__(self):
-        """
-        Dynamically add derivative short-cuts.
-        """
-        return generate_fd_shortcuts(self.dimensions, self.space_order)
-
     @classmethod
     def __indices_setup__(cls, *args, **kwargs):
         dimensions = as_tuple(kwargs.get('dimensions'))
@@ -89,6 +82,12 @@ def __shape_setup__(cls, **kwargs):
             shape = (glb_npoint[grid.distributor.myrank],)
         return shape
 
+    def __fd_setup__(self):
+        """
+        Dynamically add derivative short-cuts.
+        """
+        return generate_fd_shortcuts(self.dimensions, self.space_order)
+
     def __distributor_setup__(self, **kwargs):
         """
         A `SparseDistributor` handles the SparseFunction decomposition based on
@@ -117,10 +116,11 @@ def __subfunc_setup__(self, key, suffix):
         else:
             if not isinstance(key, np.ndarray):
                 key = np.array(key)
-            # Need to fix this check to get global npoint, global_shape broken
-            # if shape != key.shape[:2] and self.distributor.nprocs == 1:
-            #     raise ValueError("Incompatible shape for %s, `%s`; expected `%s`" %
-            #                      (suffix, shape, key.shape[:2]))
+
+            if (shape != key.shape[:2] and key.shape != (shape[1],)) and \
+                    self._distributor.nprocs == 1:
+                raise ValueError("Incompatible shape for %s, `%s`; expected `%s`" %
+                                 (suffix, key.shape[:2], shape))
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
@@ -130,8 +130,7 @@ def __subfunc_setup__(self, key, suffix):
 
         if key is not None and key.ndim > 2:
             shape = (*shape, *key.shape[2:])
-            # Safely assume there is at most 3 (3D) extra dimensions
-            dimensions = (*dimensions, *mkdims("ijk"[:(key.ndim-2)]))
+            dimensions = (*dimensions, *mkdims("i", n=key.ndim-2))
 
         sf = SubFunction(
             name=name, parent=self, dtype=dtype, dimensions=dimensions,
@@ -148,10 +147,6 @@ def __subfunc_setup__(self, key, suffix):
 
         return sf
 
-    def _halo_exchange(self):
-        # no-op for SparseFunctions
-        return
-
     @property
     def npoint(self):
         return self.shape[self._sparse_position]
@@ -180,7 +175,7 @@ def r(self):
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
 
-    @cached_property
+    @property
     def dist_origin(self):
         return self._dist_origin
 
@@ -195,22 +190,8 @@ def _smpitype(self):
         return {s: dtype_to_mpidtype(s.dtype) for s in sfuncs}
 
     @property
-    def comm(self):
-        return self.grid.distributor.comm
-
-    @property
-    def distributor(self):
-        return self.grid.distributor
-
-    @property
-    def _subfunc_names(self):
-        names = []
-        for s in self._sub_functions:
-            try:
-                names.append(getattr(self, s).name)
-            except AttributeError:
-                pass
-        return tuple(names)
+    def _comm(self):
+        return self._distributor.comm
 
     @property
     def _coords_indices(self):
@@ -252,6 +233,24 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
+    @property
+    def _support(self):
+        """
+        The grid points surrounding each sparse point within the radius of self's
+        injection/interpolation operators.
+        """
+        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
+        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
+        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
+                        axis=2)
+
+    @property
+    def _dist_datamap(self):
+        """
+        Mapper ``M : MPI rank -> required sparse data``.
+        """
+        return self.grid._distributor.glb_to_rank(self._support) or {}
+
     @cached_property
     def _point_increments(self):
         """Index increments in each Dimension for each point symbol."""
@@ -299,6 +298,15 @@ def _coordinate_indices(self):
                       for p, i in zip(self._position_map.values(),
                                       self.grid.dimensions[:self.grid.dim])])
 
+    @cached_property
+    def _dist_reorder_mask(self):
+        """
+        An ordering mask that puts ``self._sparse_position`` at the front.
+        """
+        ret = (self._sparse_position,)
+        ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
+        return ret
+
     def _coordinate_bases(self, field_offset):
         """Symbol for the base coordinates of the reference grid point."""
         return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
@@ -320,24 +328,6 @@ def inject(self, *args, **kwargs):
         """
         return self.interpolator.inject(*args, **kwargs)
 
-    @property
-    def _support(self):
-        """
-        The grid points surrounding each sparse point within the radius of self's
-        injection/interpolation operators.
-        """
-        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
-        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
-        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
-                        axis=2)
-
-    @property
-    def _dist_datamap(self):
-        """
-        Mapper ``M : MPI rank -> required sparse data``.
-        """
-        return self.distributor.glb_to_rank(self._support) or {}
-
     def _dist_scatter_mask(self, dmap=None):
         """
         A mask to index into ``self.data``, which creates a new data array that
@@ -358,7 +348,7 @@ def _dist_count(self, dmap=None):
         is this MPI rank expected to send/receive to/from each other MPI rank.
         """
         dmap = dmap or self._dist_datamap
-        comm = self.grid.distributor.comm
+        comm = self._comm
 
         ssparse = np.array([len(dmap.get(i, [])) for i in range(comm.size)], dtype=int)
         rsparse = np.empty(comm.size, dtype=int)
@@ -366,15 +356,6 @@ def _dist_count(self, dmap=None):
 
         return ssparse, rsparse
 
-    @cached_property
-    def _dist_reorder_mask(self):
-        """
-        An ordering mask that puts ``self._sparse_position`` at the front.
-        """
-        ret = (self._sparse_position,)
-        ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
-        return ret
-
     def _dist_alltoall(self, dmap=None):
         """
         The metadata necessary to perform an ``MPI_Alltoallv`` distributing the
@@ -455,7 +436,7 @@ def _dist_data_scatter(self, data=None):
         data = data if data is not None else self.data._local
 
         # If not using MPI, don't waste time
-        if self.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             return data
 
         # Compute dist map only once
@@ -469,40 +450,40 @@ def _dist_data_scatter(self, data=None):
         # Send out the sparse point values
         _, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap)
         scattered = np.empty(shape=rshape, dtype=self.dtype)
-        self.comm.Alltoallv([data, scount, sdisp, self._mpitype],
-                            [scattered, rcount, rdisp, self._mpitype])
+        self._comm.Alltoallv([data, scount, sdisp, self._mpitype],
+                             [scattered, rcount, rdisp, self._mpitype])
 
         # Unpack data values so that they follow the expected storage layout
         return np.ascontiguousarray(np.transpose(scattered, self._dist_reorder_mask))
 
     def _dist_subfunc_scatter(self, subfunc):
         # If not using MPI, don't waste time
-        if self.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             return {subfunc: subfunc.data}
 
         # Compute dist map only once
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
 
-        # Pack (reordered) subfunc values so that they can be sent out via an Alltoallv
+        # Pack (reordered) SubFunction values so that they can be sent via an Alltoallv
         sfuncd = subfunc.data._local[mask[self._sparse_position]]
 
-        # Send out the sparse point coordinates
+        # Send out the sparse point SubFunction values
         _, scount, sdisp, rshape, rcount, rdisp = \
             self._dist_subfunc_alltoall(subfunc, dmap=dmap)
         scattered = np.empty(shape=rshape, dtype=subfunc.dtype)
-        self.comm.Alltoallv([sfuncd, scount, sdisp, self._smpitype[subfunc]],
-                            [scattered, rcount, rdisp, self._smpitype[subfunc]])
+        self._comm.Alltoallv([sfuncd, scount, sdisp, self._smpitype[subfunc]],
+                             [scattered, rcount, rdisp, self._smpitype[subfunc]])
         sfuncd = scattered
 
-        # Translate global subfunc values into local subfunc values
+        # Translate global SubFunction values into local SubFunction values
         if self.dist_origin[subfunc] is not None:
             sfuncd = sfuncd - np.array(self.dist_origin[subfunc], dtype=subfunc.dtype)
         return {subfunc: sfuncd}
 
     def _dist_data_gather(self, data):
         # If not using MPI, don't waste time
-        if self.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             return
 
         # Compute dist map only once
@@ -511,11 +492,13 @@ def _dist_data_gather(self, data):
 
         # Pack sparse data values so that they can be sent out via an Alltoallv
         data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
+
         # Send back the sparse point values
         sshape, scount, sdisp, _, rcount, rdisp = self._dist_alltoall(dmap=dmap)
         gathered = np.empty(shape=sshape, dtype=self.dtype)
-        self.comm.Alltoallv([data, rcount, rdisp, self._mpitype],
-                            [gathered, scount, sdisp, self._mpitype])
+        self._comm.Alltoallv([data, rcount, rdisp, self._mpitype],
+                             [gathered, scount, sdisp, self._mpitype])
+
         # Unpack data values so that they follow the expected storage layout
         gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask))
         self.data
@@ -527,21 +510,23 @@ def _dist_subfunc_gather(self, sfuncd, sfunc):
         except AttributeError:
             pass
         # If not using MPI, don't waste time
-        if self.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             return
 
         # Compute dist map only once
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
-        # Pack (reordered) subfunc values so that they can be sent out via an Alltoallv
+
+        # Pack (reordered) SubFunction values so that they can be sent via an Alltoallv
         if self.dist_origin[sfunc] is not None:
             sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=sfunc.dtype)
-        # Send out the sparse point subfunc values
+
+        # Send out the sparse point SubFunction values
         sshape, scount, sdisp, _, rcount, rdisp = \
             self._dist_subfunc_alltoall(sfunc, dmap=dmap)
         gathered = np.empty(shape=sshape, dtype=sfunc.dtype)
-        self.comm.Alltoallv([sfuncd, rcount, rdisp, self._smpitype[sfunc]],
-                            [gathered, scount, sdisp, self._smpitype[sfunc]])
+        self._comm.Alltoallv([sfuncd, rcount, rdisp, self._smpitype[sfunc]],
+                             [gathered, scount, sdisp, self._smpitype[sfunc]])
         sfunc.data._local[mask[self._sparse_position]] = gathered[:]
 
         # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent
@@ -583,6 +568,10 @@ def _arg_defaults(self, alias=None):
     def _eval_at(self, func):
         return self
 
+    def _halo_exchange(self):
+        # no-op for SparseFunctions
+        return
+
     def _arg_values(self, **kwargs):
         # Add value override for own data if it is provided, otherwise
         # use defaults
@@ -604,12 +593,12 @@ def _arg_values(self, **kwargs):
 
         return values
 
-    def _arg_apply(self, dataobj, *subfunc, alias=None):
+    def _arg_apply(self, dataobj, *subfuncs, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
-            key._dist_gather(self._C_as_ndarray(dataobj), *subfunc)
-        elif self.grid.distributor.nprocs > 1:
+            key._dist_gather(self._C_as_ndarray(dataobj), *subfuncs)
+        elif self._distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
 
@@ -1124,10 +1113,6 @@ def __init_finalize__(self, *args, **kwargs):
             self.__subfunc_setup__(interpolation_coeffs, 'interp_coeffs')
         self._dist_origin.update({self._interpolation_coeffs: None})
 
-        warning("Ensure that the provided interpolation coefficient and grid "
-                "point values are computed on the final grid that will be used "
-                "for other computations.")
-
         self.interpolator = PrecomputedInterpolator(self)
 
     @property
@@ -1491,7 +1476,7 @@ def __init_finalize__(self, *args, **kwargs):
         self.nnzdim = Dimension('nnz_%s' % self.name)
 
         # In the non-MPI case, at least, we should fill these in once
-        if self.grid.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             m_coo = self.matrix.tocoo(copy=False)
             nnz_size = m_coo.nnz
         else:
@@ -1561,7 +1546,7 @@ def __init_finalize__(self, *args, **kwargs):
             parent=self,
         )
 
-        if self.grid.distributor.nprocs == 1:
+        if self._distributor.nprocs == 1:
             self._mrow.data[:] = m_coo.row
             self._mcol.data[:] = m_coo.col
             self._mval.data[:] = m_coo.data
@@ -1831,7 +1816,7 @@ def gridpoints(self):
 
     def _rank_to_points(self):
         """
-        For each rank in self.grid.distributor, return
+        For each rank in self._distributor, return
         a numpy array of int32s for the positions within
         this rank's self.gridpoints/self.interpolation_coefficients (i.e.
         the locdim) which must be injected into that rank.
@@ -1849,7 +1834,7 @@ def _rank_to_points(self):
         It also requires that the halos be exchanged before
         interpolation (must verify that this occurs).
         """
-        distributor = self.grid.distributor
+        distributor = self._distributor
 
         # Along each Dimension, the coordinate indices are broken into
         # 2*decomposition_size+3 groups, numbered starting at 0
@@ -2018,7 +2003,7 @@ def _build_par_dim_to_nnz(self, active_gp, active_mrow):
         }
 
     def manual_scatter(self, *, data_all_zero=False):
-        distributor = self.grid.distributor
+        distributor = self._distributor
 
         if distributor.nprocs == 1:
             self.scattered_data = self.data
@@ -2196,19 +2181,19 @@ def _dist_scatter(self, data=None):
 
     # The implementation in AbstractSparseFunction now relies on us
     # having a .coordinates property, which we don't have.
-    def _arg_apply(self, dataobj, *subfunc, alias=None):
+    def _arg_apply(self, dataobj, *subfuncs, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
             key._dist_gather(self._C_as_ndarray(dataobj))
-        elif self.grid.distributor.nprocs > 1:
+        elif self._distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
 
     def manual_gather(self):
         # data, in this case, is set to whatever dist_scatter provided?
         # on rank 0, this is the original data array (hack...)
-        distributor = self.grid.distributor
+        distributor = self._distributor
 
         # If not using MPI, don't waste time
         if distributor.nprocs == 1:
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 5ea40769f9..96a83b7dc6 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -259,7 +259,7 @@ def test_interpolate(shape, coords, npoints=20):
     op = Operator(expr)
 
     op(a=a)
-    from IPython import embed; embed()
+
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
 
@@ -635,7 +635,6 @@ def test_msf_interpolate():
     eqn = sf.interpolate(u)
     op = Operator(eqn)
 
-
     sf.manual_scatter()
     op(time_m=0, time_M=4)
     sf.manual_gather()
@@ -648,7 +647,7 @@ def test_msf_interpolate():
 
     eqn_inject = sf.inject(field=u, expr=sf)
     op2 = Operator(eqn_inject)
-    print(op2)
+
     op2(time_m=0, time_M=4)
 
     # There should be 4 points touched for each source point
@@ -690,7 +689,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
-    print(op)
+
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 60df9ed80e..762fe7c5c4 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -8,8 +8,7 @@
 from conftest import skipif
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
-                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve,
-                    PrecomputedSparseTimeFunction)
+                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve)
 from devito.ir import GuardFactor
 from devito.data import LEFT, OWNED
 from devito.mpi.halo_scheme import Halo
@@ -18,7 +17,7 @@
 from devito.types import (Array, CustomDimension, Symbol as dSymbol, Scalar,
                           PointerArray, Lock, PThreadArray, SharedData, Timer,
                           DeviceID, NPThreads, ThreadID, TempFunction, Indirection,
-                          FIndexed)
+                          FIndexed, PrecomputedSparseTimeFunction)
 from devito.types.basic import BoundSymbol
 from devito.tools import EnrichedTuple
 from devito.symbolics import (IntDiv, ListInitializer, FieldFromPointer,
@@ -99,6 +98,42 @@ def test_sparse_function(self, pickle):
         assert sf.dtype == new_sf.dtype
         assert sf.npoint == new_sf.npoint
 
+    @pytest.mark.parametrize('mode', ['coordinates', 'gridpoints'])
+    def test_precomputed_sparse_function(self, mode, pickle):
+        grid = Grid(shape=(11, 11))
+
+        coords = [(0., 0.), (.5, .5), (.7, .2)]
+        gridpoints = [(0, 0), (6, 6), (8, 3)]
+        keys = {'coordinates': coords, 'gridpoints': gridpoints}
+        kw = {mode: keys[mode]}
+        othermode = 'coordinates' if mode == 'gridpoints' else 'gridpoints'
+
+        sf = PrecomputedSparseTimeFunction(
+            name='sf', grid=grid, r=2, npoint=3, nt=5,
+            interpolation_coeffs=np.ndarray(shape=(3, 2, 2)), **kw
+        )
+        sf.data[2, 1] = 5.
+
+        pkl_sf = pickle.dumps(sf)
+        new_sf = pickle.loads(pkl_sf)
+
+        # .data is initialized, so it should have been pickled too
+        assert new_sf.data[2, 1] == 5.
+
+        # gridpoints and interpolation coefficients must have been pickled
+        assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data)
+
+        # coordinates, since they were given, should also have been pickled
+        assert np.all(getattr(sf, mode).data == getattr(new_sf, mode).data)
+        assert getattr(sf, othermode) is None
+        assert getattr(new_sf, othermode) is None
+
+        assert sf._radius == new_sf._radius == 1
+        assert sf.space_order == new_sf.space_order
+        assert sf.time_order == new_sf.time_order
+        assert sf.dtype == new_sf.dtype
+        assert sf.npoint == new_sf.npoint == 3
+
     def test_internal_symbols(self, pickle):
         s = dSymbol(name='s', dtype=np.float32)
         pkl_s = pickle.dumps(s)
@@ -260,49 +295,7 @@ def test_shared_data(self, pickle):
         assert sdata.cfields == new_sdata.cfields
         assert sdata.ncfields == new_sdata.ncfields
 
-@pytest.mark.parametrize('mode', ['coordinates', 'gridpoints'])
-def test_precomputed_sparse_function(mode):
-    grid = Grid(shape=(11, 11))
-
-    coords = [(0., 0.), (.5, .5), (.7, .2)]
-    gridpoints = [(0, 0), (6, 6), (8, 3)]
-    keys = {'coordinates': coords, 'gridpoints': gridpoints}
-    kw = {mode: keys[mode]}
-    othermode = 'coordinates' if mode == 'gridpoints' else 'gridpoints'
-
-    sf = PrecomputedSparseTimeFunction(
-        name='sf', grid=grid, r=2, npoint=3, nt=5,
-        interpolation_coeffs=np.ndarray(shape=(3, 2, 2)), **kw
-    )
-    sf.data[2, 1] = 5.
-
-    pkl_sf = pickle.dumps(sf)
-    new_sf = pickle.loads(pkl_sf)
-
-    # .data is initialized, so it should have been pickled too
-    assert new_sf.data[2, 1] == 5.
-
-    # gridpoints and interpolation coefficients must have been pickled
-    assert np.all(sf.interpolation_coeffs.data == new_sf.interpolation_coeffs.data)
-
-    # coordinates, since they were given, should also have been pickled
-    assert np.all(getattr(sf, mode).data == getattr(new_sf, mode).data)
-    assert getattr(sf, othermode) is None
-    assert getattr(new_sf, othermode) is None
-
-    assert sf._radius == new_sf._radius == 1
-    assert sf.space_order == new_sf.space_order
-    assert sf.time_order == new_sf.time_order
-    assert sf.dtype == new_sf.dtype
-    assert sf.npoint == new_sf.npoint == 3
-
-
-def test_internal_symbols():
-    s = dSymbol(name='s', dtype=np.float32)
-    pkl_s = pickle.dumps(s)
-    new_s = pickle.loads(pkl_s)
-    assert new_s.name == s.name
-    assert new_s.dtype is np.float32
+        ffp = FieldFromPointer(sdata._field_flag, sdata.symbolic_base)
 
         pkl_ffp = pickle.dumps(ffp)
         new_ffp = pickle.loads(pkl_ffp)

From 48ab7dddb0f071d7c135becf081a9610ae092486 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 15 May 2023 10:20:31 +0000
Subject: [PATCH 19/90] api: Revamp PrecomputedSparseFunction

---
 devito/types/sparse.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 8ca20c670d..5be5b8235b 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -12,6 +12,7 @@
 
 from devito.finite_differences import generate_fd_shortcuts
 from devito.finite_differences.elementary import floor
+from devito.logger import warning
 from devito.mpi import MPI, SparseDistributor
 from devito.operations import LinearInterpolator, PrecomputedInterpolator
 from devito.symbolics import (INT, cast_mapper, indexify,
@@ -1039,7 +1040,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -1189,9 +1190,9 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     interpolation_coeffs : np.ndarray, optional
         An array containing the coefficient for each of the r^2 (2D) or r^3
         (3D) gridpoints that each sparse point will be interpolated to. The
-        coefficient is split across the n Dimensions such that the contribution
+        coefficient is split across the n dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
-        `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
+        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
         potentially 216).  Must be a three-dimensional array of shape
         `(npoint, grid.ndim, r)`.
@@ -1201,7 +1202,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         Discretisation order for time derivatives. Default to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional

From 1ea1bac38ee8bef067b227bb9222e114e337f90e Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 15 May 2023 13:24:06 +0000
Subject: [PATCH 20/90] api: Fix pickling for PrecomputedSparseFunction

---
 devito/types/sparse.py | 6 +++++-
 tests/test_pickle.py   | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 5be5b8235b..7d9fc787bc 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1065,6 +1065,10 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
                    ('r', 'gridpoints_data', 'coordinates_data',
                     'interpolation_coeffs_data'))
 
+    __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
+                   ('r', 'coordinates_data', 'gridpoints_data',
+                    'interpolation_coeffs_data'))
+
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
 
@@ -1192,7 +1196,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         (3D) gridpoints that each sparse point will be interpolated to. The
         coefficient is split across the n dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
-        `interp_coeffs[..., i]*interpo_coeffs[...,j]*interp_coeffs[...,k]`.
+        `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
         potentially 216).  Must be a three-dimensional array of shape
         `(npoint, grid.ndim, r)`.
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 762fe7c5c4..f17da8ac14 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -8,7 +8,8 @@
 from conftest import skipif
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
-                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve)
+                    TimeDimension, SteppingDimension, Operator, MPI, Min,
+                    PrecomputedSparseTimeFunction)
 from devito.ir import GuardFactor
 from devito.data import LEFT, OWNED
 from devito.mpi.halo_scheme import Halo

From 065222b1104e87edb23fb0bb8a60fed024237db4 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 15 May 2023 11:30:40 -0400
Subject: [PATCH 21/90] types: setup_coordinates for all sparse functions

---
 tests/test_pickle.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index f17da8ac14..85a6e33a7d 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -8,7 +8,7 @@
 from conftest import skipif
 from devito import (Constant, Eq, Function, TimeFunction, SparseFunction, Grid,
                     Dimension, SubDimension, ConditionalDimension, IncrDimension,
-                    TimeDimension, SteppingDimension, Operator, MPI, Min,
+                    TimeDimension, SteppingDimension, Operator, MPI, Min, solve,
                     PrecomputedSparseTimeFunction)
 from devito.ir import GuardFactor
 from devito.data import LEFT, OWNED
@@ -312,12 +312,6 @@ def test_shared_data(self, pickle):
 
     def test_findexed(self, pickle):
         grid = Grid(shape=(3, 3, 3))
-        f = Function(name='f', grid=grid)
-
-        fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2))
-
-        pkl_fi = pickle.dumps(fi)
-        new_fi = pickle.loads(pkl_fi)
 
         assert new_fi.name == fi.name
         assert new_fi.pname == fi.pname

From c5f9541f52a9ae6f39b477982435bf709a504b2a Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 16 May 2023 09:22:59 +0000
Subject: [PATCH 22/90] api: Rework AbstractSparseFunction.__subfunc_setup__

---
 devito/types/sparse.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 7d9fc787bc..dd5d469e35 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1058,12 +1058,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     The parameters must always be given as keyword arguments, since SymPy
     uses `*args` to (re-)create the Dimension arguments of the symbolic object.
     """
-
-    _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
-
-    __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'gridpoints_data', 'coordinates_data',
-                    'interpolation_coeffs_data'))
+     _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
                    ('r', 'coordinates_data', 'gridpoints_data',

From d495e271160e38a7f55472b6eb69ba9915cba439 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 17 May 2023 15:38:33 -0400
Subject: [PATCH 23/90] sparse: add MPI support to PrecomputedSparseFunction

---
 devito/types/sparse.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index dd5d469e35..2c4f3dbf09 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -139,12 +139,12 @@ def __subfunc_setup__(self, key, suffix):
             distributor=self._distributor
         )
 
-        if self.npoint == 0:
-            # This is a corner case -- we might get here, for example, when
-            # running with MPI and some processes get 0-size arrays after
-            # domain decomposition. We "touch" the data anyway to avoid the
-            # case ``self._data is None``
-            sf.data
+        # if self.npoint == 0:
+        #     # This is a corner case -- we might get here, for example, when
+        #     # running with MPI and some processes get 0-size arrays after
+        #     # domain decomposition. We "touch" the data anyway to avoid the
+        #     # case ``self._data is None``
+        sf.data
 
         return sf
 

From da695e1b21893a5adf025cd6deffd92df4663eae Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 18 May 2023 11:00:39 -0400
Subject: [PATCH 24/90] mpi: rework interpolation to work with precomputed and
 mpi

---
 devito/operations/interpolators.py |  12 ++++
 devito/types/sparse.py             | 101 +++++++++++++++++++++++++++--
 2 files changed, 107 insertions(+), 6 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 19bee16068..2716b76310 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -195,6 +195,18 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
 
         return idx_subs, temps
 
+    def subs_coords(self, _expr, *idx_subs):
+        return [_expr.xreplace(v_sub) * b.xreplace(v_sub)
+                for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]
+
+    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
+        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
+                    implicit_dims=implicit_dims)
+                for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
+
+    def implicit_dims(self, implicit_dims):
+        return as_tuple(implicit_dims) + self.sfunction.dimensions
+
     def interpolate(self, expr, offset=0, increment=False, self_subs={},
                     implicit_dims=None):
         """
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 2c4f3dbf09..c0d2ddb7d9 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -139,12 +139,12 @@ def __subfunc_setup__(self, key, suffix):
             distributor=self._distributor
         )
 
-        # if self.npoint == 0:
-        #     # This is a corner case -- we might get here, for example, when
-        #     # running with MPI and some processes get 0-size arrays after
-        #     # domain decomposition. We "touch" the data anyway to avoid the
-        #     # case ``self._data is None``
-        sf.data
+        if self.npoint == 0:
+            # This is a corner case -- we might get here, for example, when
+            # running with MPI and some processes get 0-size arrays after
+            # domain decomposition. We "touch" the data anyway to avoid the
+            # case ``self._data is None``
+            sf.data
 
         return sf
 
@@ -206,6 +206,56 @@ def _coords_indices(self):
                 np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
             ).astype(np.int32)
 
+    @cached_property
+    def _point_symbols(self):
+        """Symbol for coordinate value in each dimension of the point."""
+        return tuple(Symbol(name='p%s' % d, dtype=self.dtype)
+                     for d in self.grid.dimensions)
+
+    @cached_property
+    def _position_map(self):
+        """
+        Symbols map for the position of the sparse points relative to the grid
+        origin.
+
+        Notes
+        -----
+        The expression `(coord - origin)/spacing` could also be computed in the
+        mathematically equivalent expanded form `coord/spacing -
+        origin/spacing`. This particular form is problematic when a sparse
+        point is in close proximity of the grid origin, since due to a larger
+        machine precision error it may cause a +-1 error in the computation of
+        the position. We mitigate this problem by computing the positions
+        individually (hence the need for a position map).
+        """
+        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
+                   for d in self.grid.dimensions]
+        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
+                                                          self._coordinate_symbols,
+                                                          self.grid.origin_symbols)])
+
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each dimension for each point symbol."""
+        return tuple(product(range(self.r+1), repeat=self.grid.dim))
+
+    @cached_property
+    def _coordinate_indices(self):
+        """Symbol for each grid index according to the coordinates."""
+        return tuple([INT(floor((c - o) / i.spacing))
+                      for c, o, i in zip(self._coordinate_symbols,
+                                         self.grid.origin_symbols,
+                                         self.grid.dimensions[:self.grid.dim])])
+
+    def _coordinate_bases(self, field_offset):
+        """Symbol for the base coordinates of the reference grid point."""
+        return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
+                      for c, o, idx, i, of in zip(self._coordinate_symbols,
+                                                  self.grid.origin_symbols,
+                                                  self._coordinate_indices,
+                                                  self.grid.dimensions[:self.grid.dim],
+                                                  field_offset)])
+
     @property
     def gridpoints(self):
         try:
@@ -1115,6 +1165,11 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each dimension for each point symbol."""
+        return tuple(product(range(-self.r//2+1, self.r//2+1), repeat=self.grid.dim))
+
     @property
     def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
@@ -1159,6 +1214,40 @@ def _coordinate_indices(self):
                           for p, i in zip(self._position_map.values(),
                                           self.grid.dimensions[:self.grid.dim])])
 
+    @cached_property
+    def _coordinate_symbols(self):
+        """Symbol representing the coordinate values in each dimension."""
+        p_dim = self.indices[self._sparse_position]
+        if self._gridpoints is None:
+            return tuple([self.coordinates.indexify((p_dim, i))
+                        for i in range(self.grid.dim)])
+        else:
+            return tuple([self.gridpoints.indexify((p_dim, i)) * d
+                          for (i, d) in enumerate(self.grid.spacing_symbols)])
+
+    @memoized_meth
+    def _index_matrix(self, offset):
+        # Note about the use of *memoization*
+        # Since this method is called by `_interpolation_indices`, using
+        # memoization avoids a proliferation of symbolically identical
+        # ConditionalDimensions for a given set of indirection indices
+
+        # List of indirection indices for all adjacent grid points
+        if self._gridpoints is None:
+            index_matrix = [tuple(idx + ii + offset for ii, idx
+                                in zip(inc, self._coordinate_indices))
+                            for inc in self._point_increments]
+        else:
+            index_matrix = [tuple(self._gridpoints + ii + offset for ii in inc)
+                            for inc in self._point_increments]
+
+        # A unique symbol for each indirection index
+        indices = filter_ordered(flatten(index_matrix))
+        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
+                              for i, p in enumerate(indices)])
+    
+        return index_matrix, points
+
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
                                     PrecomputedSparseFunction):

From 45875d8b0baf3d97ef4d15a90b3457e26d25e699 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 18 May 2023 14:15:14 -0400
Subject: [PATCH 25/90] api: precomputed sparse function temps

---
 devito/types/sparse.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index c0d2ddb7d9..6095f5d8ea 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1220,7 +1220,7 @@ def _coordinate_symbols(self):
         p_dim = self.indices[self._sparse_position]
         if self._gridpoints is None:
             return tuple([self.coordinates.indexify((p_dim, i))
-                        for i in range(self.grid.dim)])
+                          for i in range(self.grid.dim)])
         else:
             return tuple([self.gridpoints.indexify((p_dim, i)) * d
                           for (i, d) in enumerate(self.grid.spacing_symbols)])
@@ -1234,19 +1234,21 @@ def _index_matrix(self, offset):
 
         # List of indirection indices for all adjacent grid points
         if self._gridpoints is None:
-            index_matrix = [tuple(idx + ii + offset for ii, idx
-                                in zip(inc, self._coordinate_indices))
+            index_matrix = [tuple(idx + ii + offset
+                                  for ii, idx in zip(inc, self._coordinate_indices))
                             for inc in self._point_increments]
         else:
-            index_matrix = [tuple(self._gridpoints + ii + offset for ii in inc)
+            ddim = self._gridpoints.dimensions[1]
+            index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
+                                  for (ii, d) in zip(inc, range(self.grid.dim)))
                             for inc in self._point_increments]
-
+        shifts = [tuple(ii + offset for ii in inc)
+                  for inc in self._point_increments]
         # A unique symbol for each indirection index
         indices = filter_ordered(flatten(index_matrix))
         points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                               for i, p in enumerate(indices)])
-    
-        return index_matrix, points
+        return index_matrix, points, shifts
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,

From f27de452c83679d12e8d4231048009411d5d4ab8 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 19 May 2023 11:06:34 -0400
Subject: [PATCH 26/90] operator: explicit alias of arg_apply

---
 devito/operator/operator.py |  2 +-
 devito/types/sparse.py      | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 61a70272a0..994f11783e 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -632,7 +632,7 @@ def _postprocess_arguments(self, args, **kwargs):
         """Process runtime arguments upon returning from ``.apply()``."""
         for p in self.parameters:
             try:
-                subfuncs = (args[s.name] for s in p._sub_functions)
+                subfuncs = (args[s] for s in p._sub_functions)
                 p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
             except AttributeError:
                 p._arg_apply(args[p.name], alias=kwargs.get(p.name))
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 6095f5d8ea..26d3815fb9 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -206,6 +206,16 @@ def _coords_indices(self):
                 np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
             ).astype(np.int32)
 
+    @property
+    def _subfunc_names(self):
+        names = []
+        for s in self._sub_functions:
+            try:
+                names.append(getattr(self, s).name)
+            except AttributeError:
+                pass
+        return tuple(names)
+
     @cached_property
     def _point_symbols(self):
         """Symbol for coordinate value in each dimension of the point."""

From 1811ab9c88c0ca64c555023dd9b7ae6a5e0edb74 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 19 May 2023 14:24:06 -0400
Subject: [PATCH 27/90] api: enforce gridpoints as subfunc for precomputed

---
 devito/types/sparse.py | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 26d3815fb9..3906ca0179 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1121,8 +1121,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
      _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'coordinates_data', 'gridpoints_data',
-                    'interpolation_coeffs_data'))
+                   ('r', 'gridpoints_data', 'interpolation_coeffs_data'))
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
@@ -1154,17 +1153,17 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Specifying only `npoints` is acceptable; this will require the user
         # to setup the coordinates data later on
-        npoint = kwargs.get('npoint', None)
         if self.npoint and coordinates is None and gridpoints is None:
-            coordinates = np.zeros((npoint, self.grid.dim))
+            gridpoints = np.zeros((self.npoint, self.grid.dim))
 
         if coordinates is not None:
-            self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
-            self._gridpoints = None
-            self._dist_origin = {self._coordinates: self.grid.origin_offset}
+            # Convert to gridpoints
+            if isinstance(coordinates, SubFunction):
+                raise ValueError("`coordinates` only accepted as array")
+            loc = np.floor((coordinates - self.grid.origin) / self.grid.spacing)
+            self._gridpoints = self.__subfunc_setup__(loc.astype(int), 'gridpoints')
         else:
             assert gridpoints is not None
-            self._coordinates = None
             self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
             self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
@@ -1228,12 +1227,8 @@ def _coordinate_indices(self):
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each dimension."""
         p_dim = self.indices[self._sparse_position]
-        if self._gridpoints is None:
-            return tuple([self.coordinates.indexify((p_dim, i))
-                          for i in range(self.grid.dim)])
-        else:
-            return tuple([self.gridpoints.indexify((p_dim, i)) * d
-                          for (i, d) in enumerate(self.grid.spacing_symbols)])
+        return tuple([self.coordinates.indexify((p_dim, i))
+                      for i in range(self.grid.dim)])
 
     @memoized_meth
     def _index_matrix(self, offset):
@@ -1243,21 +1238,17 @@ def _index_matrix(self, offset):
         # ConditionalDimensions for a given set of indirection indices
 
         # List of indirection indices for all adjacent grid points
-        if self._gridpoints is None:
-            index_matrix = [tuple(idx + ii + offset
-                                  for ii, idx in zip(inc, self._coordinate_indices))
-                            for inc in self._point_increments]
-        else:
-            ddim = self._gridpoints.dimensions[1]
-            index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
-                                  for (ii, d) in zip(inc, range(self.grid.dim)))
-                            for inc in self._point_increments]
+        ddim = self._gridpoints.dimensions[1]
+        index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
+                              for (ii, d) in zip(inc, range(self.grid.dim)))
+                        for inc in self._point_increments]
         shifts = [tuple(ii + offset for ii in inc)
                   for inc in self._point_increments]
         # A unique symbol for each indirection index
         indices = filter_ordered(flatten(index_matrix))
         points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                               for i, p in enumerate(indices)])
+
         return index_matrix, points, shifts
 
 

From 854df6c690ff94fd090acb8c243c4b756ace6f66 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 22 May 2023 12:52:00 -0400
Subject: [PATCH 28/90] api: revamp interpolator for better generalization

---
 devito/operations/interpolators.py |  43 +++++++++++-
 devito/types/dense.py              |   7 ++
 devito/types/sparse.py             | 102 ++++++++++++++++-------------
 3 files changed, 103 insertions(+), 49 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 2716b76310..4510c72f1b 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -204,8 +204,47 @@ def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
                     implicit_dims=implicit_dims)
                 for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
 
-    def implicit_dims(self, implicit_dims):
-        return as_tuple(implicit_dims) + self.sfunction.dimensions
+    def _interpolation_indices(self, variables, offset=0, field_offset=0,
+                               implicit_dims=None):
+        """
+        Generate interpolation indices for the DiscreteFunctions in ``variables``.
+        """
+        idx_subs = []
+        points = {d: [] for d in self._gdim}
+        mapper = {d: [] for d in self._gdim}
+
+        # Positon map and temporaries for it
+        pmap = self.sfunction._coordinate_indices
+
+        # Temporaries for the position
+        temps = self._positions(implicit_dims)
+
+        # Coefficient symbol expression
+        temps.extend(self._coeff_temps(implicit_dims))
+
+        # Create positions and indices temporaries/indirections
+        for ((di, d), pos) in zip(enumerate(self._gdim), pmap):
+            for (ri, r) in enumerate(self._interp_points):
+                p = Symbol(name='ii_%s_%s_%d' % (self.sfunction.name, d.name, ri))
+                points[d].append(p)
+                # Conditionals to avoid OOB
+                lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
+                ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+                condition = sympy.And(lb, ub, evaluate=False)
+                mapper[d].append(ConditionalDimension(p.name, self.sfunction._sparse_dim,
+                                                      condition=condition, indirect=True))
+                temps.extend([Eq(p, pos + r, implicit_dims=implicit_dims)])
+
+        # Substitution mapper
+        for p in self._nd_points:
+            # Apply mapper to each variable with origin correction before the
+            # Dimensions get replaced
+            subs = {v: v.subs({k: c[pi] - v.origin.get(k, 0)
+                              for ((k, c), pi) in zip(mapper.items(), p)})
+                    for v in variables}
+            idx_subs.append(subs)
+
+        return idx_subs, temps
 
     def interpolate(self, expr, offset=0, increment=False, self_subs={},
                     implicit_dims=None):
diff --git a/devito/types/dense.py b/devito/types/dense.py
index 9fb96ec5cd..40564053d8 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1466,6 +1466,13 @@ def __padding_setup__(self, **kwargs):
     def _halo_exchange(self):
         return
 
+    @property
+    def origin(self):
+        """
+        SubFunction have zero origin
+        """
+        return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
+
     def _arg_values(self, **kwargs):
         if self.name in kwargs:
             raise RuntimeError("`%s` is a SubFunction, so it can't be assigned "
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 3906ca0179..46f05d5b50 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -219,14 +219,26 @@ def _subfunc_names(self):
     @cached_property
     def _point_symbols(self):
         """Symbol for coordinate value in each dimension of the point."""
-        return tuple(Symbol(name='p%s' % d, dtype=self.dtype)
-                     for d in self.grid.dimensions)
+        return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
+                                for d in self.grid.dimensions),
+                              getters=self.grid.dimensions)
 
     @cached_property
     def _position_map(self):
         """
-        Symbols map for the position of the sparse points relative to the grid
+        Symbols map for the physical position of the sparse points relative to the grid
         origin.
+        """
+        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
+                   for d in self.grid.dimensions]
+        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
+                                                          self._coordinate_symbols,
+                                                          self.grid.origin_symbols)])
+
+    @cached_property
+    def _coordinate_indices(self):
+        """
+        Symbol for each grid index according to the coordinates.
 
         Notes
         -----
@@ -238,24 +250,9 @@ def _position_map(self):
         the position. We mitigate this problem by computing the positions
         individually (hence the need for a position map).
         """
-        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
-                   for d in self.grid.dimensions]
-        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
-                                                          self._coordinate_symbols,
-                                                          self.grid.origin_symbols)])
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(self.r+1), repeat=self.grid.dim))
-
-    @cached_property
-    def _coordinate_indices(self):
-        """Symbol for each grid index according to the coordinates."""
-        return tuple([INT(floor((c - o) / i.spacing))
-                      for c, o, i in zip(self._coordinate_symbols,
-                                         self.grid.origin_symbols,
-                                         self.grid.dimensions[:self.grid.dim])])
+        return tuple([INT(floor(p / i.spacing))
+                      for p, i in zip(self._position_map.values(),
+                                      self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
         """Symbol for the base coordinates of the reference grid point."""
@@ -1121,7 +1118,8 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
      _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'gridpoints_data', 'interpolation_coeffs_data'))
+                   ('r', 'gridpoints_data', 'coordinates_data',
+                    'interpolation_coeffs_data'))
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
@@ -1153,17 +1151,17 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Specifying only `npoints` is acceptable; this will require the user
         # to setup the coordinates data later on
+        npoint = kwargs.get('npoint', None)
         if self.npoint and coordinates is None and gridpoints is None:
-            gridpoints = np.zeros((self.npoint, self.grid.dim))
+            coordinates = np.zeros((npoint, self.grid.dim))
 
         if coordinates is not None:
-            # Convert to gridpoints
-            if isinstance(coordinates, SubFunction):
-                raise ValueError("`coordinates` only accepted as array")
-            loc = np.floor((coordinates - self.grid.origin) / self.grid.spacing)
-            self._gridpoints = self.__subfunc_setup__(loc.astype(int), 'gridpoints')
+            self._coordinates = self.__subfunc_setup__(coordinates, 'coords')
+            self._gridpoints = None
+            self._dist_origin = {self._coordinates: self.grid.origin_offset}
         else:
             assert gridpoints is not None
+            self._coordinates = None
             self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
             self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
@@ -1174,6 +1172,10 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
+    @property
+    def r(self):
+        return self._radius
+
     @cached_property
     def _point_increments(self):
         """Index increments in each dimension for each point symbol."""
@@ -1227,29 +1229,35 @@ def _coordinate_indices(self):
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each dimension."""
         p_dim = self.indices[self._sparse_position]
+        if self.gridpoints is not None:
+            return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
+                          for ((di, d), o) in zip(enumerate(self.grid.dimensions),
+                                                  self.grid.origin)])
+
         return tuple([self.coordinates.indexify((p_dim, i))
                       for i in range(self.grid.dim)])
 
-    @memoized_meth
-    def _index_matrix(self, offset):
-        # Note about the use of *memoization*
-        # Since this method is called by `_interpolation_indices`, using
-        # memoization avoids a proliferation of symbolically identical
-        # ConditionalDimensions for a given set of indirection indices
-
-        # List of indirection indices for all adjacent grid points
-        ddim = self._gridpoints.dimensions[1]
-        index_matrix = [tuple(self._gridpoints._subs(ddim, d) + ii + offset
-                              for (ii, d) in zip(inc, range(self.grid.dim)))
-                        for inc in self._point_increments]
-        shifts = [tuple(ii + offset for ii in inc)
-                  for inc in self._point_increments]
-        # A unique symbol for each indirection index
-        indices = filter_ordered(flatten(index_matrix))
-        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
-                              for i, p in enumerate(indices)])
+    @cached_property
+    def _coordinate_indices(self):
+        """
+        Symbol for each grid index according to the coordinates.
 
-        return index_matrix, points, shifts
+        Notes
+        -----
+        The expression `(coord - origin)/spacing` could also be computed in the
+        mathematically equivalent expanded form `coord/spacing -
+        origin/spacing`. This particular form is problematic when a sparse
+        point is in close proximity of the grid origin, since due to a larger
+        machine precision error it may cause a +-1 error in the computation of
+        the position. We mitigate this problem by computing the positions
+        individually (hence the need for a position map).
+        """
+        if self.gridpoints is not None:
+            ddim = self.gridpoints.dimensions[-1]
+            return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
+        return tuple([INT(floor(p / i.spacing))
+                      for p, i in zip(self._position_map.keys(),
+                                      self.grid.dimensions[:self.grid.dim])])
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,

From d77f6d1bb10ee4f5d3d61241affb6e37f951711e Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 23 May 2023 11:54:51 -0400
Subject: [PATCH 29/90] api: cleanup hierarchy and properties of sparse and
 interpolator

---
 devito/operations/interpolators.py |  6 +--
 devito/types/sparse.py             | 76 +++++++++++++++++++++++++-----
 tests/test_pickle.py               |  8 ++++
 3 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 4510c72f1b..7d06eb8444 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -120,7 +120,7 @@ class WeightedInterpolator(GenericInterpolator):
     def __init__(self, sfunction):
         self.sfunction = sfunction
 
-    @property
+    @cached_property
     def grid(self):
         return self.sfunction.grid
 
@@ -128,11 +128,11 @@ def grid(self):
     def _weights(self):
         raise NotImplementedError
 
-    @property
+    @cached_property
     def _psym(self):
         return self.sfunction._point_symbols
 
-    @property
+    @cached_property
     def _gdim(self):
         return self.grid.dimensions
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 46f05d5b50..6df48adc18 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -176,6 +176,10 @@ def r(self):
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
 
+    @cached_property
+    def dist_origin(self):
+        return self._dist_origin
+
     @property
     def dist_origin(self):
         return self._dist_origin
@@ -216,9 +220,58 @@ def _subfunc_names(self):
                 pass
         return tuple(names)
 
+    @property
+    def _coords_indices(self):
+        if self.gridpoints_data is not None:
+            return self.gridpoints_data._local
+        else:
+            if self.coordinates_data is None:
+                raise ValueError("No coordinates or gridpoints attached"
+                                 "to this SparseFunction")
+            return (
+                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
+            ).astype(np.int32)
+
+    @property
+    def gridpoints(self):
+        try:
+            return self._gridpoints
+        except AttributeError:
+            return self._coords_indices
+
+    @property
+    def gridpoints_data(self):
+        try:
+            return self._gridpoints.data._local
+        except AttributeError:
+            return None
+
+    @property
+    def coordinates(self):
+        try:
+            return self._coordinates
+        except AttributeError:
+            return None
+
+    @property
+    def coordinates_data(self):
+        try:
+            return self.coordinates.data._local
+        except AttributeError:
+            return None
+
+    @cached_property
+    def _point_increments(self):
+        """Index increments in each Dimension for each point symbol."""
+        return tuple(product(range(-self.r+1, self.r+1), repeat=self.grid.dim))
+
+    @cached_property
+    def _point_support(self):
+        return np.array(self._point_increments)
+
     @cached_property
     def _point_symbols(self):
-        """Symbol for coordinate value in each dimension of the point."""
+        """Symbol for coordinate value in each Dimension of the point."""
         return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
                                 for d in self.grid.dimensions),
                               getters=self.grid.dimensions)
@@ -1097,7 +1150,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    dimensions : tuple of Dimension, optional
+    Dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -1227,15 +1280,15 @@ def _coordinate_indices(self):
 
     @cached_property
     def _coordinate_symbols(self):
-        """Symbol representing the coordinate values in each dimension."""
+        """Symbol representing the coordinate values in each Dimension."""
         p_dim = self.indices[self._sparse_position]
         if self.gridpoints is not None:
             return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
                           for ((di, d), o) in zip(enumerate(self.grid.dimensions),
                                                   self.grid.origin)])
-
-        return tuple([self.coordinates.indexify((p_dim, i))
-                      for i in range(self.grid.dim)])
+        else:
+            return tuple([self.coordinates.indexify((p_dim, i))
+                          for i in range(self.grid.dim)])
 
     @cached_property
     def _coordinate_indices(self):
@@ -1255,9 +1308,10 @@ def _coordinate_indices(self):
         if self.gridpoints is not None:
             ddim = self.gridpoints.dimensions[-1]
             return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
-        return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.keys(),
-                                      self.grid.dimensions[:self.grid.dim])])
+        else:
+            return tuple([INT(floor(p / i.spacing))
+                          for p, i in zip(self._position_map,
+                                          self.grid.dimensions[:self.grid.dim])])
 
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
@@ -1289,7 +1343,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     interpolation_coeffs : np.ndarray, optional
         An array containing the coefficient for each of the r^2 (2D) or r^3
         (3D) gridpoints that each sparse point will be interpolated to. The
-        coefficient is split across the n dimensions such that the contribution
+        coefficient is split across the n Dimensions such that the contribution
         of the point (i, j, k) will be multiplied by
         `interp_coeffs[..., i]*interp_coeffs[...,j]*interp_coeffs[...,k]`.
         So for `r=6`, we will store 18 coefficients per sparse point (instead of
@@ -1301,7 +1355,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         Discretisation order for time derivatives. Default to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    dimensions : tuple of Dimension, optional
+    Dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 85a6e33a7d..e051f65625 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -360,7 +360,15 @@ def test_guard_factor(self, pickle):
         pkl_gf = pickle.dumps(gf)
         new_gf = pickle.loads(pkl_gf)
 
+<<<<<<< HEAD
         assert gf == new_gf
+=======
+    assert sf._radius == new_sf._radius == 1
+    assert sf.space_order == new_sf.space_order
+    assert sf.time_order == new_sf.time_order
+    assert sf.dtype == new_sf.dtype
+    assert sf.npoint == new_sf.npoint == 3
+>>>>>>> 113f6f860 (api: cleanup hierachy and properties of sparse and interpolator)
 
     def test_temp_function(self, pickle):
         grid = Grid(shape=(3, 3))

From 6696b91bcde27b938132e1f1ceedf9a6ec9e7d7d Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 23 May 2023 14:53:37 -0400
Subject: [PATCH 30/90] tests: add test for precomputed time injection that was
 missing (and implementation)

---
 devito/types/sparse.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 6df48adc18..5f99877bff 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -223,7 +223,7 @@ def _subfunc_names(self):
     @property
     def _coords_indices(self):
         if self.gridpoints_data is not None:
-            return self.gridpoints_data._local
+            return self.gridpoints_data
         else:
             if self.coordinates_data is None:
                 raise ValueError("No coordinates or gridpoints attached"
@@ -242,7 +242,7 @@ def gridpoints(self):
     @property
     def gridpoints_data(self):
         try:
-            return self._gridpoints.data._local
+            return self._gridpoints.data._local.view(np.ndarray)
         except AttributeError:
             return None
 
@@ -256,7 +256,7 @@ def coordinates(self):
     @property
     def coordinates_data(self):
         try:
-            return self.coordinates.data._local
+            return self.coordinates.data._local.view(np.ndarray)
         except AttributeError:
             return None
 

From f168f3070f4a8ac22897b1fe5803b3ff870f7411 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 2 Jun 2023 11:15:31 -0400
Subject: [PATCH 31/90] api: switch interp to r dim

---
 devito/operations/interpolators.py | 46 ++++++++++++++++--------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 7d06eb8444..fa9017b52c 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -120,7 +120,7 @@ class WeightedInterpolator(GenericInterpolator):
     def __init__(self, sfunction):
         self.sfunction = sfunction
 
-    @cached_property
+    @property
     def grid(self):
         return self.sfunction.grid
 
@@ -128,11 +128,11 @@ def grid(self):
     def _weights(self):
         raise NotImplementedError
 
-    @cached_property
+    @property
     def _psym(self):
         return self.sfunction._point_symbols
 
-    @cached_property
+    @property
     def _gdim(self):
         return self.grid.dimensions
 
@@ -222,27 +222,29 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
 
+        try:
+            pdim = self.sfunction.coordinates.dimensions[-1]
+        except AttributeError:
+            pdim = self.sfunction.gridpoints.dimensions[-1]
+
         # Create positions and indices temporaries/indirections
-        for ((di, d), pos) in zip(enumerate(self._gdim), pmap):
-            for (ri, r) in enumerate(self._interp_points):
-                p = Symbol(name='ii_%s_%s_%d' % (self.sfunction.name, d.name, ri))
-                points[d].append(p)
-                # Conditionals to avoid OOB
-                lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
-                ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
-                condition = sympy.And(lb, ub, evaluate=False)
-                mapper[d].append(ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                                      condition=condition, indirect=True))
-                temps.extend([Eq(p, pos + r, implicit_dims=implicit_dims)])
-
-        # Substitution mapper
-        for p in self._nd_points:
-            # Apply mapper to each variable with origin correction before the
-            # Dimensions get replaced
-            subs = {v: v.subs({k: c[pi] - v.origin.get(k, 0)
-                              for ((k, c), pi) in zip(mapper.items(), p)})
+        for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
+            p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
+            temps.extend([Eq(p, pos._subs(pdim, di) + rd,
+                             implicit_dims=implicit_dims)])
+
+            # Add conditional
+            lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+            condition = sympy.And(lb, ub, evaluate=False)
+            mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
+                                             condition=condition, indirect=True)
+            points[d] = p
+
+        # Substitution mapper for variables
+        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0)
+                               for ((k, c), pi) in zip(mapper.items(), points)})
                     for v in variables}
-            idx_subs.append(subs)
 
         return idx_subs, temps
 

From 073bbc81876ec62f67394fb6b5c7d7497f1d2370 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 5 Jun 2023 15:10:49 -0400
Subject: [PATCH 32/90] operations: remove unused points and cleanup weights

---
 devito/operations/interpolators.py | 14 +++---
 devito/types/sparse.py             |  2 +-
 tests/test_interpolation.py        | 79 +++++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index fa9017b52c..c20a71abf6 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -8,7 +8,7 @@
 from devito.symbolics import retrieve_function_carriers
 from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
-                          CustomDimension)
+                          CustomDimension, Function)
 from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
@@ -217,7 +217,8 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         pmap = self.sfunction._coordinate_indices
 
         # Temporaries for the position
-        temps = self._positions(implicit_dims)
+        # temps = self._positions(implicit_dims)
+        temps = []
 
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
@@ -230,20 +231,17 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         # Create positions and indices temporaries/indirections
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos._subs(pdim, di) + rd,
-                             implicit_dims=implicit_dims)])
+            temps.extend([Eq(p, pos.subs({pdim: di}), implicit_dims=implicit_dims)])
 
             # Add conditional
             lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
             ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True)
-            points[d] = p
+                                             condition=condition, indirect=True) + rd
 
         # Substitution mapper for variables
-        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0)
-                               for ((k, c), pi) in zip(mapper.items(), points)})
+        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
                     for v in variables}
 
         return idx_subs, temps
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 5f99877bff..e601a68bdd 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -304,7 +304,7 @@ def _coordinate_indices(self):
         individually (hence the need for a position map).
         """
         return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.values(),
+                      for p, i in zip(self._position_map,
                                       self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 96a83b7dc6..6cbb67a0d7 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -130,7 +130,11 @@ def init(data):
                                    interpolation_coeffs=interpolation_coeffs)
     eqn = sf.interpolate(m)
     op = Operator(eqn)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op()
     expected_values = [sin(point[0]) + sin(point[1]) for point in points]
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
@@ -164,7 +168,11 @@ def test_precomputed_interpolation_time():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(time_m=0, time_M=4)
 
     for it in range(5):
@@ -197,7 +205,11 @@ def test_precomputed_injection():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op()
     indices = [slice(0, 2, 1), slice(9, 11, 1)]
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
@@ -233,7 +245,11 @@ def test_precomputed_injection_time():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op()
     for ti in range(2):
         indices = [slice(0, 2, 1), slice(9, 11, 1)]
@@ -257,7 +273,11 @@ def test_interpolate(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
@@ -278,7 +298,11 @@ def test_interpolate_cumm(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, increment=True)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6)
@@ -300,7 +324,11 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, u_t=a.indices[0]+1)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[0, :], xcoords, rtol=1e-6)
@@ -308,7 +336,11 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, p_t=p.indices[0]+1)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -317,7 +349,11 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     expr = p.interpolate(a, u_t=a.indices[0]+1,
                          p_t=p.indices[0]+1)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -337,7 +373,11 @@ def test_interpolate_array(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, points=p.data[:])
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
@@ -358,7 +398,11 @@ def test_interpolate_custom(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a * p.indices[0])
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -376,7 +420,7 @@ def test_interpolation_dx():
     sf1.coordinates.data[0, :] = (0.5, 0.5)
 
     op = Operator(sf1.interpolate(u.dx))
-
+    print(op)
     assert sf1.data.shape == (1,)
     u.data[:] = 0.0
     u.data[5, 5] = 4.0
@@ -404,7 +448,11 @@ def test_interpolate_indexed(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a[a.grid.dimensions] * p.indices[0])
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -427,7 +475,11 @@ def test_inject(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.))
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -451,7 +503,11 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1)
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -462,7 +518,11 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), p_t=p.indices[0]+1)
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, time=1)
 
     indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -473,7 +533,11 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1, p_t=p.indices[0]+1)
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -497,7 +561,11 @@ def test_inject_array(shape, coords, result, npoints=19):
     expr = p.inject(a, p)
 
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, points=p2.data[:])
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -521,7 +589,11 @@ def test_inject_from_field(shape, coords, result, npoints=19):
 
     expr = p.inject(field=a, expr=b)
     op = Operator(expr)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a, b=b)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -595,7 +667,11 @@ def test_edge_sparse():
     expr = sf1.interpolate(u)
     subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)}
     op = Operator(expr, subs=subs)
+<<<<<<< HEAD
 
+=======
+    print(op)
+>>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op()
     assert sf1.data[0] == 0
 
@@ -689,6 +765,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
+    print(op)
 
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2

From 840e4329341488bb2e1148bf973d22346e2476f3 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 8 Jun 2023 08:38:56 -0400
Subject: [PATCH 33/90] compiler: fix dimension_sort to avoid missing indirect
 conditionals

---
 devito/ir/equations/algorithms.py  |  5 +-
 devito/operations/interpolators.py | 25 ++++-----
 devito/types/sparse.py             |  2 +-
 tests/test_interpolation.py        | 84 +++++++++++++++++++++++++++++-
 4 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 11e778f185..87b95a0350 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -56,7 +56,9 @@ def handle_indexed(indexed):
     # Add in pure data dimensions (e.g., those accessed only via explicit values,
     # such as A[3])
     indexeds = retrieve_indexed(expr, deep=True)
-    extra.update(set().union(*[set(i.function.dimensions) for i in indexeds]))
+    for i in indexeds:
+        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices) if e.is_integer}
+        extra.update(expl_dims)
 
     # Enforce determinism
     extra = filter_sorted(extra)
@@ -68,6 +70,7 @@ def handle_indexed(indexed):
     # preceed `time`, while `t`, and therefore `time`, *must* appear before `x`,
     # as indicated by the second relation
     implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
+
     # 2) To handle cases such as `((time, xi), (x,))`, where `xi` a SubDimension
     # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
     # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index c20a71abf6..a1179b06e0 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -212,33 +212,30 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
         idx_subs = []
         points = {d: [] for d in self._gdim}
         mapper = {d: [] for d in self._gdim}
-
+        pdim = self.sfunction._sparse_dim
+    
         # Positon map and temporaries for it
         pmap = self.sfunction._coordinate_indices
 
         # Temporaries for the position
-        # temps = self._positions(implicit_dims)
-        temps = []
-
+        temps = self._positions(implicit_dims)
+    
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-
-        try:
-            pdim = self.sfunction.coordinates.dimensions[-1]
-        except AttributeError:
-            pdim = self.sfunction.gridpoints.dimensions[-1]
-
+    
         # Create positions and indices temporaries/indirections
+        pr = []
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos.subs({pdim: di}), implicit_dims=implicit_dims)])
+            temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
 
             # Add conditional
-            lb = sympy.And(p >= d.symbolic_min - self.r, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max + self.r, evaluate=False)
+            lb = sympy.And(p >= d.symbolic_min-self.r, evaluate=False)
+            ub = sympy.And(p <= d.symbolic_max+self.r, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True) + rd
+                                             condition=condition, indirect=True)
+            pr.append(rd)
 
         # Substitution mapper for variables
         idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index e601a68bdd..5f99877bff 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -304,7 +304,7 @@ def _coordinate_indices(self):
         individually (hence the need for a position map).
         """
         return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map,
+                      for p, i in zip(self._position_map.values(),
                                       self.grid.dimensions[:self.grid.dim])])
 
     def _coordinate_bases(self, field_offset):
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 6cbb67a0d7..5ddaf00314 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -131,10 +131,14 @@ def init(data):
     eqn = sf.interpolate(m)
     op = Operator(eqn)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     expected_values = [sin(point[0]) + sin(point[1]) for point in points]
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
@@ -169,10 +173,14 @@ def test_precomputed_interpolation_time():
     eqn = sf.interpolate(u)
     op = Operator(eqn)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(time_m=0, time_M=4)
 
     for it in range(5):
@@ -206,10 +214,14 @@ def test_precomputed_injection():
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     indices = [slice(0, 2, 1), slice(9, 11, 1)]
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
@@ -246,10 +258,14 @@ def test_precomputed_injection_time():
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     for ti in range(2):
         indices = [slice(0, 2, 1), slice(9, 11, 1)]
@@ -274,12 +290,17 @@ def test_interpolate(shape, coords, npoints=20):
     expr = p.interpolate(a)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
     op(a=a)
+=======
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
+    op(a=a)
+    from IPython import embed; embed()
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
 
@@ -299,10 +320,14 @@ def test_interpolate_cumm(shape, coords, npoints=20):
     expr = p.interpolate(a, increment=True)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6)
@@ -325,10 +350,14 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     expr = p.interpolate(a, u_t=a.indices[0]+1)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], xcoords, rtol=1e-6)
@@ -337,10 +366,14 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     expr = p.interpolate(a, p_t=p.indices[0]+1)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -350,10 +383,14 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
                          p_t=p.indices[0]+1)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -374,10 +411,14 @@ def test_interpolate_array(shape, coords, npoints=20):
     expr = p.interpolate(a)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, points=p.data[:])
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
@@ -399,10 +440,14 @@ def test_interpolate_custom(shape, coords, npoints=20):
     expr = p.interpolate(a * p.indices[0])
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -420,7 +465,7 @@ def test_interpolation_dx():
     sf1.coordinates.data[0, :] = (0.5, 0.5)
 
     op = Operator(sf1.interpolate(u.dx))
-    print(op)
+
     assert sf1.data.shape == (1,)
     u.data[:] = 0.0
     u.data[5, 5] = 4.0
@@ -449,10 +494,14 @@ def test_interpolate_indexed(shape, coords, npoints=20):
     expr = p.interpolate(a[a.grid.dimensions] * p.indices[0])
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -476,10 +525,14 @@ def test_inject(shape, coords, result, npoints=19):
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -504,10 +557,14 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -519,10 +576,14 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -534,10 +595,14 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -562,10 +627,14 @@ def test_inject_array(shape, coords, result, npoints=19):
 
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, points=p2.data[:])
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -590,10 +659,14 @@ def test_inject_from_field(shape, coords, result, npoints=19):
     expr = p.inject(field=a, expr=b)
     op = Operator(expr)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, b=b)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -668,10 +741,14 @@ def test_edge_sparse():
     subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)}
     op = Operator(expr, subs=subs)
 <<<<<<< HEAD
+<<<<<<< HEAD
 
 =======
     print(op)
 >>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     assert sf1.data[0] == 0
 
@@ -710,6 +787,10 @@ def test_msf_interpolate():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
+<<<<<<< HEAD
+=======
+
+>>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
     sf.manual_scatter()
     op(time_m=0, time_M=4)
@@ -766,7 +847,6 @@ class SparseFirst(SparseFunction):
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
     print(op)
-
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)

From 9b7fc38787a0b6856138a8dfedb555b52677584d Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 9 Jun 2023 09:14:11 -0400
Subject: [PATCH 34/90] compiler: fix dtype of aliases

---
 devito/ir/equations/algorithms.py  |   3 +-
 devito/ir/support/utils.py         |   1 +
 devito/operations/interpolators.py |  56 ++++----
 devito/types/dense.py              |   7 -
 devito/types/sparse.py             | 206 ++++++-----------------------
 tests/test_buffering.py            |   5 +-
 tests/test_interpolation.py        |   7 +-
 7 files changed, 77 insertions(+), 208 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 87b95a0350..a03822c8da 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -57,7 +57,8 @@ def handle_indexed(indexed):
     # such as A[3])
     indexeds = retrieve_indexed(expr, deep=True)
     for i in indexeds:
-        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices) if e.is_integer}
+        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices)
+                     if e.is_integer}
         extra.update(expl_dims)
 
     # Enforce determinism
diff --git a/devito/ir/support/utils.py b/devito/ir/support/utils.py
index 5f08f48020..9e202b7e1d 100644
--- a/devito/ir/support/utils.py
+++ b/devito/ir/support/utils.py
@@ -183,6 +183,7 @@ def detect_accesses(exprs):
     for e in as_tuple(exprs):
         other_dims.update(i for i in e.free_symbols if isinstance(i, Dimension))
         other_dims.update(e.implicit_dims)
+
     mapper[None] = Stencil([(i, 0) for i in other_dims])
 
     return mapper
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index a1179b06e0..caa384b407 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -8,7 +8,7 @@
 from devito.symbolics import retrieve_function_carriers
 from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
-                          CustomDimension, Function)
+                          CustomDimension)
 from devito.types.utils import DimensionTuple
 
 __all__ = ['LinearInterpolator', 'PrecomputedInterpolator']
@@ -57,12 +57,11 @@ class Interpolation(UnevaluatedSparseOperation):
     Evaluates to a list of Eq objects.
     """
 
-    def __new__(cls, expr, offset, increment, self_subs, interpolator, callback):
+    def __new__(cls, expr, increment, self_subs, interpolator, callback):
         obj = super().__new__(cls, interpolator, callback)
 
         # TODO: unused now, but will be necessary to compute the adjoint
         obj.expr = expr
-        obj.offset = offset
         obj.increment = increment
         obj.self_subs = self_subs
 
@@ -80,13 +79,12 @@ class Injection(UnevaluatedSparseOperation):
     Evaluates to a list of Eq objects.
     """
 
-    def __new__(cls, field, expr, offset, interpolator, callback):
+    def __new__(cls, field, expr, interpolator, callback):
         obj = super().__new__(cls, interpolator, callback)
 
         # TODO: unused now, but will be necessary to compute the adjoint
         obj.field = field
         obj.expr = expr
-        obj.offset = offset
 
         return obj
 
@@ -142,9 +140,15 @@ def r(self):
 
     @cached_property
     def _rdim(self):
-        dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
-                                -self.r+1, self.r, len(range(-self.r+1, self.r+1)))
-                for d in self._gdim]
+        dims = []
+        # Enforce ordering
+        prevdim = self.sfunction._sparse_dim
+        for d in self._gdim:
+            rd = CustomDimension("r%s%s" % (self.sfunction.name, d.name),
+                                 -self.r+1, self.r, len(range(-self.r+1, self.r+1)),
+                                 prevdim)
+            prevdim = rd
+            dims.append(rd)
 
         return DimensionTuple(*dims, getters=self._gdim)
 
@@ -158,8 +162,7 @@ def _positions(self, implicit_dims):
         return [Eq(v, k, implicit_dims=implicit_dims)
                 for k, v in self.sfunction._position_map.items()]
 
-    def _interpolation_indices(self, variables, offset=0, field_offset=0,
-                               implicit_dims=None):
+    def _interp_idx(self, variables, implicit_dims=None):
         """
         Generate interpolation indices for the DiscreteFunctions in ``variables``.
         """
@@ -219,17 +222,17 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
 
         # Temporaries for the position
         temps = self._positions(implicit_dims)
-    
+
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-    
+
         # Create positions and indices temporaries/indirections
         pr = []
         for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
             p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
             temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
 
-            # Add conditional
+            # Add conditional to avoid OOB
             lb = sympy.And(p >= d.symbolic_min-self.r, evaluate=False)
             ub = sympy.And(p <= d.symbolic_max+self.r, evaluate=False)
             condition = sympy.And(lb, ub, evaluate=False)
@@ -243,8 +246,7 @@ def _interpolation_indices(self, variables, offset=0, field_offset=0,
 
         return idx_subs, temps
 
-    def interpolate(self, expr, offset=0, increment=False, self_subs={},
-                    implicit_dims=None):
+    def interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
         """
         Generate equations interpolating an arbitrary expression into ``self``.
 
@@ -252,8 +254,6 @@ def interpolate(self, expr, offset=0, increment=False, self_subs={},
         ----------
         expr : expr-like
             Input expression to interpolate.
-        offset : int, optional
-            Additional offset from the boundary.
         increment: bool, optional
             If True, generate increments (Inc) rather than assignments (Eq).
         implicit_dims : Dimension or list of Dimension, optional
@@ -273,13 +273,9 @@ def callback():
 
             variables = list(retrieve_function_carriers(_expr))
 
-            # Need to get origin of the field in case it is staggered
-            # TODO: handle each variable staggering separately
-            field_offset = variables[0].origin
             # List of indirection indices for all adjacent grid points
-            idx_subs, temps = self._interpolation_indices(
-                variables, offset, field_offset=field_offset, implicit_dims=implicit_dims
-            )
+            idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
+
             # Accumulate point-wise contributions into a temporary
             rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
@@ -294,9 +290,9 @@ def callback():
 
             return [summands[0]] + temps + summands[1:] + last
 
-        return Interpolation(expr, offset, increment, self_subs, self, callback)
+        return Interpolation(expr, increment, self_subs, self, callback)
 
-    def inject(self, field, expr, offset=0, implicit_dims=None):
+    def inject(self, field, expr, implicit_dims=None):
         """
         Generate equations injecting an arbitrary expression into a field.
 
@@ -306,8 +302,6 @@ def inject(self, field, expr, offset=0, implicit_dims=None):
             Input field into which the injection is performed.
         expr : expr-like
             Injected expression.
-        offset : int, optional
-            Additional offset from the boundary.
         implicit_dims : Dimension or list of Dimension, optional
             An ordered list of Dimensions that do not explicitly appear in the
             injection expression, but that should be honored when constructing
@@ -325,12 +319,8 @@ def callback():
 
             variables = list(retrieve_function_carriers(_expr)) + [field]
 
-            # Need to get origin of the field in case it is staggered
-            field_offset = field.origin
             # List of indirection indices for all adjacent grid points
-            idx_subs, temps = self._interpolation_indices(
-                variables, offset, field_offset=field_offset, implicit_dims=implicit_dims
-            )
+            idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
 
             # Substitute coordinate base symbols into the interpolation coefficients
             eqns = [Inc(field.xreplace(idx_subs),
@@ -339,7 +329,7 @@ def callback():
 
             return temps + eqns
 
-        return Injection(field, expr, offset, self, callback)
+        return Injection(field, expr, self, callback)
 
 
 class LinearInterpolator(WeightedInterpolator):
diff --git a/devito/types/dense.py b/devito/types/dense.py
index 40564053d8..9fb96ec5cd 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1466,13 +1466,6 @@ def __padding_setup__(self, **kwargs):
     def _halo_exchange(self):
         return
 
-    @property
-    def origin(self):
-        """
-        SubFunction have zero origin
-        """
-        return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
-
     def _arg_values(self, **kwargs):
         if self.name in kwargs:
             raise RuntimeError("`%s` is a SubFunction, so it can't be assigned "
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 5f99877bff..d557e60814 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -12,13 +12,11 @@
 
 from devito.finite_differences import generate_fd_shortcuts
 from devito.finite_differences.elementary import floor
-from devito.logger import warning
 from devito.mpi import MPI, SparseDistributor
 from devito.operations import LinearInterpolator, PrecomputedInterpolator
-from devito.symbolics import (INT, cast_mapper, indexify,
-                              retrieve_function_carriers)
+from devito.symbolics import INT, indexify, retrieve_function_carriers
 from devito.tools import (ReducerMap, as_tuple, flatten, prod, filter_ordered,
-                          memoized_meth, is_integer, dtype_to_mpidtype)
+                          is_integer, dtype_to_mpidtype)
 from devito.types.dense import DiscreteFunction, SubFunction
 from devito.types.dimension import (Dimension, ConditionalDimension, DefaultDimension,
                                     DynamicDimension)
@@ -260,6 +258,24 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
+    @property
+    def _support(self):
+        """
+        The grid points surrounding each sparse point within the radius of self's
+        injection/interpolation operators.
+        """
+        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
+        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
+        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
+                        axis=2)
+
+    @property
+    def _dist_datamap(self):
+        """
+        Mapper ``M : MPI rank -> required sparse data``.
+        """
+        return self._distributor.glb_to_rank(self._support) or {}
+
     @cached_property
     def _point_increments(self):
         """Index increments in each Dimension for each point symbol."""
@@ -307,14 +323,14 @@ def _coordinate_indices(self):
                       for p, i in zip(self._position_map.values(),
                                       self.grid.dimensions[:self.grid.dim])])
 
-    def _coordinate_bases(self, field_offset):
-        """Symbol for the base coordinates of the reference grid point."""
-        return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
-                      for c, o, idx, i, of in zip(self._coordinate_symbols,
-                                                  self.grid.origin_symbols,
-                                                  self._coordinate_indices,
-                                                  self.grid.dimensions[:self.grid.dim],
-                                                  field_offset)])
+    @cached_property
+    def _dist_reorder_mask(self):
+        """
+        An ordering mask that puts ``self._sparse_position`` at the front.
+        """
+        ret = (self._sparse_position,)
+        ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
+        return ret
 
     @property
     def gridpoints(self):
@@ -344,21 +360,18 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
-    @property
-    def _support(self):
+    def guard(self, expr=None):
         """
-        The grid points surrounding each sparse point within the radius of self's
-        injection/interpolation operators.
-        """
-        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
-        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
-        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
-                        axis=2)
+        Generate guarded expressions, that is expressions that are evaluated
+        by an Operator only if certain conditions are met.  The introduced
+        condition, here, is that all grid points in the support of a sparse
+        value must fall within the grid domain (i.e., *not* on the halo).
 
-    @property
-    def _dist_datamap(self):
-        """
-        Mapper ``M : MPI rank -> required sparse data``.
+        Parameters
+        ----------
+        expr : expr-like, optional
+            Input expression, from which the guarded expression is derived.
+            If not specified, defaults to ``self``.
         """
         return self.grid._distributor.glb_to_rank(self._support) or {}
 
@@ -897,70 +910,6 @@ def _coordinate_symbols(self):
         return tuple([self.coordinates.indexify((p_dim, i))
                       for i in range(self.grid.dim)])
 
-    @memoized_meth
-    def _index_matrix(self, offset):
-        # Note about the use of *memoization*
-        # Since this method is called by `_interpolation_indices`, using
-        # memoization avoids a proliferation of symbolically identical
-        # ConditionalDimensions for a given set of indirection indices
-
-        # List of indirection indices for all adjacent grid points
-        index_matrix = [tuple(idx + ii + offset for ii, idx
-                              in zip(inc, self._coordinate_indices))
-                        for inc in self._point_increments]
-
-        # A unique symbol for each indirection index
-        indices = filter_ordered(flatten(index_matrix))
-        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
-                              for i, p in enumerate(indices)])
-
-        return index_matrix, points
-
-    def guard(self, expr=None, offset=0):
-        """
-        Generate guarded expressions, that is expressions that are evaluated
-        by an Operator only if certain conditions are met.  The introduced
-        condition, here, is that all grid points in the support of a sparse
-        value must fall within the grid domain (i.e., *not* on the halo).
-
-        Parameters
-        ----------
-        expr : expr-like, optional
-            Input expression, from which the guarded expression is derived.
-            If not specified, defaults to ``self``.
-        offset : int, optional
-            Relax the guard condition by introducing a tolerance offset.
-        """
-        _, points = self._index_matrix(offset)
-
-        # Guard through ConditionalDimension
-        conditions = {}
-        for d, idx in zip(self.grid.dimensions, self._coordinate_indices):
-            p = points[idx]
-            lb = sympy.And(p >= d.symbolic_min - offset, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max + offset, evaluate=False)
-            conditions[p] = sympy.And(lb, ub, evaluate=False)
-        condition = sympy.And(*conditions.values(), evaluate=False)
-        cd = ConditionalDimension(self._sparse_dim.name, self._sparse_dim,
-                                  condition=condition, indirect=True)
-
-        if expr is None:
-            out = self.indexify().xreplace({self._sparse_dim: cd})
-        else:
-            functions = {f for f in retrieve_function_carriers(expr)
-                         if f.is_SparseFunction}
-            out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions})
-
-        # Temporaries for the position
-        temps = [Eq(v, k, implicit_dims=self.dimensions)
-                 for k, v in self._position_map.items()]
-        # Temporaries for the indirection Dimensions
-        temps.extend([Eq(v, k.subs(self._position_map),
-                         implicit_dims=self.dimensions)
-                      for k, v in points.items() if v in conditions])
-
-        return out, temps
-
     @cached_property
     def _decomposition(self):
         mapper = {self._sparse_dim: self._distributor.decomposition[self._sparse_dim]}
@@ -1051,7 +1000,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
     __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ +
                                        SparseFunction.__rkwargs__))
 
-    def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
+    def interpolate(self, expr, u_t=None, p_t=None, increment=False):
         """
         Generate equations interpolating an arbitrary expression into ``self``.
 
@@ -1059,8 +1008,6 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
         ----------
         expr : expr-like
             Input expression to interpolate.
-        offset : int, optional
-            Additional offset from the boundary.
         u_t : expr-like, optional
             Time index at which the interpolation is performed.
         p_t : expr-like, optional
@@ -1078,11 +1025,10 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
         if p_t is not None:
             subs = {self.time_dim: p_t}
 
-        return super(SparseTimeFunction, self).interpolate(expr, offset=offset,
-                                                           increment=increment,
+        return super(SparseTimeFunction, self).interpolate(expr, increment=increment,
                                                            self_subs=subs)
 
-    def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None):
+    def inject(self, field, expr, u_t=None, p_t=None, implicit_dims=None):
         """
         Generate equations injecting an arbitrary expression into a field.
 
@@ -1092,8 +1038,6 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None):
             Input field into which the injection is performed.
         expr : expr-like
             Injected expression.
-        offset : int, optional
-            Additional offset from the boundary.
         u_t : expr-like, optional
             Time index at which the interpolation is performed.
         p_t : expr-like, optional
@@ -1109,7 +1053,7 @@ def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None):
         if p_t is not None:
             expr = expr.subs({self.time_dim: p_t})
 
-        return super().inject(field, expr, offset=offset, implicit_dims=implicit_dims)
+        return super().inject(field, expr, implicit_dims=implicit_dims)
 
 
 class PrecomputedSparseFunction(AbstractSparseFunction):
@@ -1377,64 +1321,6 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
     __rkwargs__ = tuple(filter_ordered(AbstractSparseTimeFunction.__rkwargs__ +
                                        PrecomputedSparseFunction.__rkwargs__))
 
-    def interpolate(self, expr, offset=0, u_t=None, p_t=None, increment=False):
-        """
-        Generate equations interpolating an arbitrary expression into ``self``.
-
-        Parameters
-        ----------
-        expr : expr-like
-            Input expression to interpolate.
-        offset : int, optional
-            Additional offset from the boundary.
-        u_t : expr-like, optional
-            Time index at which the interpolation is performed.
-        p_t : expr-like, optional
-            Time index at which the result of the interpolation is stored.
-        increment: bool, optional
-            If True, generate increments (Inc) rather than assignments (Eq).
-        """
-        subs = {}
-        if u_t is not None:
-            time = self.grid.time_dim
-            t = self.grid.stepping_dim
-            expr = expr.subs({time: u_t, t: u_t})
-
-        if p_t is not None:
-            subs = {self.time_dim: p_t}
-
-        return super().interpolate(expr, offset=offset,
-                                   increment=increment, self_subs=subs)
-
-    def inject(self, field, expr, offset=0, u_t=None, p_t=None, implicit_dims=None):
-        """
-        Generate equations injecting an arbitrary expression into a field.
-
-        Parameters
-        ----------
-        field : Function
-            Input field into which the injection is performed.
-        expr : expr-like
-            Injected expression.
-        offset : int, optional
-            Additional offset from the boundary.
-        u_t : expr-like, optional
-            Time index at which the interpolation is performed.
-        p_t : expr-like, optional
-            Time index at which the result of the interpolation is stored.
-        implicit_dims : Dimension or list of Dimension, optional
-            An ordered list of Dimensions that do not explicitly appear in the
-            injection expression, but that should be honored when constructing
-            the operator.
-        """
-        # Apply optional time symbol substitutions to field and expr
-        if u_t is not None:
-            field = field.subs({field.time_dim: u_t})
-        if p_t is not None:
-            expr = expr.subs({self.time_dim: p_t})
-
-        return super().inject(field, expr, offset=offset, implicit_dims=implicit_dims)
-
 
 class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
     """
@@ -1770,13 +1656,11 @@ def _sub_functions(self):
                 'mrow', 'mcol', 'mval', 'par_dim_to_nnz_map',
                 'par_dim_to_nnz_m', 'par_dim_to_nnz_M')
 
-    def interpolate(self, expr, offset=0, u_t=None, p_t=None):
+    def interpolate(self, expr, u_t=None, p_t=None):
         """Creates a :class:`sympy.Eq` equation for the interpolation
         of an expression onto this sparse point collection.
 
         :param expr: The expression to interpolate.
-        :param offset: Additional offset from the boundary for
-                       absorbing boundary conditions.
         :param u_t: (Optional) time index to use for indexing into
                     field data in `expr`.
         :param p_t: (Optional) time index to use for indexing into
@@ -1822,13 +1706,11 @@ def interpolate(self, expr, offset=0, u_t=None, p_t=None):
 
         return [Eq(self, 0), Inc(lhs, rhs)]
 
-    def inject(self, field, expr, offset=0, u_t=None, p_t=None):
+    def inject(self, field, expr, u_t=None, p_t=None):
         """Symbol for injection of an expression onto a grid
 
         :param field: The grid field into which we inject.
         :param expr: The expression to inject.
-        :param offset: Additional offset from the boundary for
-                       absorbing boundary conditions.
         :param u_t: (Optional) time index to use for indexing into `field`.
         :param p_t: (Optional) time index to use for indexing into `expr`.
         """
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 815347f21b..4ef0eb12af 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -3,7 +3,7 @@
 
 from conftest import skipif
 from devito import (Constant, Grid, TimeFunction, SparseTimeFunction, Operator,
-                    Eq, ConditionalDimension, SubDimension, SubDomain, configuration)
+                    Eq, ConditionalDimension, SubDimension, SubDomain)
 from devito.ir import FindSymbols, retrieve_iteration_tree
 from devito.exceptions import InvalidOperator
 
@@ -271,8 +271,7 @@ def test_over_injection():
     op1 = Operator(eqns, opt='buffering')
 
     # Check generated code
-    assert len(retrieve_iteration_tree(op1)) ==\
-        6 + bool(configuration['language'] != 'C')
+    assert len(retrieve_iteration_tree(op1)) == 10
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 5ddaf00314..b95dbead9b 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -300,7 +300,7 @@ def test_interpolate(shape, coords, npoints=20):
 >>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
     op(a=a)
-    from IPython import embed; embed()
+
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
 
@@ -790,8 +790,11 @@ def test_msf_interpolate():
 <<<<<<< HEAD
 =======
 
+<<<<<<< HEAD
 >>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
+=======
+>>>>>>> 66bf5974f (compiler: fix dtype of aliases)
     sf.manual_scatter()
     op(time_m=0, time_M=4)
     sf.manual_gather()
@@ -846,7 +849,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
-    print(op)
+
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)

From 71794b3a68f0d001a687a215a45e3da5daf6c6f4 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 12 Jun 2023 10:17:06 -0400
Subject: [PATCH 35/90] api: rework interpolator to avoid un-necessary temps

---
 devito/ir/equations/algorithms.py             |  4 +-
 devito/ir/stree/algorithms.py                 |  3 +
 devito/operations/interpolators.py            | 53 +++++++--------
 devito/symbolics/inspection.py                |  7 +-
 devito/types/basic.py                         | 14 ++++
 devito/types/dimension.py                     |  2 +
 devito/types/sparse.py                        | 67 +++++++------------
 .../seismic/tutorials/08_snapshotting.ipynb   | 10 ++-
 tests/test_buffering.py                       |  2 +-
 tests/test_caching.py                         | 24 ++++---
 tests/test_dimension.py                       |  7 +-
 tests/test_dle.py                             | 17 +++--
 tests/test_dse.py                             | 36 +++++-----
 tests/test_ir.py                              |  2 +-
 tests/test_operator.py                        | 10 +--
 15 files changed, 136 insertions(+), 122 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index a03822c8da..93ec8895a1 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -61,6 +61,8 @@ def handle_indexed(indexed):
                      if e.is_integer}
         extra.update(expl_dims)
 
+    # Remove the parents of indirect DerivedDimensions to preserve their conditions
+    extra = extra - {d.parent for d in extra if d.is_Derived and d.indirect}
     # Enforce determinism
     extra = filter_sorted(extra)
 
@@ -70,7 +72,7 @@ def handle_indexed(indexed):
     # wrong; for example, in `((t, time), (t, x, y), (x, y))`, `x` could now
     # preceed `time`, while `t`, and therefore `time`, *must* appear before `x`,
     # as indicated by the second relation
-    implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
+    implicit_relations = {(d.parent, d) for d in extra if d.is_Derived and not d.indirect}
 
     # 2) To handle cases such as `((time, xi), (x,))`, where `xi` a SubDimension
     # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py
index 58e8e844e6..dd9eb9b265 100644
--- a/devito/ir/stree/algorithms.py
+++ b/devito/ir/stree/algorithms.py
@@ -155,6 +155,9 @@ def preprocess(clusters, options=None, **kwargs):
                    len(intersection) > 0:
                     found.append(c1)
                     queue.remove(c1)
+                elif c1.is_halo_touch and len(intersection) == 0 and \
+                        distributed_aindices == set():
+                    queue.remove(c1)
 
             syncs = normalize_syncs(*[c1.syncs for c1 in found])
             if syncs:
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index caa384b407..4e684879c4 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -1,11 +1,10 @@
 from abc import ABC, abstractmethod
-from collections import defaultdict
 
 import sympy
 from cached_property import cached_property
 
 from devito.finite_differences.elementary import floor
-from devito.symbolics import retrieve_function_carriers
+from devito.symbolics import retrieve_function_carriers, INT
 from devito.tools import as_tuple, flatten, prod
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
                           CustomDimension)
@@ -126,10 +125,6 @@ def grid(self):
     def _weights(self):
         raise NotImplementedError
 
-    @property
-    def _psym(self):
-        return self.sfunction._point_symbols
-
     @property
     def _gdim(self):
         return self.grid.dimensions
@@ -159,7 +154,7 @@ def _coeff_temps(self, implicit_dims):
         return []
 
     def _positions(self, implicit_dims):
-        return [Eq(v, k, implicit_dims=implicit_dims)
+        return [Eq(v, INT(floor(k)), implicit_dims=implicit_dims)
                 for k, v in self.sfunction._position_map.items()]
 
     def _interp_idx(self, variables, implicit_dims=None):
@@ -169,9 +164,6 @@ def _interp_idx(self, variables, implicit_dims=None):
         idx_subs = []
         mapper = defaultdict(list)
 
-        # Positon map and temporaries for it
-        pmap = self.sfunction._coordinate_indices
-
         # Temporaries for the position
         temps = self._positions(implicit_dims)
 
@@ -179,18 +171,15 @@ def _interp_idx(self, variables, implicit_dims=None):
         temps.extend(self._coeff_temps(implicit_dims))
 
         # Create positions and indices temporaries/indirections
-        pr = []
-        for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
-            p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
-
+        prev = self.sfunction.dimensions[-1]
+        for ((di, d), rd) in zip(enumerate(self._gdim), self._rdim):
             # Add conditional to avoid OOB
-            lb = sympy.And(p >= d.symbolic_min, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max, evaluate=False)
-            condition = sympy.And(lb, ub, evaluate=False)
-            mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True)
-            pr.append(rd)
+            lb = sympy.And(rd >= d.symbolic_min, evaluate=False)
+            ub = sympy.And(rd <= d.symbolic_max, evaluate=False)
+            cond = sympy.And(lb, ub, evaluate=False)
+            mapper[d] = ConditionalDimension(rd.name, prev,
+                                             condition=cond, indirect=True)
+            prev = rd
 
         # Substitution mapper for variables
         idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
@@ -281,7 +270,7 @@ def callback():
             summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
             # Substitute coordinate base symbols into the interpolation coefficients
             summands.extend([Inc(rhs, _expr.xreplace(idx_subs) * self._weights,
-                                 implicit_dims=implicit_dims)])
+                                 implicit_dims=implicit_dims + self._rdim)])
 
             # Write/Incr `self`
             lhs = self.sfunction.subs(self_subs)
@@ -325,7 +314,7 @@ def callback():
             # Substitute coordinate base symbols into the interpolation coefficients
             eqns = [Inc(field.xreplace(idx_subs),
                         _expr.xreplace(idx_subs) * self._weights,
-                        implicit_dims=implicit_dims)]
+                        implicit_dims=implicit_dims + self._rdim)]
 
             return temps + eqns
 
@@ -343,16 +332,23 @@ class LinearInterpolator(WeightedInterpolator):
     """
     @property
     def _weights(self):
-        c = [(1 - p) * (1 - rd) + rd * p
-             for (p, d, rd) in zip(self._psym, self._gdim, self._rdim)]
+        c = [(1 - p) * (1 - (rd - rd._symbolic_min)) + (rd - rd._symbolic_min) * p
+             for (p, d, rd) in zip(self._point_symbols, self._gdim, self._rdim)]
         return prod(c)
 
+    @cached_property
+    def _point_symbols(self):
+        """Symbol for coordinate value in each Dimension of the point."""
+        return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.sfunction.dtype)
+                                for d in self.grid.dimensions),
+                              getters=self.grid.dimensions)
+
     def _coeff_temps(self, implicit_dims):
         # Positions
         pmap = self.sfunction._position_map
-        poseq = [Eq(self._psym[d], pos/d.spacing - floor(pos/d.spacing),
+        poseq = [Eq(self._point_symbols[d], pos - floor(pos),
                     implicit_dims=implicit_dims)
-                 for (d, pos) in zip(self._gdim, pmap.values())]
+                 for (d, pos) in zip(self._gdim, pmap.keys())]
         return poseq
 
 
@@ -371,7 +367,8 @@ def _positions(self, implicit_dims):
         if self.sfunction.gridpoints is None:
             return super()._positions(implicit_dims)
         # No position temp as we have directly the gridpoints
-        return []
+        return [Eq(p, k, implicit_dims=implicit_dims)
+                for (k, p) in self.sfunction._position_map.items()]
 
     @property
     def interpolation_coeffs(self):
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index f58787ad07..cdd408c643 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -1,7 +1,7 @@
 from functools import singledispatch
 
 import numpy as np
-from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple
+from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple, Add
 
 from devito.finite_differences import Derivative
 from devito.finite_differences.differentiable import IndexDerivative
@@ -269,6 +269,11 @@ def sympy_dtype(expr, default):
     returns the default if non is found
     """
     args = expr.args
+    # We can only infer the dtype for additions/multiplications or Symbols.
+    # In other cases the expression's function may modify the inferred dtype
+    if not (isinstance(expr.func, Add) or isinstance(expr.func, Add)) or \
+            not expr.is_Symbol:
+        return default
 
     # Symbol/... without argument, check its dtype
     if len(args) == 0:
diff --git a/devito/types/basic.py b/devito/types/basic.py
index 45d54f51db..95f5d2d3d2 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -1463,3 +1463,17 @@ def compare(self, other):
             if c:
                 return c
         return 0
+
+    def _subs(self, old, new, **hints):
+        # Wrap in a try to make sure no substitution happens when
+        # `old` is an Indexed, as only checking `old is new` would lead to
+        # an incorrect substitution of `old.base` by `new`
+        try:
+            if old.is_Indexed:
+                if old.base == self.base and old.indices == self.indices:
+                    return new
+                else:
+                    return self
+        except AttributeError:
+            pass
+        return super()._subs(old, new, **hints)
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index dfb45a50d7..c9743c73f4 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -454,6 +454,7 @@ class DerivedDimension(BasicDimension):
     """
 
     is_Derived = True
+    indirect = False
 
     __rargs__ = Dimension.__rargs__ + ('parent',)
     __rkwargs__ = ()
@@ -1205,6 +1206,7 @@ class CustomDimension(BasicDimension):
     """
 
     is_Custom = True
+    indirect = False
 
     __rkwargs__ = ('symbolic_min', 'symbolic_max', 'symbolic_size', 'parent')
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index d557e60814..69341d4fd5 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -11,10 +11,9 @@
 from cached_property import cached_property
 
 from devito.finite_differences import generate_fd_shortcuts
-from devito.finite_differences.elementary import floor
 from devito.mpi import MPI, SparseDistributor
 from devito.operations import LinearInterpolator, PrecomputedInterpolator
-from devito.symbolics import INT, indexify, retrieve_function_carriers
+from devito.symbolics import indexify, retrieve_function_carriers
 from devito.tools import (ReducerMap, as_tuple, flatten, prod, filter_ordered,
                           is_integer, dtype_to_mpidtype)
 from devito.types.dense import DiscreteFunction, SubFunction
@@ -23,7 +22,7 @@
 from devito.types.dimension import dimensions as mkdims
 from devito.types.basic import Symbol
 from devito.types.equation import Eq, Inc
-from devito.types.utils import IgnoreDimSort, DimensionTuple
+from devito.types.utils import IgnoreDimSort
 
 
 __all__ = ['SparseFunction', 'SparseTimeFunction', 'PrecomputedSparseFunction',
@@ -98,7 +97,7 @@ def __distributor_setup__(self, **kwargs):
             kwargs['grid'].distributor
         )
 
-    def __subfunc_setup__(self, key, suffix):
+    def __subfunc_setup__(self, key, suffix, dtype=None):
         if isinstance(key, SubFunction):
             return key
         elif key is not None and not isinstance(key, Iterable):
@@ -111,7 +110,7 @@ def __subfunc_setup__(self, key, suffix):
 
         if key is None:
             # Fallback to default behaviour
-            dtype = self.dtype
+            dtype = dtype or self.dtype
         else:
             if not isinstance(key, np.ndarray):
                 key = np.array(key)
@@ -123,9 +122,9 @@ def __subfunc_setup__(self, key, suffix):
 
             # Infer dtype
             if np.issubdtype(key.dtype.type, np.integer):
-                dtype = np.int32
+                dtype = dtype or np.int32
             else:
-                dtype = self.dtype
+                dtype = dtype or self.dtype
 
         if key is not None and key.ndim > 2:
             shape = (*shape, *key.shape[2:])
@@ -375,6 +374,11 @@ def guard(self, expr=None):
         """
         return self.grid._distributor.glb_to_rank(self._support) or {}
 
+    @cached_property
+    def _pos_symbols(self):
+        return [Symbol(name='pos%s' % d, dtype=np.int32)
+                for d in self.grid.dimensions]
+
     @cached_property
     def _point_increments(self):
         """Index increments in each Dimension for each point symbol."""
@@ -384,43 +388,17 @@ def _point_increments(self):
     def _point_support(self):
         return np.array(self._point_increments)
 
-    @cached_property
-    def _point_symbols(self):
-        """Symbol for coordinate value in each Dimension of the point."""
-        return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
-                                for d in self.grid.dimensions),
-                              getters=self.grid.dimensions)
-
     @cached_property
     def _position_map(self):
         """
         Symbols map for the physical position of the sparse points relative to the grid
         origin.
         """
-        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
-                   for d in self.grid.dimensions]
-        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
-                                                          self._coordinate_symbols,
-                                                          self.grid.origin_symbols)])
-
-    @cached_property
-    def _coordinate_indices(self):
-        """
-        Symbol for each grid index according to the coordinates.
-
-        Notes
-        -----
-        The expression `(coord - origin)/spacing` could also be computed in the
-        mathematically equivalent expanded form `coord/spacing -
-        origin/spacing`. This particular form is problematic when a sparse
-        point is in close proximity of the grid origin, since due to a larger
-        machine precision error it may cause a +-1 error in the computation of
-        the position. We mitigate this problem by computing the positions
-        individually (hence the need for a position map).
-        """
-        return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.values(),
-                                      self.grid.dimensions[:self.grid.dim])])
+        return OrderedDict([((c - o)/d.spacing, p)
+                            for p, c, d, o in zip(self._pos_symbols,
+                                                  self._coordinate_symbols,
+                                                  self.grid.dimensions,
+                                                  self.grid.origin_symbols)])
 
     @cached_property
     def _dist_reorder_mask(self):
@@ -1159,7 +1137,8 @@ def __init_finalize__(self, *args, **kwargs):
         else:
             assert gridpoints is not None
             self._coordinates = None
-            self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints')
+            self._gridpoints = self.__subfunc_setup__(gridpoints, 'gridpoints',
+                                                      dtype=np.int32)
             self._dist_origin = {self._gridpoints: self.grid.origin_ioffset}
 
         # Setup the interpolation coefficients. These are compulsory
@@ -1200,7 +1179,7 @@ def _coordinate_symbols(self):
                           for i in range(self.grid.dim)])
 
     @cached_property
-    def _coordinate_indices(self):
+    def _position_map(self):
         """
         Symbol for each grid index according to the coordinates.
 
@@ -1216,11 +1195,11 @@ def _coordinate_indices(self):
         """
         if self.gridpoints is not None:
             ddim = self.gridpoints.dimensions[-1]
-            return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
+            return OrderedDict((self.gridpoints._subs(ddim, di), p)
+                               for (di, p) in zip(range(self.grid.dim),
+                                                  self._pos_symbols))
         else:
-            return tuple([INT(floor(p / i.spacing))
-                          for p, i in zip(self._position_map.values(),
-                                          self.grid.dimensions[:self.grid.dim])])
+            return super()._position_map
 
     @cached_property
     def _coordinate_symbols(self):
diff --git a/examples/seismic/tutorials/08_snapshotting.ipynb b/examples/seismic/tutorials/08_snapshotting.ipynb
index f0cd092d70..3784bc87bd 100644
--- a/examples/seismic/tutorials/08_snapshotting.ipynb
+++ b/examples/seismic/tutorials/08_snapshotting.ipynb
@@ -159,9 +159,8 @@
     "# Set symbolics of the operator, source and receivers:\n",
     "pde = model.m * u.dt2 - u.laplace + model.damp * u.dt\n",
     "stencil = Eq(u.forward, solve(pde, u.forward))\n",
-    "src_term = src.inject(field=u.forward, expr=src * dt**2 / model.m,\n",
-    "                      offset=model.nbl)\n",
-    "rec_term = rec.interpolate(expr=u, offset=model.nbl)\n",
+    "src_term = src.inject(field=u.forward, expr=src * dt**2 / model.m)\n",
+    "rec_term = rec.interpolate(expr=u)\n",
     "op = Operator([stencil] + src_term + rec_term, subs=model.spacing_map)\n",
     "\n",
     "# Run the operator for `(nt-2)` time steps:\n",
@@ -350,9 +349,8 @@
     "stencil = Eq(u.forward, solve(pde, u.forward))\n",
     "src_term = src.inject(\n",
     "    field=u.forward,\n",
-    "    expr=src * dt**2 / model.m,\n",
-    "    offset=model.nbl)\n",
-    "rec_term = rec.interpolate(expr=u, offset=model.nbl)\n",
+    "    expr=src * dt**2 / model.m)\n",
+    "rec_term = rec.interpolate(expr=u)\n",
     "\n",
     "#Part 2 #############\n",
     "op1 = Operator([stencil] + src_term + rec_term,\n",
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 4ef0eb12af..25b68cef56 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -271,7 +271,7 @@ def test_over_injection():
     op1 = Operator(eqns, opt='buffering')
 
     # Check generated code
-    assert len(retrieve_iteration_tree(op1)) == 10
+    assert len(retrieve_iteration_tree(op1)) == 8
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_caching.py b/tests/test_caching.py
index aa2b61ac6b..41d8f76afa 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -638,9 +638,10 @@ def test_sparse_function(self, operate_on_empty_cache):
 
         i = u.inject(expr=u, field=u)
 
-        # created: ii_u_0*2 (Symbol and ConditionalDimension), ii_u_1*2, ii_u_2*2,
-        # ii_u_3*2, px, py, posx, posy, u_coords (as indexified),
-        ncreated = 2+2+2+2+2+1+1+1
+        # created: rxu, rxy (radius dimensions) and spacings
+        # rxu, rxy (conditional dimensions)
+        # posx, posy, px, py, u_coords (as indexified),
+        ncreated = 2+2+2+2+2+1
         # Note that injection is now lazy so no new symbols should be created
         assert len(_SymbolCache) == cur_cache_size
         i.evaluate
@@ -655,15 +656,16 @@ def test_sparse_function(self, operate_on_empty_cache):
         del u
         del i
         clear_cache()
-        # At this point, not all children objects have been cleared. In
-        # particular, the ii_u_* Symbols are still alive, as well as p_u and
-        # h_p_u. This is because in the first clear_cache they were still
-        # referenced by their "parent" objects (e.g., ii_u_* by
-        # ConditionalDimensions, through `condition`)
-        assert len(_SymbolCache) == init_cache_size + 6
+        # At this point, not all children objects have been cleared. In particular, the
+        # ru* Symbols are still alive, as well as p_u and h_p_u and pos*. This is because
+        # in the first clear_cache they were still referenced by their "parent" objects
+        # (e.g., ru* by ConditionalDimensions, through `condition`)
+
+        assert len(_SymbolCache) == init_cache_size + 10
         clear_cache()
-        # Now we should be back to the original state
-        assert len(_SymbolCache) == init_cache_size
+        # Now we should be back to the original state except pos*
+        # that belong to the abstract class
+        assert len(_SymbolCache) == init_cache_size + 2
 
     def test_after_indexification(self):
         """
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index 150982ef9a..b0e1ee5cfc 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -5,7 +5,7 @@
 import pytest
 
 from conftest import assert_blocking, assert_structure, skipif, opts_tiling
-from devito import (ConditionalDimension, Grid, Function, TimeFunction,  # noqa
+from devito import (ConditionalDimension, Grid, Function, TimeFunction, floor,  # noqa
                     SparseFunction, SparseTimeFunction, Eq, Operator, Constant,
                     Dimension, DefaultDimension, SubDimension, switchconfig,
                     SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
@@ -13,7 +13,7 @@
 from devito.arch.compiler import IntelCompiler, OneapiCompiler
 from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
                            FindSymbols, retrieve_iteration_tree)
-from devito.symbolics import indexify, retrieve_functions, IntDiv
+from devito.symbolics import indexify, retrieve_functions, IntDiv, INT
 from devito.types import Array, StencilDimension, Symbol
 from devito.types.dimension import AffineIndexAccessFunction
 
@@ -1051,7 +1051,8 @@ def test_no_index_sparse(self):
         # 0 --- 0 --- 0 --- 0
 
         radius = 1
-        indices = [(i, i+radius) for i in sf._coordinate_indices]
+        indices = [(INT(floor(i)), INT(floor(i))+radius)
+                   for i in sf._position_map.keys()]
         bounds = [i.symbolic_size - radius for i in grid.dimensions]
 
         eqs = [Eq(p, v) for (v, p) in sf._position_map.items()]
diff --git a/tests/test_dle.py b/tests/test_dle.py
index d3f3b11eaf..f1a06cb916 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -187,13 +187,20 @@ def test_cache_blocking_structure_optrelax():
 
     op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
+    bns, _ = assert_blocking(op, {'p_src0_blk0', 'x0_blk0', 'p_src1_blk0'})
 
     iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
     assert len(iters) == 2
     assert iters[0].dim.is_Block
     assert iters[1].dim.is_Block
 
+    iters = FindNodes(Iteration).visit(bns['p_src1_blk0'])
+    assert len(iters) == 5
+    assert iters[0].dim.is_Block
+    assert iters[1].dim.is_Block
+    for i in range(2, 5):
+        assert not iters[i].dim.is_Block
+
 
 def test_cache_blocking_structure_optrelax_customdim():
     grid = Grid(shape=(8, 8, 8))
@@ -284,7 +291,8 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t', 't,p_s0_blk0,p_s'], 't,p_s0_blk0,p_s')
+    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
+                     't,p_s0_blk0,p_s,rsx,rsy')
 
 
 class TestBlockingParTile(object):
@@ -717,7 +725,7 @@ def test_scheduling(self):
         op = Operator(eqns, opt=('openmp', {'par-dynamic-work': 0}))
 
         iterations = FindNodes(Iteration).visit(op)
-        assert len(iterations) == 4
+        assert len(iterations) == 6
         assert iterations[1].is_Affine
         assert 'schedule(dynamic,1)' in iterations[1].pragmas[0].value
         assert not iterations[3].is_Affine
@@ -941,8 +949,9 @@ def test_parallel_prec_inject(self):
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 1}))
         iterations = FindNodes(Iteration).visit(op0)
+
         assert not iterations[0].pragmas
-        assert 'omp for' in iterations[1].pragmas[0].value
+        assert 'omp for' in iterations[2].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 0328ecbd65..c49faef629 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -42,9 +42,9 @@ def test_scheduling_after_rewrite():
     trees = retrieve_iteration_tree(op)
 
     # Check loop nest structure
-    assert all(i.dim is j for i, j in zip(trees[0], grid.dimensions))  # time invariant
-    assert trees[1].root.dim is grid.time_dim
-    assert all(trees[1].root.dim is tree.root.dim for tree in trees[1:])
+    assert all(i.dim is j for i, j in zip(trees[1], grid.dimensions))  # time invariant
+    assert trees[2].root.dim is grid.time_dim
+    assert all(trees[2].root.dim is tree.root.dim for tree in trees[2:])
 
 
 @pytest.mark.parametrize('exprs,expected,min_cost', [
@@ -1665,7 +1665,7 @@ def test_drop_redundants_after_fusion(self, rotate):
         op = Operator(eqns, opt=('advanced', {'cire-rotate': rotate}))
 
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        assert len(arrays) == 2
+        assert len(arrays) == 4
         assert all(i._mem_heap and not i._mem_external for i in arrays)
 
     def test_full_shape_big_temporaries(self):
@@ -2670,11 +2670,11 @@ def test_fullopt(self):
         bns, _ = assert_blocking(op1, {'x0_blk0'})  # due to loop blocking
 
         assert summary0[('section0', None)].ops == 50
-        assert summary0[('section1', None)].ops == 122
+        assert summary0[('section1', None)].ops == 41
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
-        assert summary1[('section0', None)].ops == 31
-        assert np.isclose(summary1[('section0', None)].oi, 1.767, atol=0.001)
+        assert summary1[('section2', None)].ops == 31
+        assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
         assert np.allclose(rec0.data, rec1.data, atol=10e-5)
@@ -2734,8 +2734,8 @@ def test_fullopt(self):
         assert np.allclose(self.tti_noopt[1].data, rec.data, atol=10e-1)
 
         # Check expected opcount/oi
-        assert summary[('section1', None)].ops == 92
-        assert np.isclose(summary[('section1', None)].oi, 2.074, atol=0.001)
+        assert summary[('section3', None)].ops == 92
+        assert np.isclose(summary[('section3', None)].oi, 2.074, atol=0.001)
 
         # With optimizations enabled, there should be exactly four BlockDimensions
         op = wavesolver.op_fwd()
@@ -2746,12 +2746,14 @@ def test_fullopt(self):
         assert y.parent is y0_blk0
         assert not x._defines & y._defines
 
-        # Also, in this operator, we expect seven temporary Arrays:
-        # * all of the seven Arrays are allocated on the heap
-        # * with OpenMP, five Arrays are defined globally, and two additional
-        #   Arrays are defined locally
+        # Also, in this operator, we expect twelve temporary Arrays:
+        # * all of the twelve Arrays are allocated on the heap
+        # * with OpenMP:
+        #   four Arrays are defined globally for the cos/sin temporaries
+        #   six Arrays are defined globally for the sparse positions temporaries
+        # and two additional block-sized Arrays are defined locally
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        extra_arrays = 2
+        extra_arrays = 2+6
         assert len(arrays) == 4 + extra_arrays
         assert all(i._mem_heap and not i._mem_external for i in arrays)
         bns, pbs = assert_blocking(op, {'x0_blk0'})
@@ -2787,7 +2789,7 @@ def test_fullopt_w_mpi(self):
     def test_opcounts(self, space_order, expected):
         op = self.tti_operator(opt='advanced', space_order=space_order)
         sections = list(op.op_fwd()._profiler._sections.values())
-        assert sections[1].sops == expected
+        assert sections[3].sops == expected
 
     @switchconfig(profiling='advanced')
     @pytest.mark.parametrize('space_order,expected', [
@@ -2797,8 +2799,8 @@ def test_opcounts_adjoint(self, space_order, expected):
         wavesolver = self.tti_operator(opt=('advanced', {'openmp': False}))
         op = wavesolver.op_adj()
 
-        assert op._profiler._sections['section1'].sops == expected
-        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7
+        assert op._profiler._sections['section3'].sops == expected
+        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+6
 
 
 class TestTTIv2(object):
diff --git a/tests/test_ir.py b/tests/test_ir.py
index fd961fe9e9..ac2977d15f 100644
--- a/tests/test_ir.py
+++ b/tests/test_ir.py
@@ -954,7 +954,7 @@ def test_iteration_parallelism_3d(self, exprs, atomic, parallel):
 class TestEquationAlgorithms(object):
 
     @pytest.mark.parametrize('expr,expected', [
-        ('Eq(a[time, p], b[time, c[p, 0]+r, c[p, 1]] * f[p, r])', '[time, p, r, d, x, y]')
+        ('Eq(a[time, p], b[time, c[p, 0]+r, c[p, 1]] * f[p, r])', '[time, p, r, d]')
     ])
     def test_dimension_sort(self, expr, expected):
         """
diff --git a/tests/test_operator.py b/tests/test_operator.py
index 5cdc546d51..ed34f8aaf8 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -1800,20 +1800,20 @@ def test_scheduling_sparse_functions(self):
         eqn4 = sf2.interpolate(u2)
 
         # Note: opts disabled only because with OpenMP otherwise there might be more
-        # `trees` than 4
+        # `trees` than 6
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
         # Time loop not shared due to the WAR
         assert trees[0][0].dim is time and trees[0][0] is trees[1][0]  # this IS shared
-        assert trees[1][0] is not trees[2][0]
-        assert trees[2][0].dim is time and trees[2][0] is trees[3][0]  # this IS shared
+        assert trees[1][0] is not trees[3][0]
+        assert trees[3][0].dim is time and trees[3][0] is trees[4][0]  # this IS shared
 
         # Now single, shared time loop expected
         eqn2 = sf1.inject(u1.forward, expr=sf1)
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
         assert all(trees[0][0] is i[0] for i in trees)
 
     def test_scheduling_with_free_dims(self):

From 6b7fccad6485b36da238fada1c0b94c3876a94b5 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 15 Jun 2023 14:30:19 -0400
Subject: [PATCH 36/90] api: allow to inject into multiple fields at once

---
 devito/operations/interpolators.py | 22 +++++++++++++++++-----
 tests/test_interpolation.py        | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 4e684879c4..6711734cc1 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -299,22 +299,34 @@ def inject(self, field, expr, implicit_dims=None):
         implicit_dims = self._augment_implicit_dims(implicit_dims)
 
         def callback():
+            # Make iterable to support inject((u, v), expr=expr)
+            # or inject((u, v), expr=(expr1, expr2))
+            fields, exprs = as_tuple(field), as_tuple(expr)
+            # Provide either one expr per field or on expr for all fields
+            if len(fields) > 1:
+                if len(exprs) == 1:
+                    exprs = tuple(exprs[0] for _ in fields)
+                else:
+                    assert len(exprs) == len(fields)
+
             # Derivatives must be evaluated before the introduction of indirect accesses
             try:
-                _expr = expr.evaluate
+                _exprs = tuple(e.evaluate for e in exprs)
             except AttributeError:
                 # E.g., a generic SymPy expression or a number
-                _expr = expr
+                _exprs = exprs
 
-            variables = list(retrieve_function_carriers(_expr)) + [field]
+            variables = list(v for e in _exprs for v in retrieve_function_carriers(e))
+            variables = variables + list(fields)
 
             # List of indirection indices for all adjacent grid points
             idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
 
             # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = [Inc(field.xreplace(idx_subs),
+            eqns = [Inc(_field.xreplace(idx_subs),
                         _expr.xreplace(idx_subs) * self._weights,
-                        implicit_dims=implicit_dims + self._rdim)]
+                        implicit_dims=implicit_dims + self._rdim)
+                    for (_field, _expr) in zip(fields, _exprs)]
 
             return temps + eqns
 
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index b95dbead9b..ec14597849 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -540,6 +540,36 @@ def test_inject(shape, coords, result, npoints=19):
     assert np.allclose(a.data[indices], result, rtol=1.e-5)
 
 
+@pytest.mark.parametrize('shape, coords, nexpr, result', [
+    ((11, 11), [(.05, .95), (.45, .45)], 1, 1.),
+    ((11, 11), [(.05, .95), (.45, .45)], 2, 1.),
+    ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 1, 0.5),
+    ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 2, 0.5)
+])
+def test_multi_inject(shape, coords, nexpr, result, npoints=19):
+    """Test point injection with a set of points forming a line
+    through the middle of the grid.
+    """
+    a1 = unit_box(name='a1', shape=shape)
+    a2 = unit_box(name='a2', shape=shape, grid=a1.grid)
+    a1.data[:] = 0.
+    a2.data[:] = 0.
+    p = points(a1.grid, ranges=coords, npoints=npoints)
+
+    iexpr = Float(1.) if nexpr == 1 else (Float(1.), Float(2.))
+    expr = p.inject((a1, a2), iexpr)
+
+    op = Operator(expr)
+    print(op)
+    op(a1=a1, a2=a2)
+
+    indices = [slice(4, 6, 1) for _ in coords]
+    indices[0] = slice(1, -1, 1)
+    result = (result, result) if nexpr == 1 else (result, 2 * result)
+    for r, a in zip(result, (a1, a2)):
+        assert np.allclose(a.data[indices], r, rtol=1.e-5)
+
+
 @pytest.mark.parametrize('shape, coords, result', [
     ((11, 11), [(.05, .95), (.45, .45)], 1.),
     ((11, 11, 11), [(.05, .95), (.45, .45), (.45, .45)], 0.5)

From 940626287e31b82f9654d5539ca216d01170ab6f Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 15 Jun 2023 14:44:06 -0400
Subject: [PATCH 37/90] CI: add missing test in examples.yml

---
 .github/workflows/examples-mpi.yml    |   1 +
 .github/workflows/examples.yml        |   8 +-
 devito/data/data.py                   |   2 +-
 devito/ir/equations/algorithms.py     |   4 +-
 devito/operations/interpolators.py    | 232 +++++++++++------------
 devito/operator/operator.py           |   2 +-
 devito/tools/algorithms.py            |   3 +
 devito/types/basic.py                 |  12 +-
 devito/types/dimension.py             |   2 +-
 devito/types/sparse.py                | 254 ++++++--------------------
 examples/seismic/elastic/operators.py |   7 +-
 tests/test_caching.py                 |   4 +-
 tests/test_interpolation.py           | 165 +----------------
 tests/test_mpi.py                     |   9 +-
 tests/test_pickle.py                  |  35 +---
 15 files changed, 204 insertions(+), 536 deletions(-)

diff --git a/.github/workflows/examples-mpi.yml b/.github/workflows/examples-mpi.yml
index d6498da49a..2c8fb09f85 100644
--- a/.github/workflows/examples-mpi.yml
+++ b/.github/workflows/examples-mpi.yml
@@ -57,6 +57,7 @@ jobs:
       run: |
         pip install --upgrade pip
         pip install -e .[extras,mpi,tests]
+        python3 scripts/clear_devito_cache.py
 
     - name: Test mpi notebooks
       continue-on-error: true
diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index deac69f1b0..8bc9c34f30 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -49,7 +49,13 @@ jobs:
         pip install -e .[tests,extras]
 
     - name: Tests in examples
-      run: py.test --cov --cov-config=.coveragerc --cov-report=xml examples/
+      run: |
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/acoustic/acoustic_example.py
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/viscoacoustic/viscoacoustic_example.py
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/tti/tti_example.py
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/elastic/elastic_example.py
+        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/viscoelastic/viscoelastic_example.py
 
     - name: Seismic acoustic examples
       run: |
diff --git a/devito/data/data.py b/devito/data/data.py
index 1ef35490b8..78859cdb7e 100644
--- a/devito/data/data.py
+++ b/devito/data/data.py
@@ -140,7 +140,7 @@ def __array_finalize__(self, obj):
     @property
     def _local(self):
         """A view of ``self`` with global indexing disabled."""
-        ret = self.view(np.ndarray)
+        ret = self.view()
         ret._is_distributed = False
         return ret
 
diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 93ec8895a1..89400eae1e 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -51,7 +51,7 @@ def handle_indexed(indexed):
     relations.add(expr.implicit_dims)
 
     # Add in leftover free dimensions (not an Indexed' index)
-    extra = set(retrieve_dimensions(expr))
+    extra = set(retrieve_dimensions(expr, deep=True))
 
     # Add in pure data dimensions (e.g., those accessed only via explicit values,
     # such as A[3])
@@ -61,8 +61,6 @@ def handle_indexed(indexed):
                      if e.is_integer}
         extra.update(expl_dims)
 
-    # Remove all parents with the same name as its DerivedDimension to preserve conditions
-    extra = extra - {d.parent for d in extra if d.is_Derived and d.indirect}
     # Enforce determinism
     extra = filter_sorted(extra)
 
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 6711734cc1..0bd8e97b13 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -29,16 +29,15 @@ class UnevaluatedSparseOperation(sympy.Expr, Evaluable):
 
     subdomain = None
 
-    def __new__(cls, interpolator, callback):
+    def __new__(cls, interpolator):
         obj = super().__new__(cls)
 
         obj.interpolator = interpolator
-        obj.callback = callback
 
         return obj
 
     def _evaluate(self, **kwargs):
-        return_value = self.callback()
+        return_value = self.interpolator._evauate(**kwargs)
         assert(all(isinstance(i, Eq) for i in return_value))
         return return_value
 
@@ -56,16 +55,22 @@ class Interpolation(UnevaluatedSparseOperation):
     Evaluates to a list of Eq objects.
     """
 
-    def __new__(cls, expr, increment, self_subs, interpolator, callback):
-        obj = super().__new__(cls, interpolator, callback)
+    def __new__(cls, expr, increment, implicit_dims, self_subs, interpolator):
+        obj = super().__new__(cls, interpolator)
 
         # TODO: unused now, but will be necessary to compute the adjoint
         obj.expr = expr
         obj.increment = increment
         obj.self_subs = self_subs
+        obj.implicit_dims = implicit_dims
 
         return obj
 
+    def _evaluate(self, **kwargs):
+        return self.interpolator._interpolate(expr=self.expr, increment=self.increment,
+                                              self_subs=self.self_subs,
+                                              implicit_dims=self.implicit_dims)
+
     def __repr__(self):
         return "Interpolation(%s into %s)" % (repr(self.expr),
                                               repr(self.interpolator.sfunction))
@@ -78,15 +83,20 @@ class Injection(UnevaluatedSparseOperation):
     Evaluates to a list of Eq objects.
     """
 
-    def __new__(cls, field, expr, interpolator, callback):
-        obj = super().__new__(cls, interpolator, callback)
+    def __new__(cls, field, expr, implicit_dims, interpolator):
+        obj = super().__new__(cls, interpolator)
 
         # TODO: unused now, but will be necessary to compute the adjoint
         obj.field = field
         obj.expr = expr
+        obj.implicit_dims = implicit_dims
 
         return obj
 
+    def _evaluate(self, **kwargs):
+        return self.interpolator._inject(expr=self.expr, field=self.field,
+                                         implicit_dims=self.implicit_dims)
+
     def __repr__(self):
         return "Injection(%s into %s)" % (repr(self.expr), repr(self.field))
 
@@ -135,15 +145,9 @@ def r(self):
 
     @cached_property
     def _rdim(self):
-        dims = []
-        # Enforce ordering
-        prevdim = self.sfunction._sparse_dim
-        for d in self._gdim:
-            rd = CustomDimension("r%s%s" % (self.sfunction.name, d.name),
-                                 -self.r+1, self.r, len(range(-self.r+1, self.r+1)),
-                                 prevdim)
-            prevdim = rd
-            dims.append(rd)
+        dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
+                                p-self.r+1, p+self.r, len(range(-self.r+1, self.r+1)))
+                for (p, d) in zip(self.sfunction._position_map.values(), self._gdim)]
 
         return DimensionTuple(*dims, getters=self._gdim)
 
@@ -161,81 +165,63 @@ def _interp_idx(self, variables, implicit_dims=None):
         """
         Generate interpolation indices for the DiscreteFunctions in ``variables``.
         """
-        idx_subs = []
-        mapper = defaultdict(list)
-
+        mapper = {}
         # Temporaries for the position
         temps = self._positions(implicit_dims)
 
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-
-        # Create positions and indices temporaries/indirections
-        prev = self.sfunction.dimensions[-1]
+        pr = self.sfunction.dimensions[-1]
         for ((di, d), rd) in zip(enumerate(self._gdim), self._rdim):
             # Add conditional to avoid OOB
             lb = sympy.And(rd >= d.symbolic_min, evaluate=False)
             ub = sympy.And(rd <= d.symbolic_max, evaluate=False)
             cond = sympy.And(lb, ub, evaluate=False)
-            mapper[d] = ConditionalDimension(rd.name, prev,
-                                             condition=cond, indirect=True)
-            prev = rd
+            mapper[d] = ConditionalDimension(rd.name, pr, condition=cond, indirect=True)
+            pr = rd
 
         # Substitution mapper for variables
         idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
                     for v in variables}
+        idx_subs.update({rd: crd for (rd, crd) in zip(self._rdim, mapper.values())})
 
         return idx_subs, temps
 
-    def subs_coords(self, _expr, *idx_subs):
-        return [_expr.xreplace(v_sub) * b.xreplace(v_sub)
-                for b, v_sub in zip(self._interpolation_coeffs, idx_subs)]
-
-    def subs_coords_eq(self, field, _expr, *idx_subs, implicit_dims=None):
-        return [Inc(field.xreplace(vsub), _expr.xreplace(vsub) * b,
-                    implicit_dims=implicit_dims)
-                for b, vsub in zip(self._interpolation_coeffs, idx_subs)]
-
-    def _interpolation_indices(self, variables, offset=0, field_offset=0,
-                               implicit_dims=None):
-        """
-        Generate interpolation indices for the DiscreteFunctions in ``variables``.
+    def interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
         """
-        idx_subs = []
-        points = {d: [] for d in self._gdim}
-        mapper = {d: [] for d in self._gdim}
-        pdim = self.sfunction._sparse_dim
-    
-        # Positon map and temporaries for it
-        pmap = self.sfunction._coordinate_indices
-
-        # Temporaries for the position
-        temps = self._positions(implicit_dims)
-
-        # Coefficient symbol expression
-        temps.extend(self._coeff_temps(implicit_dims))
-
-        # Create positions and indices temporaries/indirections
-        pr = []
-        for ((di, d), pos, rd) in zip(enumerate(self._gdim), pmap, self._rdim):
-            p = Symbol(name='ii_%s_%s' % (self.sfunction.name, d.name))
-            temps.extend([Eq(p, pos + rd, implicit_dims=implicit_dims + tuple(pr))])
+        Generate equations interpolating an arbitrary expression into ``self``.
 
-            # Add conditional to avoid OOB
-            lb = sympy.And(p >= d.symbolic_min-self.r, evaluate=False)
-            ub = sympy.And(p <= d.symbolic_max+self.r, evaluate=False)
-            condition = sympy.And(lb, ub, evaluate=False)
-            mapper[d] = ConditionalDimension(p.name, self.sfunction._sparse_dim,
-                                             condition=condition, indirect=True)
-            pr.append(rd)
+        Parameters
+        ----------
+        expr : expr-like
+            Input expression to interpolate.
+        increment: bool, optional
+            If True, generate increments (Inc) rather than assignments (Eq).
+        implicit_dims : Dimension or list of Dimension, optional
+            An ordered list of Dimensions that do not explicitly appear in the
+            interpolation expression, but that should be honored when constructing
+            the operator.
+        """
+        return Interpolation(expr, increment, implicit_dims, self_subs, self)
 
-        # Substitution mapper for variables
-        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
-                    for v in variables}
+    def inject(self, field, expr, implicit_dims=None):
+        """
+        Generate equations injecting an arbitrary expression into a field.
 
-        return idx_subs, temps
+        Parameters
+        ----------
+        field : Function
+            Input field into which the injection is performed.
+        expr : expr-like
+            Injected expression.
+        implicit_dims : Dimension or list of Dimension, optional
+            An ordered list of Dimensions that do not explicitly appear in the
+            injection expression, but that should be honored when constructing
+            the operator.
+        """
+        return Injection(field, expr, implicit_dims, self)
 
-    def interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
+    def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
         """
         Generate equations interpolating an arbitrary expression into ``self``.
 
@@ -252,36 +238,33 @@ def interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
         """
         implicit_dims = self._augment_implicit_dims(implicit_dims)
 
-        def callback():
-            # Derivatives must be evaluated before the introduction of indirect accesses
-            try:
-                _expr = expr.evaluate
-            except AttributeError:
-                # E.g., a generic SymPy expression or a number
-                _expr = expr
-
-            variables = list(retrieve_function_carriers(_expr))
+        # Derivatives must be evaluated before the introduction of indirect accesses
+        try:
+            _expr = expr.evaluate
+        except AttributeError:
+            # E.g., a generic SymPy expression or a number
+            _expr = expr
 
-            # List of indirection indices for all adjacent grid points
-            idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
+        variables = list(retrieve_function_carriers(_expr))
 
-            # Accumulate point-wise contributions into a temporary
-            rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
-            summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
-            # Substitute coordinate base symbols into the interpolation coefficients
-            summands.extend([Inc(rhs, _expr.xreplace(idx_subs) * self._weights,
-                                 implicit_dims=implicit_dims + self._rdim)])
+        # List of indirection indices for all adjacent grid points
+        idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
 
-            # Write/Incr `self`
-            lhs = self.sfunction.subs(self_subs)
-            ecls = Inc if increment else Eq
-            last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
+        # Accumulate point-wise contributions into a temporary
+        rhs = Symbol(name='sum', dtype=self.sfunction.dtype)
+        summands = [Eq(rhs, 0., implicit_dims=implicit_dims)]
+        # Substitute coordinate base symbols into the interpolation coefficients
+        summands.extend([Inc(rhs, (_expr * self._weights).xreplace(idx_subs),
+                             implicit_dims=implicit_dims)])
 
-            return [summands[0]] + temps + summands[1:] + last
+        # Write/Incr `self`
+        lhs = self.sfunction.subs(self_subs)
+        ecls = Inc if increment else Eq
+        last = [ecls(lhs, rhs, implicit_dims=implicit_dims)]
 
-        return Interpolation(expr, increment, self_subs, self, callback)
+        return temps + summands + last
 
-    def inject(self, field, expr, implicit_dims=None):
+    def _inject(self, field, expr, implicit_dims=None):
         """
         Generate equations injecting an arbitrary expression into a field.
 
@@ -298,39 +281,36 @@ def inject(self, field, expr, implicit_dims=None):
         """
         implicit_dims = self._augment_implicit_dims(implicit_dims)
 
-        def callback():
-            # Make iterable to support inject((u, v), expr=expr)
-            # or inject((u, v), expr=(expr1, expr2))
-            fields, exprs = as_tuple(field), as_tuple(expr)
-            # Provide either one expr per field or on expr for all fields
-            if len(fields) > 1:
-                if len(exprs) == 1:
-                    exprs = tuple(exprs[0] for _ in fields)
-                else:
-                    assert len(exprs) == len(fields)
-
-            # Derivatives must be evaluated before the introduction of indirect accesses
-            try:
-                _exprs = tuple(e.evaluate for e in exprs)
-            except AttributeError:
-                # E.g., a generic SymPy expression or a number
-                _exprs = exprs
-
-            variables = list(v for e in _exprs for v in retrieve_function_carriers(e))
-            variables = variables + list(fields)
-
-            # List of indirection indices for all adjacent grid points
-            idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
-
-            # Substitute coordinate base symbols into the interpolation coefficients
-            eqns = [Inc(_field.xreplace(idx_subs),
-                        _expr.xreplace(idx_subs) * self._weights,
-                        implicit_dims=implicit_dims + self._rdim)
-                    for (_field, _expr) in zip(fields, _exprs)]
-
-            return temps + eqns
-
-        return Injection(field, expr, self, callback)
+        # Make iterable to support inject((u, v), expr=expr)
+        # or inject((u, v), expr=(expr1, expr2))
+        fields, exprs = as_tuple(field), as_tuple(expr)
+        # Provide either one expr per field or one expr for all fields
+        if len(fields) > 1:
+            if len(exprs) == 1:
+                exprs = tuple(exprs[0] for _ in fields)
+            else:
+                assert len(exprs) == len(fields)
+
+        # Derivatives must be evaluated before the introduction of indirect accesses
+        try:
+            _exprs = tuple(e.evaluate for e in exprs)
+        except AttributeError:
+            # E.g., a generic SymPy expression or a number
+            _exprs = exprs
+
+        variables = list(v for e in _exprs for v in retrieve_function_carriers(e))
+        variables = variables + list(fields)
+
+        # List of indirection indices for all adjacent grid points
+        idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
+
+        # Substitute coordinate base symbols into the interpolation coefficients
+        eqns = [Inc(_field.xreplace(idx_subs),
+                    (_expr * self._weights).xreplace(idx_subs),
+                    implicit_dims=implicit_dims)
+                for (_field, _expr) in zip(fields, _exprs)]
+
+        return temps + eqns
 
 
 class LinearInterpolator(WeightedInterpolator):
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 994f11783e..d1bee9fa66 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -632,7 +632,7 @@ def _postprocess_arguments(self, args, **kwargs):
         """Process runtime arguments upon returning from ``.apply()``."""
         for p in self.parameters:
             try:
-                subfuncs = (args[s] for s in p._sub_functions)
+                subfuncs = (args[getattr(p, s).name] for s in p._sub_functions)
                 p._arg_apply(args[p.name], *subfuncs, alias=kwargs.get(p.name))
             except AttributeError:
                 p._arg_apply(args[p.name], alias=kwargs.get(p.name))
diff --git a/devito/tools/algorithms.py b/devito/tools/algorithms.py
index ec074455ac..13d349149e 100644
--- a/devito/tools/algorithms.py
+++ b/devito/tools/algorithms.py
@@ -60,14 +60,17 @@ def toposort(data):
     # Perform the topological sorting
     extra_items_in_deps = reduce(set.union, mapper.values()) - set(mapper)
     mapper.update(OrderedDict([(item, set()) for item in extra_items_in_deps]))
+
     while True:
         ordered = set(item for item, dep in mapper.items() if not dep)
         if not ordered:
             break
+
         try:
             processed = sorted(ordered, key=attrgetter('name')) + processed
         except AttributeError:
             processed = sorted(ordered) + processed
+
         mapper = OrderedDict([(item, (dep - ordered)) for item, dep in mapper.items()
                               if item not in ordered])
 
diff --git a/devito/types/basic.py b/devito/types/basic.py
index 95f5d2d3d2..b4620ca093 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -607,11 +607,11 @@ def _new(cls, *args, **kwargs):
         if args:
             try:
                 # Constructor if input is (rows, cols, lambda)
-                newobj = super(AbstractTensor, cls)._new(*args)
+                newobj = super()._new(*args)
             except ValueError:
                 # Constructor if input is list of list as (row, cols, list_of_list)
                 # doesn't work as it expects a flattened.
-                newobj = super(AbstractTensor, cls)._new(args[2])
+                newobj = super()._new(args[2])
 
             # Filter grid and dimensions
             grid, dimensions = newobj._infer_dims()
@@ -624,7 +624,7 @@ def _new(cls, *args, **kwargs):
             # Initialize components and create new Matrix from standard
             # Devito inputs
             comps = cls.__subfunc_setup__(*args, **kwargs)
-            newobj = super(AbstractTensor, cls)._new(comps)
+            newobj = super()._new(comps)
             newobj.__init_finalize__(*args, **kwargs)
 
         return newobj
@@ -638,7 +638,7 @@ def _fromrep(cls, rep):
         This class method is only accessible from an existing AbstractTensor
         that contains a grid or dimensions.
         """
-        newobj = super(AbstractTensor, cls)._fromrep(rep)
+        newobj = super()._fromrep(rep)
         grid, dimensions = newobj._infer_dims()
         try:
             # This is needed when `_fromrep` is called directly in 1.9
@@ -1405,7 +1405,7 @@ def __str__(self):
         return super().__str__()
 
     def _hashable_content(self):
-        return super(Indexed, self)._hashable_content() + (self.base.function,)
+        return super()._hashable_content() + (self.base.function,)
 
     @cached_property
     def indices(self):
@@ -1430,7 +1430,7 @@ def origin(self):
     @cached_property
     def free_symbols(self):
         # Make it cached, since it's relatively expensive and called often
-        ret = super(Indexed, self).free_symbols
+        ret = super().free_symbols
         # Get rid of the IndexedBase label this Indexed stems from
         # as in Devito we can't have it floating around in Eq's
         ret.discard(self.base.label)
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index c9743c73f4..472d84378c 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -486,7 +486,7 @@ def _defines(self):
     def _arg_names(self):
         return self.parent._arg_names
 
-    def _arg_check(self, *args):
+    def _arg_check(self, *args, **kwargs):
         """A DerivedDimension performs no runtime checks."""
         return
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 69341d4fd5..4e5993f194 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -173,14 +173,6 @@ def r(self):
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
 
-    @cached_property
-    def dist_origin(self):
-        return self._dist_origin
-
-    @property
-    def dist_origin(self):
-        return self._dist_origin
-
     @property
     def _mpitype(self):
         return dtype_to_mpidtype(self.dtype)
@@ -207,28 +199,6 @@ def _coords_indices(self):
                 np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
             ).astype(np.int32)
 
-    @property
-    def _subfunc_names(self):
-        names = []
-        for s in self._sub_functions:
-            try:
-                names.append(getattr(self, s).name)
-            except AttributeError:
-                pass
-        return tuple(names)
-
-    @property
-    def _coords_indices(self):
-        if self.gridpoints_data is not None:
-            return self.gridpoints_data
-        else:
-            if self.coordinates_data is None:
-                raise ValueError("No coordinates or gridpoints attached"
-                                 "to this SparseFunction")
-            return (
-                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
-            ).astype(np.int32)
-
     @property
     def gridpoints(self):
         try:
@@ -273,106 +243,11 @@ def _dist_datamap(self):
         """
         Mapper ``M : MPI rank -> required sparse data``.
         """
-        return self._distributor.glb_to_rank(self._support) or {}
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each Dimension for each point symbol."""
-        return tuple(product(range(-self.r+1, self.r+1), repeat=self.grid.dim))
-
-    @cached_property
-    def _point_support(self):
-        return np.array(self._point_increments)
-
-    @cached_property
-    def _point_symbols(self):
-        """Symbol for coordinate value in each Dimension of the point."""
-        return DimensionTuple(*(Symbol(name='p%s' % d, dtype=self.dtype)
-                                for d in self.grid.dimensions),
-                              getters=self.grid.dimensions)
-
-    @cached_property
-    def _position_map(self):
-        """
-        Symbols map for the physical position of the sparse points relative to the grid
-        origin.
-        """
-        symbols = [Symbol(name='pos%s' % d, dtype=self.dtype)
-                   for d in self.grid.dimensions]
-        return OrderedDict([(c - o, p) for p, c, o in zip(symbols,
-                                                          self._coordinate_symbols,
-                                                          self.grid.origin_symbols)])
-
-    @cached_property
-    def _coordinate_indices(self):
-        """
-        Symbol for each grid index according to the coordinates.
-
-        Notes
-        -----
-        The expression `(coord - origin)/spacing` could also be computed in the
-        mathematically equivalent expanded form `coord/spacing -
-        origin/spacing`. This particular form is problematic when a sparse
-        point is in close proximity of the grid origin, since due to a larger
-        machine precision error it may cause a +-1 error in the computation of
-        the position. We mitigate this problem by computing the positions
-        individually (hence the need for a position map).
-        """
-        return tuple([INT(floor(p / i.spacing))
-                      for p, i in zip(self._position_map.values(),
-                                      self.grid.dimensions[:self.grid.dim])])
+        return self.grid._distributor.glb_to_rank(self._support) or {}
 
     @cached_property
-    def _dist_reorder_mask(self):
-        """
-        An ordering mask that puts ``self._sparse_position`` at the front.
-        """
-        ret = (self._sparse_position,)
-        ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
-        return ret
-
-    @property
-    def gridpoints(self):
-        try:
-            return self._gridpoints
-        except AttributeError:
-            return self._coords_indices
-
-    @property
-    def gridpoints_data(self):
-        try:
-            return self._gridpoints.data._local.view(np.ndarray)
-        except AttributeError:
-            return None
-
-    @property
-    def coordinates(self):
-        try:
-            return self._coordinates
-        except AttributeError:
-            return None
-
-    @property
-    def coordinates_data(self):
-        try:
-            return self.coordinates.data._local.view(np.ndarray)
-        except AttributeError:
-            return None
-
-    def guard(self, expr=None):
-        """
-        Generate guarded expressions, that is expressions that are evaluated
-        by an Operator only if certain conditions are met.  The introduced
-        condition, here, is that all grid points in the support of a sparse
-        value must fall within the grid domain (i.e., *not* on the halo).
-
-        Parameters
-        ----------
-        expr : expr-like, optional
-            Input expression, from which the guarded expression is derived.
-            If not specified, defaults to ``self``.
-        """
-        return self.grid._distributor.glb_to_rank(self._support) or {}
+    def dist_origin(self):
+        return self._dist_origin
 
     @cached_property
     def _pos_symbols(self):
@@ -409,15 +284,6 @@ def _dist_reorder_mask(self):
         ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
         return ret
 
-    def _coordinate_bases(self, field_offset):
-        """Symbol for the base coordinates of the reference grid point."""
-        return tuple([cast_mapper[self.dtype](c - o - idx * i.spacing)
-                      for c, o, idx, i, of in zip(self._coordinate_symbols,
-                                                  self.grid.origin_symbols,
-                                                  self._coordinate_indices,
-                                                  self.grid.dimensions[:self.grid.dim],
-                                                  field_offset)])
-
     def interpolate(self, *args, **kwargs):
         """
         Implement an interpolation operation from the grid onto the given sparse points
@@ -430,6 +296,47 @@ def inject(self, *args, **kwargs):
         """
         return self.interpolator.inject(*args, **kwargs)
 
+    def guard(self, expr=None):
+        """
+        Generate guarded expressions, that is expressions that are evaluated
+        by an Operator only if certain conditions are met.  The introduced
+        condition, here, is that all grid points in the support of a sparse
+        value must fall within the grid domain (i.e., *not* on the halo).
+
+        Parameters
+        ----------
+        expr : expr-like, optional
+            Input expression, from which the guarded expression is derived.
+            If not specified, defaults to ``self``.
+        """
+        conditions = {}
+
+        # Positon map and temporaries for it
+        pmap = self._position_map
+
+        # Temporaries for the position
+        temps = self.interpolator._positions(self.dimensions)
+
+        # Create positions and indices temporaries/indirections
+        for ((di, d), pos) in zip(enumerate(self.grid.dimensions), pmap.values()):
+            # Add conditional to avoid OOB
+            lb = sympy.And(pos >= d.symbolic_min, evaluate=False)
+            ub = sympy.And(pos <= d.symbolic_max, evaluate=False)
+            conditions[d] = sympy.And(lb, ub, evaluate=False)
+        condition = sympy.And(*conditions.values(), evaluate=False)
+        cd = ConditionalDimension(self._sparse_dim.name,
+                                  self._sparse_dim,
+                                  condition=condition, indirect=True)
+
+        if expr is None:
+            out = self.indexify().xreplace({self._sparse_dim: cd})
+        else:
+            functions = {f for f in retrieve_function_carriers(expr)
+                         if f.is_SparseFunction}
+            out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions})
+
+        return out, temps
+
     def _dist_scatter_mask(self, dmap=None):
         """
         A mask to index into ``self.data``, which creates a new data array that
@@ -646,8 +553,17 @@ def _dist_gather(self, data, *subfunc):
         self._dist_data_gather(data)
         for (sg, s) in zip(subfunc, self._sub_functions):
             if getattr(self, s) is not None:
+                if np.sum([sg._obj.size[i] for i in range(self.ndim)]) > 0:
+                    sg = getattr(self, s)._C_as_ndarray(sg)
                 self._dist_subfunc_gather(sg, getattr(self, s))
 
+    def _eval_at(self, func):
+        return self
+
+    def _halo_exchange(self):
+        # no-op for SparseFunctions
+        return
+
     def _arg_defaults(self, alias=None):
         key = alias or self
         mapper = {self: key}
@@ -667,13 +583,6 @@ def _arg_defaults(self, alias=None):
 
         return args
 
-    def _eval_at(self, func):
-        return self
-
-    def _halo_exchange(self):
-        # no-op for SparseFunctions
-        return
-
     def _arg_values(self, **kwargs):
         # Add value override for own data if it is provided, otherwise
         # use defaults
@@ -722,7 +631,7 @@ def __init_finalize__(self, *args, **kwargs):
         if not isinstance(self.time_order, int):
             raise ValueError("`time_order` must be int")
 
-        super(AbstractSparseTimeFunction, self).__init_finalize__(*args, **kwargs)
+        super().__init_finalize__(*args, **kwargs)
 
     def __fd_setup__(self):
         """
@@ -873,7 +782,7 @@ class SparseFunction(AbstractSparseFunction):
     __rkwargs__ = AbstractSparseFunction.__rkwargs__ + ('coordinates_data',)
 
     def __init_finalize__(self, *args, **kwargs):
-        super(SparseFunction, self).__init_finalize__(*args, **kwargs)
+        super().__init_finalize__(*args, **kwargs)
         self.interpolator = LinearInterpolator(self)
 
         # Set up sparse point coordinates
@@ -1003,8 +912,7 @@ def interpolate(self, expr, u_t=None, p_t=None, increment=False):
         if p_t is not None:
             subs = {self.time_dim: p_t}
 
-        return super(SparseTimeFunction, self).interpolate(expr, increment=increment,
-                                                           self_subs=subs)
+        return super().interpolate(expr, increment=increment, self_subs=subs)
 
     def inject(self, field, expr, u_t=None, p_t=None, implicit_dims=None):
         """
@@ -1090,7 +998,8 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     The parameters must always be given as keyword arguments, since SymPy
     uses `*args` to (re-)create the Dimension arguments of the symbolic object.
     """
-     _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
+
+    _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
                    ('r', 'gridpoints_data', 'coordinates_data',
@@ -1148,15 +1057,6 @@ def __init_finalize__(self, *args, **kwargs):
 
         self.interpolator = PrecomputedInterpolator(self)
 
-    @property
-    def r(self):
-        return self._radius
-
-    @cached_property
-    def _point_increments(self):
-        """Index increments in each dimension for each point symbol."""
-        return tuple(product(range(-self.r//2+1, self.r//2+1), repeat=self.grid.dim))
-
     @property
     def interpolation_coeffs(self):
         """ The Precomputed interpolation coefficients."""
@@ -1201,41 +1101,6 @@ def _position_map(self):
         else:
             return super()._position_map
 
-    @cached_property
-    def _coordinate_symbols(self):
-        """Symbol representing the coordinate values in each Dimension."""
-        p_dim = self.indices[self._sparse_position]
-        if self.gridpoints is not None:
-            return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
-                          for ((di, d), o) in zip(enumerate(self.grid.dimensions),
-                                                  self.grid.origin)])
-        else:
-            return tuple([self.coordinates.indexify((p_dim, i))
-                          for i in range(self.grid.dim)])
-
-    @cached_property
-    def _coordinate_indices(self):
-        """
-        Symbol for each grid index according to the coordinates.
-
-        Notes
-        -----
-        The expression `(coord - origin)/spacing` could also be computed in the
-        mathematically equivalent expanded form `coord/spacing -
-        origin/spacing`. This particular form is problematic when a sparse
-        point is in close proximity of the grid origin, since due to a larger
-        machine precision error it may cause a +-1 error in the computation of
-        the position. We mitigate this problem by computing the positions
-        individually (hence the need for a position map).
-        """
-        if self.gridpoints is not None:
-            ddim = self.gridpoints.dimensions[-1]
-            return tuple([self.gridpoints._subs(ddim, di) for di in range(self.grid.dim)])
-        else:
-            return tuple([INT(floor(p / i.spacing))
-                          for p, i in zip(self._position_map,
-                                          self.grid.dimensions[:self.grid.dim])])
-
 
 class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
                                     PrecomputedSparseFunction):
@@ -1405,8 +1270,7 @@ def __init_finalize__(self, *args, **kwargs):
 
         # Rows are locations, columns are source/receivers
         nloc, npoint = self.matrix.shape
-        super().__init_finalize__(
-            *args, **kwargs, npoint=npoint)
+        super().__init_finalize__(*args, **kwargs, npoint=npoint)
 
         # Grid points per sparse point
         r = kwargs.get('r')
diff --git a/examples/seismic/elastic/operators.py b/examples/seismic/elastic/operators.py
index d6714797e8..dd9d793dfe 100644
--- a/examples/seismic/elastic/operators.py
+++ b/examples/seismic/elastic/operators.py
@@ -17,12 +17,7 @@ def src_rec(v, tau, model, geometry):
                     npoint=geometry.nrec)
 
     # The source injection term
-    src_xx = src.inject(field=tau[0, 0].forward, expr=src * s)
-    src_zz = src.inject(field=tau[-1, -1].forward, expr=src * s)
-    src_expr = src_xx + src_zz
-    if model.grid.dim == 3:
-        src_yy = src.inject(field=tau[1, 1].forward, expr=src * s)
-        src_expr += src_yy
+    src_expr = src.inject(tau.forward.diagonal(), expr=src * s)
 
     # Create interpolation expression for receivers
     rec_term1 = rec1.interpolate(expr=tau[-1, -1])
diff --git a/tests/test_caching.py b/tests/test_caching.py
index 41d8f76afa..a5f8ba58d6 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -639,9 +639,9 @@ def test_sparse_function(self, operate_on_empty_cache):
         i = u.inject(expr=u, field=u)
 
         # created: rxu, rxy (radius dimensions) and spacings
-        # rxu, rxy (conditional dimensions)
+        # conditional sparse dim
         # posx, posy, px, py, u_coords (as indexified),
-        ncreated = 2+2+2+2+2+1
+        ncreated = 2+1+2+2+2+1
         # Note that injection is now lazy so no new symbols should be created
         assert len(_SymbolCache) == cur_cache_size
         i.evaluate
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index ec14597849..186ab6cffd 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -130,15 +130,7 @@ def init(data):
                                    interpolation_coeffs=interpolation_coeffs)
     eqn = sf.interpolate(m)
     op = Operator(eqn)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     expected_values = [sin(point[0]) + sin(point[1]) for point in points]
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
@@ -172,15 +164,7 @@ def test_precomputed_interpolation_time():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(time_m=0, time_M=4)
 
     for it in range(5):
@@ -213,15 +197,7 @@ def test_precomputed_injection():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     indices = [slice(0, 2, 1), slice(9, 11, 1)]
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
@@ -257,15 +233,7 @@ def test_precomputed_injection_time():
     expr = sf.inject(m, Float(1.))
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     for ti in range(2):
         indices = [slice(0, 2, 1), slice(9, 11, 1)]
@@ -289,18 +257,8 @@ def test_interpolate(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-    op(a=a)
-=======
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
     op(a=a)
-
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
 
 
@@ -319,15 +277,7 @@ def test_interpolate_cumm(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, increment=True)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[:], xcoords + 1., rtol=1e-6)
@@ -349,15 +299,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, u_t=a.indices[0]+1)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], xcoords, rtol=1e-6)
@@ -365,15 +307,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a, p_t=p.indices[0]+1)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -382,15 +316,7 @@ def test_interpolate_time_shift(shape, coords, npoints=20):
     expr = p.interpolate(a, u_t=a.indices[0]+1,
                          p_t=p.indices[0]+1)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[1, :], xcoords, rtol=1e-6)
@@ -410,15 +336,7 @@ def test_interpolate_array(shape, coords, npoints=20):
 
     expr = p.interpolate(a)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, points=p.data[:])
 
     assert np.allclose(p.data[:], xcoords, rtol=1e-6)
@@ -439,15 +357,7 @@ def test_interpolate_custom(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a * p.indices[0])
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -493,15 +403,7 @@ def test_interpolate_indexed(shape, coords, npoints=20):
     p.data[:] = 1.
     expr = p.interpolate(a[a.grid.dimensions] * p.indices[0])
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     assert np.allclose(p.data[0, :], 0.0 * xcoords, rtol=1e-6)
@@ -524,15 +426,7 @@ def test_inject(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.))
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -560,7 +454,7 @@ def test_multi_inject(shape, coords, nexpr, result, npoints=19):
     expr = p.inject((a1, a2), iexpr)
 
     op = Operator(expr)
-    print(op)
+
     op(a1=a1, a2=a2)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -586,15 +480,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1)
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -605,15 +491,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), p_t=p.indices[0]+1)
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(0, 0, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -624,15 +502,7 @@ def test_inject_time_shift(shape, coords, result, npoints=19):
     expr = p.inject(a, Float(1.), u_t=a.indices[0]+1, p_t=p.indices[0]+1)
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, time=1)
 
     indices = [slice(1, 1, 1)] + [slice(4, 6, 1) for _ in coords]
@@ -656,15 +526,7 @@ def test_inject_array(shape, coords, result, npoints=19):
     expr = p.inject(a, p)
 
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, points=p2.data[:])
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -688,15 +550,7 @@ def test_inject_from_field(shape, coords, result, npoints=19):
 
     expr = p.inject(field=a, expr=b)
     op = Operator(expr)
-<<<<<<< HEAD
-<<<<<<< HEAD
-
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
 
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op(a=a, b=b)
 
     indices = [slice(4, 6, 1) for _ in coords]
@@ -770,15 +624,7 @@ def test_edge_sparse():
     expr = sf1.interpolate(u)
     subs = {d.spacing: v for d, v in zip(u.grid.dimensions, u.grid.spacing)}
     op = Operator(expr, subs=subs)
-<<<<<<< HEAD
-<<<<<<< HEAD
 
-=======
-    print(op)
->>>>>>> 98cb40d85 (operations: remove unused points and cleanup  weights)
-=======
-
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
     op()
     assert sf1.data[0] == 0
 
@@ -817,14 +663,7 @@ def test_msf_interpolate():
 
     eqn = sf.interpolate(u)
     op = Operator(eqn)
-<<<<<<< HEAD
-=======
-
-<<<<<<< HEAD
->>>>>>> 6ff72c157 (compiler: fix dimension_sort to avoid missing indirect conditionals)
 
-=======
->>>>>>> 66bf5974f (compiler: fix dtype of aliases)
     sf.manual_scatter()
     op(time_m=0, time_M=4)
     sf.manual_gather()
@@ -879,7 +718,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
-
+    print(op)
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 4aba518d5f..4c3f72caae 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -538,7 +538,10 @@ def test_sparse_coords(self):
             coords_loc = sf.coordinates.data[i, 1]
             if coords_loc is not None:
                 coords_loc += sf.coordinates.data[i, 0]
-            assert sf.data[i] == coords_loc
+            if sf.data[i] == coords_loc:
+                assert sf.data[i] == coords_loc
+            else:
+                print(sf._comm.rank, i, sf.data[i], coords_loc)
 
     @pytest.mark.parallel(mode=4)
     def test_sparse_coords_issue1823(self):
@@ -2556,7 +2559,7 @@ def test_adjoint_F_no_omp(self):
     # TestDecomposition().test_reshape_left_right()
     # TestOperatorSimple().test_trivial_eq_2d()
     # TestFunction().test_halo_exchange_bilateral()
-    # TestSparseFunction().test_sparse_coords()
-    TestSparseFunction().test_precomputed_sparse(2)
+    TestSparseFunction().test_sparse_coords()
+    # TestSparseFunction().test_precomputed_sparse(2)
     # TestOperatorAdvanced().test_fission_due_to_antidep()
     # TestIsotropicAcoustic().test_adjoint_F(1)
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index e051f65625..faf514feba 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -18,7 +18,7 @@
 from devito.types import (Array, CustomDimension, Symbol as dSymbol, Scalar,
                           PointerArray, Lock, PThreadArray, SharedData, Timer,
                           DeviceID, NPThreads, ThreadID, TempFunction, Indirection,
-                          FIndexed, PrecomputedSparseTimeFunction)
+                          FIndexed)
 from devito.types.basic import BoundSymbol
 from devito.tools import EnrichedTuple
 from devito.symbolics import (IntDiv, ListInitializer, FieldFromPointer,
@@ -312,6 +312,12 @@ def test_shared_data(self, pickle):
 
     def test_findexed(self, pickle):
         grid = Grid(shape=(3, 3, 3))
+        f = Function(name='f', grid=grid)
+
+        fi = FIndexed.from_indexed(f.indexify(), "foo", strides=(1, 2))
+
+        pkl_fi = pickle.dumps(fi)
+        new_fi = pickle.loads(pkl_fi)
 
         assert new_fi.name == fi.name
         assert new_fi.pname == fi.pname
@@ -360,15 +366,7 @@ def test_guard_factor(self, pickle):
         pkl_gf = pickle.dumps(gf)
         new_gf = pickle.loads(pkl_gf)
 
-<<<<<<< HEAD
         assert gf == new_gf
-=======
-    assert sf._radius == new_sf._radius == 1
-    assert sf.space_order == new_sf.space_order
-    assert sf.time_order == new_sf.time_order
-    assert sf.dtype == new_sf.dtype
-    assert sf.npoint == new_sf.npoint == 3
->>>>>>> 113f6f860 (api: cleanup hierachy and properties of sparse and interpolator)
 
     def test_temp_function(self, pickle):
         grid = Grid(shape=(3, 3))
@@ -432,25 +430,6 @@ def test_receiver(self, pickle):
         assert np.all(new_rec.data == 1)
         assert np.all(new_rec.coordinates.data == [[0.], [1.], [2.]])
 
-    def test_alias_sparse_function(self, pickle):
-        grid = Grid(shape=(3,))
-        sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2,
-                            coordinates=[(0.,), (1.,), (2.,)])
-        sf.data[0] = 1.
-
-        # Create alias
-        f0 = sf._rebuild(name='f0', alias=True)
-
-        pkl_f0 = pickle.dumps(f0)
-        new_f0 = pickle.loads(pkl_f0)
-
-        assert f0.data is None and new_f0.data is None
-        assert f0.coordinates.data is None and new_f0.coordinates.data is None
-
-        assert sf.space_order == f0.space_order == new_f0.space_order
-        assert sf.dtype == f0.dtype == new_f0.dtype
-        assert sf.npoint == f0.npoint == new_f0.npoint
-
 
 @pytest.mark.parametrize('pickle', [pickle0, pickle1])
 class TestOperator(object):

From c60318da1e28c8c8eb2ab7c25a38880bbfd6b1a1 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 21 Jun 2023 12:32:42 -0400
Subject: [PATCH 38/90] api: cleanup interpolator api

---
 devito/ir/clusters/cluster.py      |  1 +
 devito/ir/equations/algorithms.py  |  3 +-
 devito/ir/stree/algorithms.py      |  3 --
 devito/ir/support/basic.py         |  2 +-
 devito/ir/support/utils.py         |  1 -
 devito/operations/interpolators.py | 14 +++++---
 devito/types/dimension.py          |  5 +--
 devito/types/sparse.py             | 53 +++++++++++++++---------------
 tests/test_buffering.py            |  5 +--
 tests/test_caching.py              |  4 +--
 tests/test_dle.py                  |  1 +
 tests/test_mpi.py                  |  5 +--
 12 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index 0dc3200b4f..de4cd8f29f 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -334,6 +334,7 @@ def dspace(self):
 
         # Construct the `intervals` of the DataSpace, that is a global,
         # Dimension-centric view of the data space
+
         intervals = IntervalGroup.generate('union', *parts.values())
         # E.g., `db0 -> time`, but `xi NOT-> x`
         intervals = intervals.promote(lambda d: not d.is_Sub)
diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 89400eae1e..adc462d059 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -33,8 +33,7 @@ def handle_indexed(indexed):
 
                 # Fallback: Just insert all the Dimensions we find, regardless of
                 # what the user is attempting to do
-                relation.extend(filter_sorted([d for d in i.free_symbols
-                                               if isinstance(d, Dimension)]))
+                relation.extend(filter_sorted(i.atoms(Dimension)))
 
         # StencilDimensions are lowered subsequently through special compiler
         # passes, so they can be ignored here
diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py
index dd9eb9b265..58e8e844e6 100644
--- a/devito/ir/stree/algorithms.py
+++ b/devito/ir/stree/algorithms.py
@@ -155,9 +155,6 @@ def preprocess(clusters, options=None, **kwargs):
                    len(intersection) > 0:
                     found.append(c1)
                     queue.remove(c1)
-                elif c1.is_halo_touch and len(intersection) == 0 and \
-                        distributed_aindices == set():
-                    queue.remove(c1)
 
             syncs = normalize_syncs(*[c1.syncs for c1 in found])
             if syncs:
diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
index 36ea735109..9f24f99989 100644
--- a/devito/ir/support/basic.py
+++ b/devito/ir/support/basic.py
@@ -123,7 +123,7 @@ def index_mode(self):
     def aindices(self):
         retval = []
         for i, fi in zip(self, self.findices):
-            dims = {j for j in i.free_symbols if isinstance(j, Dimension)}
+            dims = i.atoms(Dimension)
             sdims = {d for d in dims if d.is_Stencil}
             candidates = dims - sdims
 
diff --git a/devito/ir/support/utils.py b/devito/ir/support/utils.py
index 9e202b7e1d..5f08f48020 100644
--- a/devito/ir/support/utils.py
+++ b/devito/ir/support/utils.py
@@ -183,7 +183,6 @@ def detect_accesses(exprs):
     for e in as_tuple(exprs):
         other_dims.update(i for i in e.free_symbols if isinstance(i, Dimension))
         other_dims.update(e.implicit_dims)
-
     mapper[None] = Stencil([(i, 0) for i in other_dims])
 
     return mapper
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 0bd8e97b13..d4b68f9f52 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -37,10 +37,14 @@ def __new__(cls, interpolator):
         return obj
 
     def _evaluate(self, **kwargs):
-        return_value = self.interpolator._evauate(**kwargs)
+        return_value = self.operation(**kwargs)
         assert(all(isinstance(i, Eq) for i in return_value))
         return return_value
 
+    @abstractmethod
+    def operation(self, **kwargs):
+        pass
+
     def __add__(self, other):
         return flatten([self, other])
 
@@ -66,7 +70,7 @@ def __new__(cls, expr, increment, implicit_dims, self_subs, interpolator):
 
         return obj
 
-    def _evaluate(self, **kwargs):
+    def operation(self, **kwargs):
         return self.interpolator._interpolate(expr=self.expr, increment=self.increment,
                                               self_subs=self.self_subs,
                                               implicit_dims=self.implicit_dims)
@@ -93,7 +97,7 @@ def __new__(cls, field, expr, implicit_dims, interpolator):
 
         return obj
 
-    def _evaluate(self, **kwargs):
+    def operation(self, **kwargs):
         return self.interpolator._inject(expr=self.expr, field=self.field,
                                          implicit_dims=self.implicit_dims)
 
@@ -174,8 +178,8 @@ def _interp_idx(self, variables, implicit_dims=None):
         pr = self.sfunction.dimensions[-1]
         for ((di, d), rd) in zip(enumerate(self._gdim), self._rdim):
             # Add conditional to avoid OOB
-            lb = sympy.And(rd >= d.symbolic_min, evaluate=False)
-            ub = sympy.And(rd <= d.symbolic_max, evaluate=False)
+            lb = sympy.And(rd >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(rd <= d.symbolic_max + self.r, evaluate=False)
             cond = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(rd.name, pr, condition=cond, indirect=True)
             pr = rd
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 472d84378c..6051584f9a 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -107,6 +107,8 @@ class Dimension(ArgProvider):
     is_Incr = False
     is_Block = False
 
+    indirect = False
+
     # Prioritize self's __add__ and __sub__ to construct AffineIndexAccessFunction
     _op_priority = sympy.Expr._op_priority + 1.
 
@@ -823,7 +825,7 @@ def index(self):
 
     @cached_property
     def free_symbols(self):
-        retval = set(super(ConditionalDimension, self).free_symbols)
+        retval = set(super().free_symbols)
         if self.condition is not None:
             retval |= self.condition.free_symbols
         try:
@@ -1206,7 +1208,6 @@ class CustomDimension(BasicDimension):
     """
 
     is_Custom = True
-    indirect = False
 
     __rkwargs__ = ('symbolic_min', 'symbolic_max', 'symbolic_size', 'parent')
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 4e5993f194..743e245f9c 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1,9 +1,9 @@
 from collections import OrderedDict
 try:
-    from collections import Iterable
-except ImportError:
-    # After python 3.10
     from collections.abc import Iterable
+except ImportError:
+    # Before python 3.10
+    from collections import Iterable
 from itertools import product
 
 import sympy
@@ -196,8 +196,8 @@ def _coords_indices(self):
                 raise ValueError("No coordinates or gridpoints attached"
                                  "to this SparseFunction")
             return (
-                np.floor(self.coordinates_data - self.grid.origin) / self.grid.spacing
-            ).astype(np.int32)
+                np.floor((self.coordinates_data - self.grid.origin) / self.grid.spacing)
+            ).astype(int)
 
     @property
     def gridpoints(self):
@@ -243,7 +243,7 @@ def _dist_datamap(self):
         """
         Mapper ``M : MPI rank -> required sparse data``.
         """
-        return self.grid._distributor.glb_to_rank(self._support) or {}
+        return self.grid.distributor.glb_to_rank(self._support) or {}
 
     @cached_property
     def dist_origin(self):
@@ -281,7 +281,8 @@ def _dist_reorder_mask(self):
         An ordering mask that puts ``self._sparse_position`` at the front.
         """
         ret = (self._sparse_position,)
-        ret += tuple(i for i, d in enumerate(self.indices) if d is not self._sparse_dim)
+        ret += tuple(i for i, d in enumerate(self.dimensions)
+                     if d is not self._sparse_dim)
         return ret
 
     def interpolate(self, *args, **kwargs):
@@ -496,6 +497,7 @@ def _dist_data_gather(self, data):
             return
 
         # Compute dist map only once
+        data = self._C_as_ndarray(data)
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
 
@@ -503,19 +505,19 @@ def _dist_data_gather(self, data):
         data = np.ascontiguousarray(np.transpose(data, self._dist_reorder_mask))
 
         # Send back the sparse point values
-        sshape, scount, sdisp, _, rcount, rdisp = self._dist_alltoall(dmap=dmap)
+        sshape, scount, sdisp, rshape, rcount, rdisp = self._dist_alltoall(dmap=dmap)
         gathered = np.empty(shape=sshape, dtype=self.dtype)
+
         self._comm.Alltoallv([data, rcount, rdisp, self._mpitype],
                              [gathered, scount, sdisp, self._mpitype])
 
         # Unpack data values so that they follow the expected storage layout
         gathered = np.ascontiguousarray(np.transpose(gathered, self._dist_reorder_mask))
-        self.data
         self._data[mask] = gathered[:]
 
-    def _dist_subfunc_gather(self, sfuncd, sfunc):
+    def _dist_subfunc_gather(self, sfuncd, subfunc):
         try:
-            sfuncd = sfunc._C_as_ndarray(sfuncd)
+            sfuncd = subfunc._C_as_ndarray(sfuncd)
         except AttributeError:
             pass
         # If not using MPI, don't waste time
@@ -527,16 +529,16 @@ def _dist_subfunc_gather(self, sfuncd, sfunc):
         mask = self._dist_scatter_mask(dmap=dmap)
 
         # Pack (reordered) SubFuncion values so that they can be sent out via an Alltoallv
-        if self.dist_origin[sfunc] is not None:
-            sfuncd = sfuncd + np.array(self.dist_origin[sfunc], dtype=sfunc.dtype)
+        if self.dist_origin[subfunc] is not None:
+            sfuncd = sfuncd + np.array(self.dist_origin[subfunc], dtype=subfunc.dtype)
 
         # Send out the sparse point SubFuncion values
         sshape, scount, sdisp, _, rcount, rdisp = \
-            self._dist_subfunc_alltoall(sfunc, dmap=dmap)
-        gathered = np.empty(shape=sshape, dtype=sfunc.dtype)
-        self._comm.Alltoallv([sfuncd, rcount, rdisp, self._smpitype[sfunc]],
-                             [gathered, scount, sdisp, self._smpitype[sfunc]])
-        sfunc.data._local[mask[self._sparse_position]] = gathered[:]
+            self._dist_subfunc_alltoall(subfunc, dmap=dmap)
+        gathered = np.empty(shape=sshape, dtype=subfunc.dtype)
+        self._comm.Alltoallv([sfuncd, rcount, rdisp, self._smpitype[subfunc]],
+                             [gathered, scount, sdisp, self._smpitype[subfunc]])
+        subfunc.data._local[mask[self._sparse_position]] = gathered[:]
 
         # Note: this method "mirrors" `_dist_scatter`: a sparse point that is sent
         # in `_dist_scatter` is here received; a sparse point that is received in
@@ -553,8 +555,6 @@ def _dist_gather(self, data, *subfunc):
         self._dist_data_gather(data)
         for (sg, s) in zip(subfunc, self._sub_functions):
             if getattr(self, s) is not None:
-                if np.sum([sg._obj.size[i] for i in range(self.ndim)]) > 0:
-                    sg = getattr(self, s)._C_as_ndarray(sg)
                 self._dist_subfunc_gather(sg, getattr(self, s))
 
     def _eval_at(self, func):
@@ -608,7 +608,7 @@ def _arg_apply(self, dataobj, *subfuncs, alias=None):
         key = alias if alias is not None else self
         if isinstance(key, AbstractSparseFunction):
             # Gather into `self.data`
-            key._dist_gather(self._C_as_ndarray(dataobj), *subfuncs)
+            key._dist_gather(dataobj, *subfuncs)
         elif self._distributor.nprocs > 1:
             raise NotImplementedError("Don't know how to gather data from an "
                                       "object of type `%s`" % type(key))
@@ -793,8 +793,8 @@ def __init_finalize__(self, *args, **kwargs):
     @cached_property
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each Dimension."""
-        p_dim = self.indices[self._sparse_position]
-        return tuple([self.coordinates.indexify((p_dim, i))
+        d_dim = self.coordinates.dimensions[1]
+        return tuple([self.coordinates._subs(d_dim, i)
                       for i in range(self.grid.dim)])
 
     @cached_property
@@ -1069,13 +1069,14 @@ def interpolation_coeffs_data(self):
     @cached_property
     def _coordinate_symbols(self):
         """Symbol representing the coordinate values in each Dimension."""
-        p_dim = self.indices[self._sparse_position]
         if self.gridpoints is not None:
-            return tuple([self.gridpoints.indexify((p_dim, di)) * d.spacing + o
+            d_dim = self.gridpoints.dimensions[1]
+            return tuple([self.gridpoints._subs(d_dim, di) * d.spacing + o
                           for ((di, d), o) in zip(enumerate(self.grid.dimensions),
                                                   self.grid.origin)])
         else:
-            return tuple([self.coordinates.indexify((p_dim, i))
+            d_dim = self.coordinates.dimensions[1]
+            return tuple([self.coordinates._subs(d_dim, i)
                           for i in range(self.grid.dim)])
 
     @cached_property
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 25b68cef56..74eea9764b 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -3,7 +3,7 @@
 
 from conftest import skipif
 from devito import (Constant, Grid, TimeFunction, SparseTimeFunction, Operator,
-                    Eq, ConditionalDimension, SubDimension, SubDomain)
+                    Eq, ConditionalDimension, SubDimension, SubDomain, configuration)
 from devito.ir import FindSymbols, retrieve_iteration_tree
 from devito.exceptions import InvalidOperator
 
@@ -271,7 +271,8 @@ def test_over_injection():
     op1 = Operator(eqns, opt='buffering')
 
     # Check generated code
-    assert len(retrieve_iteration_tree(op1)) == 8
+    assert len(retrieve_iteration_tree(op1)) == \
+        7 + bool(configuration['language'] != 'C')
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_caching.py b/tests/test_caching.py
index a5f8ba58d6..bb35f04a3f 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -638,10 +638,10 @@ def test_sparse_function(self, operate_on_empty_cache):
 
         i = u.inject(expr=u, field=u)
 
-        # created: rxu, rxy (radius dimensions) and spacings
+        # created: rxu, rxy (radius dimensions) and spacings and conditionals
         # conditional sparse dim
         # posx, posy, px, py, u_coords (as indexified),
-        ncreated = 2+1+2+2+2+1
+        ncreated = 2+2+1+2+2+2+1
         # Note that injection is now lazy so no new symbols should be created
         assert len(_SymbolCache) == cur_cache_size
         i.evaluate
diff --git a/tests/test_dle.py b/tests/test_dle.py
index f1a06cb916..3f2c9c9e27 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -725,6 +725,7 @@ def test_scheduling(self):
         op = Operator(eqns, opt=('openmp', {'par-dynamic-work': 0}))
 
         iterations = FindNodes(Iteration).visit(op)
+
         assert len(iterations) == 6
         assert iterations[1].is_Affine
         assert 'schedule(dynamic,1)' in iterations[1].pragmas[0].value
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 4c3f72caae..0fe4ee4a08 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -538,10 +538,7 @@ def test_sparse_coords(self):
             coords_loc = sf.coordinates.data[i, 1]
             if coords_loc is not None:
                 coords_loc += sf.coordinates.data[i, 0]
-            if sf.data[i] == coords_loc:
-                assert sf.data[i] == coords_loc
-            else:
-                print(sf._comm.rank, i, sf.data[i], coords_loc)
+            assert sf.data[i] == coords_loc
 
     @pytest.mark.parallel(mode=4)
     def test_sparse_coords_issue1823(self):

From 351b4c3d434d5da6ae88f3e1862812419aba7160 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 27 Jun 2023 10:01:46 -0400
Subject: [PATCH 39/90] compiler: prevent inner custom dimension from being
 parallel to avoid deviceptr out of acc loop

---
 devito/ir/clusters/analysis.py     |  2 +-
 devito/ir/support/basic.py         |  2 +-
 devito/operations/interpolators.py | 26 ++++++++++++++------------
 devito/symbolics/inspection.py     |  2 +-
 requirements-testing.txt           |  3 ++-
 tests/test_builtins.py             |  6 +++---
 tests/test_caching.py              |  7 +++++--
 tests/test_dimension.py            |  8 +++++---
 tests/test_dle.py                  |  4 ++--
 tests/test_dse.py                  |  5 ++++-
 10 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/devito/ir/clusters/analysis.py b/devito/ir/clusters/analysis.py
index 4778f2b2b9..3f6b3099c7 100644
--- a/devito/ir/clusters/analysis.py
+++ b/devito/ir/clusters/analysis.py
@@ -100,7 +100,7 @@ def _callback(self, clusters, d, prefix):
                 # False alarm, the dependence is over a locally-defined symbol
                 continue
 
-            if dep.is_reduction:
+            if dep.is_reduction and not (d.is_Custom and d.is_Derived):
                 is_parallel_atomic = True
                 continue
 
diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
index 9f24f99989..4e26c50935 100644
--- a/devito/ir/support/basic.py
+++ b/devito/ir/support/basic.py
@@ -123,7 +123,7 @@ def index_mode(self):
     def aindices(self):
         retval = []
         for i, fi in zip(self, self.findices):
-            dims = i.atoms(Dimension)
+            dims = set(d.root if d.indirect else d for d in i.atoms(Dimension))
             sdims = {d for d in dims if d.is_Stencil}
             candidates = dims - sdims
 
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index d4b68f9f52..116cf1699d 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -149,9 +149,10 @@ def r(self):
 
     @cached_property
     def _rdim(self):
+        parent = self.sfunction.dimensions[-1]
         dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
-                                p-self.r+1, p+self.r, len(range(-self.r+1, self.r+1)))
-                for (p, d) in zip(self.sfunction._position_map.values(), self._gdim)]
+                                -self.r+1, self.r, 2*self.r, parent)
+                for d in self._gdim]
 
         return DimensionTuple(*dims, getters=self._gdim)
 
@@ -170,24 +171,23 @@ def _interp_idx(self, variables, implicit_dims=None):
         Generate interpolation indices for the DiscreteFunctions in ``variables``.
         """
         mapper = {}
+        pos = self.sfunction._position_map.values()
         # Temporaries for the position
         temps = self._positions(implicit_dims)
 
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-        pr = self.sfunction.dimensions[-1]
-        for ((di, d), rd) in zip(enumerate(self._gdim), self._rdim):
+        for ((di, d), rd, p) in zip(enumerate(self._gdim), self._rdim, pos):
             # Add conditional to avoid OOB
-            lb = sympy.And(rd >= d.symbolic_min - self.r, evaluate=False)
-            ub = sympy.And(rd <= d.symbolic_max + self.r, evaluate=False)
+            lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False)
             cond = sympy.And(lb, ub, evaluate=False)
-            mapper[d] = ConditionalDimension(rd.name, pr, condition=cond, indirect=True)
-            pr = rd
+            mapper[d] = ConditionalDimension(rd.name, rd, condition=cond, indirect=True)
 
         # Substitution mapper for variables
-        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) for (k, c) in mapper.items()})
+        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) + p
+                    for ((k, c), p) in zip(mapper.items(), pos)})
                     for v in variables}
-        idx_subs.update({rd: crd for (rd, crd) in zip(self._rdim, mapper.values())})
 
         return idx_subs, temps
 
@@ -328,7 +328,9 @@ class LinearInterpolator(WeightedInterpolator):
     """
     @property
     def _weights(self):
-        c = [(1 - p) * (1 - (rd - rd._symbolic_min)) + (rd - rd._symbolic_min) * p
+        # (1 - p) * (1 - rd) + rd * p
+        # simplified for better arithmetic
+        c = [1 - p + rd * (2*p - 1)
              for (p, d, rd) in zip(self._point_symbols, self._gdim, self._rdim)]
         return prod(c)
 
@@ -373,5 +375,5 @@ def interpolation_coeffs(self):
     @property
     def _weights(self):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        return prod([self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd._symbolic_min})
+        return prod([self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min})
                      for (ri, rd) in enumerate(self._rdim)])
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index cdd408c643..2083f444d9 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -271,7 +271,7 @@ def sympy_dtype(expr, default):
     args = expr.args
     # We can only infer the dtype for addition/multiplication or Symbols
     # For other case the epxression function may modify the infered dtype
-    if not (isinstance(expr.func, Add) or isinstance(expr.func, Add)) or \
+    if not (isinstance(expr.func, Add) or isinstance(expr.func, Mul)) or \
             not expr.is_Symbol:
         return default
 
diff --git a/requirements-testing.txt b/requirements-testing.txt
index 1459047d8b..88b7e88518 100644
--- a/requirements-testing.txt
+++ b/requirements-testing.txt
@@ -4,4 +4,5 @@ pytest-cov
 codecov
 flake8>=2.1.0
 nbval
-scipy
\ No newline at end of file
+scipy
+pooch; python_version >= "3.8"
diff --git a/tests/test_builtins.py b/tests/test_builtins.py
index 17b08213e8..21c36e319b 100644
--- a/tests/test_builtins.py
+++ b/tests/test_builtins.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy as np
 from scipy.ndimage import gaussian_filter
-from scipy import misc
+from scipy.misc import ascent
 
 from conftest import skipif
 from devito import ConditionalDimension, Grid, Function, TimeFunction, switchconfig
@@ -154,7 +154,7 @@ def test_gs_1d_float(self, sigma):
     def test_gs_2d_int(self, sigma):
         """Test the Gaussian smoother in 2d."""
 
-        a = misc.ascent()
+        a = ascent()
         sp_smoothed = gaussian_filter(a, sigma=sigma)
         dv_smoothed = gaussian_smooth(a, sigma=sigma)
 
@@ -168,7 +168,7 @@ def test_gs_2d_int(self, sigma):
     def test_gs_2d_float(self, sigma):
         """Test the Gaussian smoother in 2d."""
 
-        a = misc.ascent()
+        a = ascent()
         a = a+0.1
         sp_smoothed = gaussian_filter(a, sigma=sigma)
         dv_smoothed = gaussian_smooth(a, sigma=sigma)
diff --git a/tests/test_caching.py b/tests/test_caching.py
index bb35f04a3f..978a09e896 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -663,9 +663,12 @@ def test_sparse_function(self, operate_on_empty_cache):
 
         assert len(_SymbolCache) == init_cache_size + 10
         clear_cache()
-        # Now we should be back to the original state except pos*
-        # that belong to the abstract class
+        # Now we should be back to the original state except for
+        # pos* that belong to the abstract class
         assert len(_SymbolCache) == init_cache_size + 2
+        clear_cache()
+        # Now we should be back to the original state
+        assert len(_SymbolCache) == init_cache_size
 
     def test_after_indexification(self):
         """
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index b0e1ee5cfc..c5beb3ad06 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -1450,9 +1450,11 @@ def test_sparse_time_function(self):
 
         assert np.all(p.data[0] == 0)
         # Note the endpoint of the range is 12 because we inject at p.forward
-        assert all(p.data[i].sum() == i - 1 for i in range(1, 12))
-        assert all(p.data[i, 10, 10, 10] == i - 1 for i in range(1, 12))
-        assert all(np.all(p.data[i] == 0) for i in range(12, 20))
+        for i in range(1, 12):
+            assert p.data[i].sum() == i - 1
+            assert p.data[i, 10, 10, 10] == i - 1
+        for i in range(12, 20):
+            assert np.all(p.data[i] == 0)
 
     @pytest.mark.parametrize('init_value,expected', [
         ([2, 1, 3], [2, 2, 0]),  # updates f1, f2
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 3f2c9c9e27..4cb25f1121 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -291,7 +291,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
+    assert_structure(op, ['t', 't,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
                      't,p_s0_blk0,p_s,rsx,rsy')
 
 
@@ -952,7 +952,7 @@ def test_parallel_prec_inject(self):
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas
-        assert 'omp for' in iterations[2].pragmas[0].value
+        assert 'omp for' in iterations[1].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index c49faef629..30181acec8 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2670,10 +2670,13 @@ def test_fullopt(self):
         bns, _ = assert_blocking(op1, {'x0_blk0'})  # due to loop blocking
 
         assert summary0[('section0', None)].ops == 50
-        assert summary0[('section1', None)].ops == 41
+        assert summary0[('section1', None)].ops == 50
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
+        assert summary1[('section0', None)].ops == 9
+        assert summary1[('section1', None)].ops == 9
         assert summary1[('section2', None)].ops == 31
+        assert summary1[('section3', None)].ops == 32
         assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)

From b51c488f3edf7c9fd151bb88a5206ab43d6c4dab Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 29 Jun 2023 15:51:28 -0400
Subject: [PATCH 40/90] CI: add test for and fix #920

---
 devito/symbolics/inspection.py |  8 +-------
 devito/types/sparse.py         |  5 ++++-
 tests/test_dse.py              | 15 +++++++++++++++
 tests/test_gpu_openacc.py      |  2 +-
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index 2083f444d9..2e5698f189 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -1,7 +1,7 @@
 from functools import singledispatch
 
 import numpy as np
-from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple, Add
+from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple
 
 from devito.finite_differences import Derivative
 from devito.finite_differences.differentiable import IndexDerivative
@@ -269,12 +269,6 @@ def sympy_dtype(expr, default):
     returns the default if non is found
     """
     args = expr.args
-    # We can only infer the dtype for addition/multiplication or Symbols
-    # For other case the epxression function may modify the infered dtype
-    if not (isinstance(expr.func, Add) or isinstance(expr.func, Mul)) or \
-            not expr.is_Symbol:
-        return default
-
     # Symbol/... without argument, check its dtype
     if len(args) == 0:
         try:
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 743e245f9c..7dac77cac5 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -497,7 +497,10 @@ def _dist_data_gather(self, data):
             return
 
         # Compute dist map only once
-        data = self._C_as_ndarray(data)
+        try:
+            data = self._C_as_ndarray(data)
+        except AttributeError:
+            pass
         dmap = self._dist_datamap
         mask = self._dist_scatter_mask(dmap=dmap)
 
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 30181acec8..99cd6d1314 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2627,6 +2627,21 @@ def test_issue_2163(self):
                          subdomain=grid.interior))
         assert_structure(op, ['t,i0x,i0y'], 'ti0xi0y')
 
+    def test_dtype_aliases(self):
+        a = np.arange(64).reshape((8, 8))
+        grid = Grid(shape=a.shape, extent=(8, 8))
+
+        so = 2
+        f = Function(name='f', grid=grid, space_order=so, dtype=np.int32)
+        f.data[:] = a
+
+        fo = Function(name='fo', grid=grid, space_order=so, dtype=np.int32)
+        op = Operator(Eq(fo, f.dx))
+        op.apply()
+
+        assert FindNodes(Expression).visit(op)[0].dtype == np.float32
+        assert np.all(fo.data[:-1, :-1] == 6)
+
 
 class TestIsoAcoustic(object):
 
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index 38b7eb5514..d1ae13300d 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -102,7 +102,7 @@ def test_tile_insteadof_collapse(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
 
         assert trees[0][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'

From f2fc9b1126dc5ff13aa2aea43a2ba12360b172dd Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 30 Jun 2023 10:47:44 -0400
Subject: [PATCH 41/90] mpi: drop halospots with empty iters

---
 devito/passes/iet/languages/openacc.py |  5 ++---
 devito/passes/iet/mpi.py               |  7 +++++++
 devito/passes/iet/parpragma.py         |  9 +++++----
 devito/symbolics/inspection.py         | 14 +++++++++-----
 devito/tools/data_structures.py        | 18 ++++++++++++++++++
 tests/test_dle.py                      |  1 +
 tests/test_gpu_openacc.py              | 14 +++++++-------
 tests/test_mpi.py                      |  6 ++++--
 8 files changed, 53 insertions(+), 21 deletions(-)

diff --git a/devito/passes/iet/languages/openacc.py b/devito/passes/iet/languages/openacc.py
index af23264b16..939a68f304 100644
--- a/devito/passes/iet/languages/openacc.py
+++ b/devito/passes/iet/languages/openacc.py
@@ -155,7 +155,7 @@ class DeviceAccizer(PragmaDeviceAwareTransformer):
 
     lang = AccBB
 
-    def _make_partree(self, candidates, nthreads=None, index=0):
+    def _make_partree(self, candidates, nthreads=None):
         assert candidates
 
         root, collapsable = self._select_candidates(candidates)
@@ -164,8 +164,7 @@ def _make_partree(self, candidates, nthreads=None, index=0):
         if self._is_offloadable(root) and \
            all(i.is_Affine for i in [root] + collapsable) and \
            self.par_tile:
-            idx = min(index, len(self.par_tile) - 1)
-            tile = self.par_tile[idx]
+            tile = self.par_tile.next()
             assert isinstance(tile, tuple)
             nremainder = (ncollapsable + 1) - len(tile)
             if nremainder >= 0:
diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py
index 00d96213aa..34170bcc2e 100644
--- a/devito/passes/iet/mpi.py
+++ b/devito/passes/iet/mpi.py
@@ -45,6 +45,13 @@ def _drop_halospots(iet):
             if f in hs.fmapper and all(i.is_reduction for i in v):
                 mapper[hs].add(f)
 
+    # If a HaloSpot is outside any iteration it is not needed
+    for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
+        if not iters and halo_spots:
+            for hs in halo_spots:
+                for f in hs.fmapper:
+                    mapper[hs].add(f)
+
     # Transform the IET introducing the "reduced" HaloSpots
     subs = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs]))
             for hs in FindNodes(HaloSpot).visit(iet)}
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 08818787f9..4fdcd36a34 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -14,6 +14,7 @@
                                         make_sections_from_imask)
 from devito.symbolics import INT, ccode
 from devito.tools import as_tuple, flatten, is_integer, prod
+from devito.tools.data_structures import UnboundTuple
 from devito.types import Symbol
 
 __all__ = ['PragmaSimdTransformer', 'PragmaShmTransformer',
@@ -347,7 +348,7 @@ def _make_threaded_prodders(self, partree):
         partree = Transformer(mapper).visit(partree)
         return partree
 
-    def _make_partree(self, candidates, nthreads=None, index=None):
+    def _make_partree(self, candidates, nthreads=None):
         assert candidates
 
         # Get the collapsable Iterations
@@ -465,7 +466,7 @@ def _make_nested_partree(self, partree):
     def _make_parallel(self, iet):
         mapper = {}
         parrays = {}
-        for i, tree in enumerate(retrieve_iteration_tree(iet, mode='superset')):
+        for tree in retrieve_iteration_tree(iet, mode='superset'):
             # Get the parallelizable Iterations in `tree`
             candidates = filter_iterations(tree, key=self.key)
             if not candidates:
@@ -477,7 +478,7 @@ def _make_parallel(self, iet):
                 continue
 
             # Outer parallelism
-            root, partree = self._make_partree(candidates, index=i)
+            root, partree = self._make_partree(candidates)
             if partree is None or root in mapper:
                 continue
 
@@ -566,7 +567,7 @@ def __init__(self, sregistry, options, platform, compiler):
         super().__init__(sregistry, options, platform, compiler)
 
         self.gpu_fit = options['gpu-fit']
-        self.par_tile = options['par-tile']
+        self.par_tile = UnboundTuple(options['par-tile'])
         self.par_disabled = options['par-disabled']
 
     def _make_threaded_prodders(self, partree):
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index 2e5698f189..535f48670d 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -1,7 +1,9 @@
 from functools import singledispatch
 
 import numpy as np
-from sympy import Function, Indexed, Integer, Mul, Number, Pow, S, Symbol, Tuple
+from sympy import (Function, Indexed, Integer, Mul, Number,
+                   Pow, S, Symbol, Tuple)
+from sympy.core.operations import AssocOp
 
 from devito.finite_differences import Derivative
 from devito.finite_differences.differentiable import IndexDerivative
@@ -268,13 +270,15 @@ def sympy_dtype(expr, default):
     Try to infer the data type of the expression
     returns the default if non is found
     """
-    args = expr.args
     # Symbol/... without argument, check its dtype
-    if len(args) == 0:
+    if len(expr.args) == 0:
         try:
             return expr.dtype
         except AttributeError:
             return default
     else:
-        # Infer expression dtype from its arguments
-        return infer_dtype([sympy_dtype(a, default) for a in expr.args])
+        if not (isinstance(expr.func, AssocOp) or expr.is_Pow):
+            return default
+        else:
+            # Infer expression dtype from its arguments
+            return infer_dtype([sympy_dtype(a, default) for a in expr.args])
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
index 10a1d90672..be1bd4edc9 100644
--- a/devito/tools/data_structures.py
+++ b/devito/tools/data_structures.py
@@ -599,3 +599,21 @@ def next(self):
         if self.curiter is None:
             raise StopIteration
         return next(self.curiter)
+
+
+class UnboundTuple(object):
+    """
+    A simple data structure that returns the last element forever once reached
+    """
+    def __init__(self, items):
+        self.items = as_tuple(items)
+        self.last = len(self.items)
+        self.current = 0
+
+    def next(self):
+        item = self.items[self.current]
+        self.current = min(self.last - 1, self.current+1)
+        return item
+
+    def __len__(self):
+        return self.last
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 4cb25f1121..bc6f3771e4 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -928,6 +928,7 @@ def test_simd_space_invariant(self):
         assert 'omp simd' in iterations[3].pragmas[0].value
 
         op.apply()
+        print(op._lib)
         assert np.isclose(np.linalg.norm(f.data), 37.1458, rtol=1e-5)
 
     def test_parallel_prec_inject(self):
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index d1ae13300d..db92db3c83 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -104,13 +104,13 @@ def test_tile_insteadof_collapse(self, par_tile):
         trees = retrieve_iteration_tree(op)
         assert len(trees) == 6
 
-        assert trees[0][1].pragmas[0].value ==\
-            'acc parallel loop tile(32,4,4) present(u)'
         assert trees[1][1].pragmas[0].value ==\
+            'acc parallel loop tile(32,4,4) present(u)'
+        assert trees[2][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4) present(u)'
         # Only the AFFINE Iterations are tiled
-        assert trees[3][1].pragmas[0].value ==\
-            'acc parallel loop present(src,src_coords,u)'
+        assert trees[4][1].pragmas[0].value ==\
+            'acc parallel loop present(src,src_coords,u) deviceptr(r1,r2,r3)'
 
     @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
                                           ((32, 4, 4), (8, 8, 8))])
@@ -130,11 +130,11 @@ def test_multiple_tile_sizes(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
 
-        assert trees[0][1].pragmas[0].value ==\
-            'acc parallel loop tile(32,4,4) present(u)'
         assert trees[1][1].pragmas[0].value ==\
+            'acc parallel loop tile(32,4,4) present(u)'
+        assert trees[2][1].pragmas[0].value ==\
             'acc parallel loop tile(8,8) present(u)'
 
     def test_multi_tile_blocking_structure(self):
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 0fe4ee4a08..d3a652c0b9 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2499,8 +2499,10 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save):
         op_adj = solver.op_adj()
         adj_calls = FindNodes(Call).visit(op_adj)
 
-        assert len(fwd_calls) == 1
-        assert len(adj_calls) == 1
+        # one halo, 2 * ndim memalign and free (pos temp src/rec)
+        sf_calls = 2 * len(shape) + 2 * len(shape)
+        assert len(fwd_calls) == 1 + sf_calls
+        assert len(adj_calls) == 1 + sf_calls
 
     def run_adjoint_F(self, nd):
         """

From 84335d6e49ebd81c8595daaaaa90f40f1c2ee083 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 3 Jul 2023 10:57:24 -0400
Subject: [PATCH 42/90] CI: add large radius precomputed interp test

---
 devito/ir/clusters/analysis.py     |  2 +-
 devito/ir/support/basic.py         |  6 ++--
 devito/operations/interpolators.py | 13 ++++-----
 devito/passes/iet/mpi.py           |  6 ++--
 devito/passes/iet/parpragma.py     |  6 +++-
 tests/test_buffering.py            |  2 +-
 tests/test_dimension.py            |  5 ++--
 tests/test_dse.py                  |  8 ++---
 tests/test_gradient.py             |  1 +
 tests/test_interpolation.py        | 47 +++++++++++++++++++-----------
 10 files changed, 56 insertions(+), 40 deletions(-)

diff --git a/devito/ir/clusters/analysis.py b/devito/ir/clusters/analysis.py
index 3f6b3099c7..4778f2b2b9 100644
--- a/devito/ir/clusters/analysis.py
+++ b/devito/ir/clusters/analysis.py
@@ -100,7 +100,7 @@ def _callback(self, clusters, d, prefix):
                 # False alarm, the dependence is over a locally-defined symbol
                 continue
 
-            if dep.is_reduction and not (d.is_Custom and d.is_Derived):
+            if dep.is_reduction:
                 is_parallel_atomic = True
                 continue
 
diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
index 4e26c50935..2f7a47c0e9 100644
--- a/devito/ir/support/basic.py
+++ b/devito/ir/support/basic.py
@@ -660,9 +660,9 @@ def is_const(self, dim):
         """
         True if a constant dependence, that is no Dimensions involved, False otherwise.
         """
-        return (self.source.aindices[dim] is None and
-                self.sink.aindices[dim] is None and
-                self.distance_mapper[dim] == 0)
+        return (self.source.aindices.get(dim, None) is None and
+                self.sink.aindices.get(dim, None) is None and
+                self.distance_mapper.get(dim, 0) == 0)
 
     @memoized_meth
     def is_carried(self, dim=None):
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 116cf1699d..4550b5b143 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -3,9 +3,10 @@
 import sympy
 from cached_property import cached_property
 
+from devito.finite_differences.differentiable import Mul
 from devito.finite_differences.elementary import floor
 from devito.symbolics import retrieve_function_carriers, INT
-from devito.tools import as_tuple, flatten, prod
+from devito.tools import as_tuple, flatten
 from devito.types import (ConditionalDimension, Eq, Inc, Evaluable, Symbol,
                           CustomDimension)
 from devito.types.utils import DimensionTuple
@@ -328,11 +329,9 @@ class LinearInterpolator(WeightedInterpolator):
     """
     @property
     def _weights(self):
-        # (1 - p) * (1 - rd) + rd * p
-        # simplified for better arithmetic
-        c = [1 - p + rd * (2*p - 1)
-             for (p, d, rd) in zip(self._point_symbols, self._gdim, self._rdim)]
-        return prod(c)
+        c = [(1 - p) * (1 - r) + p * r
+             for (p, d, r) in zip(self._point_symbols, self._gdim, self._rdim)]
+        return Mul(*c)
 
     @cached_property
     def _point_symbols(self):
@@ -375,5 +374,5 @@ def interpolation_coeffs(self):
     @property
     def _weights(self):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        return prod([self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min})
+        return Mul(*[self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min})
                      for (ri, rd) in enumerate(self._rdim)])
diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py
index 34170bcc2e..1343a33b8a 100644
--- a/devito/passes/iet/mpi.py
+++ b/devito/passes/iet/mpi.py
@@ -47,9 +47,9 @@ def _drop_halospots(iet):
 
     # If a HaloSpot is outside any iteration it is not needed
     for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
-        if not iters and halo_spots:
-            for hs in halo_spots:
-                for f in hs.fmapper:
+        for hs in halo_spots:
+            for f, v in hs.fmapper.items():
+                if not iters and v.loc_indices:
                     mapper[hs].add(f)
 
     # Transform the IET introducing the "reduced" HaloSpots
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 4fdcd36a34..c0b6f2d155 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -285,6 +285,10 @@ def _select_candidates(self, candidates):
                 if i.is_Vectorized:
                     break
 
+                # Also, we do not want to collapse small atomic reductions
+                if i.is_ParallelAtomic and i.dim.is_Custom:
+                    break
+
                 # Would there be enough work per parallel iteration?
                 nested = candidates[n+1:]
                 if nested:
@@ -422,7 +426,7 @@ def _make_guard(self, parregion):
 
     def _make_nested_partree(self, partree):
         # Apply heuristic
-        if self.nhyperthreads <= self.nested:
+        if self.nhyperthreads <= self.nested or partree.root.is_ParallelAtomic:
             return partree
 
         # Note: there might be multiple sub-trees amenable to nested parallelism,
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 74eea9764b..b7f59e61a5 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -272,7 +272,7 @@ def test_over_injection():
 
     # Check generated code
     assert len(retrieve_iteration_tree(op1)) == \
-        7 + bool(configuration['language'] != 'C')
+        8 + bool(configuration['language'] != 'C')
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index c5beb3ad06..217d7c1e83 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -10,7 +10,6 @@
                     Dimension, DefaultDimension, SubDimension, switchconfig,
                     SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
                     CustomDimension, dimensions, configuration)
-from devito.arch.compiler import IntelCompiler, OneapiCompiler
 from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
                            FindSymbols, retrieve_iteration_tree)
 from devito.symbolics import indexify, retrieve_functions, IntDiv, INT
@@ -1417,8 +1416,7 @@ def test_affiness(self):
         iterations = [i for i in FindNodes(Iteration).visit(op) if i.dim is not time]
         assert all(i.is_Affine for i in iterations)
 
-    @switchconfig(condition=isinstance(configuration['compiler'],
-                  (IntelCompiler, OneapiCompiler)), safe_math=True)
+    @switchconfig(safe_math=True)
     def test_sparse_time_function(self):
         nt = 20
 
@@ -1452,6 +1450,7 @@ def test_sparse_time_function(self):
         # Note the endpoint of the range is 12 because we inject at p.forward
         for i in range(1, 12):
             assert p.data[i].sum() == i - 1
+            print(p.data[i, 10, 10, 10])
             assert p.data[i, 10, 10, 10] == i - 1
         for i in range(12, 20):
             assert np.all(p.data[i] == 0)
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 99cd6d1314..da07b61e48 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2629,7 +2629,7 @@ def test_issue_2163(self):
 
     def test_dtype_aliases(self):
         a = np.arange(64).reshape((8, 8))
-        grid = Grid(shape=a.shape, extent=(8, 8))
+        grid = Grid(shape=a.shape, extent=(7, 7))
 
         so = 2
         f = Function(name='f', grid=grid, space_order=so, dtype=np.int32)
@@ -2640,7 +2640,7 @@ def test_dtype_aliases(self):
         op.apply()
 
         assert FindNodes(Expression).visit(op)[0].dtype == np.float32
-        assert np.all(fo.data[:-1, :-1] == 6)
+        assert np.all(fo.data[:-1, :-1] == 8)
 
 
 class TestIsoAcoustic(object):
@@ -2685,13 +2685,13 @@ def test_fullopt(self):
         bns, _ = assert_blocking(op1, {'x0_blk0'})  # due to loop blocking
 
         assert summary0[('section0', None)].ops == 50
-        assert summary0[('section1', None)].ops == 50
+        assert summary0[('section1', None)].ops == 44
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
         assert summary1[('section0', None)].ops == 9
         assert summary1[('section1', None)].ops == 9
         assert summary1[('section2', None)].ops == 31
-        assert summary1[('section3', None)].ops == 32
+        assert summary1[('section3', None)].ops == 26
         assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
diff --git a/tests/test_gradient.py b/tests/test_gradient.py
index 9c91138c84..5624c5d461 100644
--- a/tests/test_gradient.py
+++ b/tests/test_gradient.py
@@ -15,6 +15,7 @@
 class TestGradient(object):
 
     @skipif(['chkpnt', 'cpu64-icc'])
+    @switchconfig(safe_math=True)
     @pytest.mark.parametrize('dtype', [np.float32, np.float64])
     @pytest.mark.parametrize('opt', [('advanced', {'openmp': True}),
                                      ('noop', {'openmp': True})])
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 186ab6cffd..dca94c8f40 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -84,25 +84,31 @@ def custom_points(grid, ranges, npoints, name='points'):
     return points
 
 
-def precompute_linear_interpolation(points, grid, origin):
-    """ Sample precompute function that, given point and grid information
-        precomputes gridpoints and interpolation coefficients according to a linear
-        scheme to be used in PrecomputedSparseFunction.
+def precompute_linear_interpolation(points, grid, origin, r=2):
+    """
+    Sample precompute function that, given point and grid information
+    precomputes gridpoints and interpolation coefficients according to a linear
+    scheme to be used in PrecomputedSparseFunction.
+
+    Allow larger radius with zero weights for testing.
     """
     gridpoints = [tuple(floor((point[i]-origin[i])/grid.spacing[i])
                         for i in range(len(point))) for point in points]
 
-    interpolation_coeffs = np.zeros((len(points), 2, 2))
+    interpolation_coeffs = np.zeros((len(points), grid.dim, r))
+    rs = r // 2 - 1
     for i, point in enumerate(points):
         for d in range(grid.dim):
-            interpolation_coeffs[i, d, 0] = ((gridpoints[i][d] + 1)*grid.spacing[d] -
-                                             point[d])/grid.spacing[d]
-            interpolation_coeffs[i, d, 1] = (point[d]-gridpoints[i][d]*grid.spacing[d])\
+            gd = gridpoints[i][d]
+            interpolation_coeffs[i, d, rs] = ((gd + 1)*grid.spacing[d] -
+                                              point[d])/grid.spacing[d]
+            interpolation_coeffs[i, d, rs+1] = (point[d]-gd*grid.spacing[d])\
                 / grid.spacing[d]
     return gridpoints, interpolation_coeffs
 
 
-def test_precomputed_interpolation():
+@pytest.mark.parametrize('r', [2, 4, 6])
+def test_precomputed_interpolation(r):
     """ Test interpolation with PrecomputedSparseFunction which accepts
         precomputed values for interpolation coefficients
     """
@@ -123,7 +129,8 @@ def init(data):
     m = Function(name='m', grid=grid, initializer=init, space_order=0)
 
     gridpoints, interpolation_coeffs = precompute_linear_interpolation(points,
-                                                                       grid, origin)
+                                                                       grid, origin,
+                                                                       r=r)
 
     sf = PrecomputedSparseFunction(name='s', grid=grid, r=r, npoint=len(points),
                                    gridpoints=gridpoints,
@@ -136,7 +143,8 @@ def init(data):
     assert(all(np.isclose(sf.data, expected_values, rtol=1e-6)))
 
 
-def test_precomputed_interpolation_time():
+@pytest.mark.parametrize('r', [2, 4, 6])
+def test_precomputed_interpolation_time(r):
     """ Test interpolation with PrecomputedSparseFunction which accepts
         precomputed values for interpolation coefficients, but this time
         with a TimeFunction
@@ -154,7 +162,8 @@ def test_precomputed_interpolation_time():
         u.data[it, :] = it
 
     gridpoints, interpolation_coeffs = precompute_linear_interpolation(points,
-                                                                       grid, origin)
+                                                                       grid, origin,
+                                                                       r=r)
 
     sf = PrecomputedSparseTimeFunction(name='s', grid=grid, r=r, npoint=len(points),
                                        nt=5, gridpoints=gridpoints,
@@ -171,7 +180,8 @@ def test_precomputed_interpolation_time():
         assert np.allclose(sf.data[it, :], it)
 
 
-def test_precomputed_injection():
+@pytest.mark.parametrize('r', [2, 4, 6])
+def test_precomputed_injection(r):
     """Test injection with PrecomputedSparseFunction which accepts
        precomputed values for interpolation coefficients
     """
@@ -188,7 +198,8 @@ def test_precomputed_injection():
     m.data[:] = 0.
 
     gridpoints, interpolation_coeffs = precompute_linear_interpolation(coords,
-                                                                       m.grid, origin)
+                                                                       m.grid, origin,
+                                                                       r=r)
 
     sf = PrecomputedSparseFunction(name='s', grid=m.grid, r=r, npoint=len(coords),
                                    gridpoints=gridpoints,
@@ -206,7 +217,8 @@ def test_precomputed_injection():
     assert np.allclose(m.data[indices], result, rtol=1.e-5)
 
 
-def test_precomputed_injection_time():
+@pytest.mark.parametrize('r', [2, 4, 6])
+def test_precomputed_injection_time(r):
     """Test injection with PrecomputedSparseFunction which accepts
        precomputed values for interpolation coefficients
     """
@@ -224,7 +236,8 @@ def test_precomputed_injection_time():
     m.data[:] = 0.
 
     gridpoints, interpolation_coeffs = precompute_linear_interpolation(coords,
-                                                                       m.grid, origin)
+                                                                       m.grid, origin,
+                                                                       r=r)
 
     sf = PrecomputedSparseTimeFunction(name='s', grid=m.grid, r=r, npoint=len(coords),
                                        gridpoints=gridpoints, nt=nt,
@@ -718,7 +731,7 @@ class SparseFirst(SparseFunction):
     # No time dependence so need the implicit dim
     rec = s.interpolate(expr=s+fs, implicit_dims=grid.stepping_dim)
     op = Operator(eqs + rec)
-    print(op)
+
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)

From 0abe77bb7b2741a306ef402dc87dd59b5ff079d3 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 24 Jul 2023 12:00:11 -0400
Subject: [PATCH 43/90] api: cleanup based on first review

---
 devito/ir/clusters/cluster.py      |  1 -
 devito/ir/equations/algorithms.py  |  9 ++--
 devito/ir/support/basic.py         |  4 +-
 devito/operations/interpolators.py | 17 ++++---
 devito/passes/clusters/aliases.py  |  2 +-
 devito/passes/iet/mpi.py           |  4 +-
 devito/passes/iet/parpragma.py     |  7 ++-
 devito/symbolics/inspection.py     |  3 +-
 devito/tools/data_structures.py    |  3 +-
 devito/tools/dtypes_lowering.py    |  3 ++
 devito/types/dense.py              |  4 +-
 devito/types/dimension.py          | 17 ++++---
 devito/types/sparse.py             | 81 +++++++++++++-----------------
 tests/test_dimension.py            |  1 -
 tests/test_pickle.py               | 18 +++++++
 15 files changed, 96 insertions(+), 78 deletions(-)

diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index de4cd8f29f..0dc3200b4f 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -334,7 +334,6 @@ def dspace(self):
 
         # Construct the `intervals` of the DataSpace, that is a global,
         # Dimension-centric view of the data space
-
         intervals = IntervalGroup.generate('union', *parts.values())
         # E.g., `db0 -> time`, but `xi NOT-> x`
         intervals = intervals.promote(lambda d: not d.is_Sub)
diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index adc462d059..2606d2dd2d 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -56,9 +56,7 @@ def handle_indexed(indexed):
     # such as A[3])
     indexeds = retrieve_indexed(expr, deep=True)
     for i in indexeds:
-        expl_dims = {d for (d, e) in zip(i.function.dimensions, i.indices)
-                     if e.is_integer}
-        extra.update(expl_dims)
+        extra.update({d for d in i.function.dimensions if i.indices[d].is_integer})
 
     # Enforce determinism
     extra = filter_sorted(extra)
@@ -75,8 +73,9 @@ def handle_indexed(indexed):
     # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
     # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
     # `(x, time, xi)` might be returned instead, which would be non-sense
-    implicit_relations.update({tuple(filter_ordered(d.root for d in i))
-                               for i in relations})
+    for i in relations:
+        dims = [di for d in i for di in (d.index, d)]
+        implicit_relations.update({tuple(filter_ordered(dims))})
 
     ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations))
 
diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
index 2f7a47c0e9..98ba9da51a 100644
--- a/devito/ir/support/basic.py
+++ b/devito/ir/support/basic.py
@@ -660,8 +660,8 @@ def is_const(self, dim):
         """
         True if a constant dependence, that is no Dimensions involved, False otherwise.
         """
-        return (self.source.aindices.get(dim, None) is None and
-                self.sink.aindices.get(dim, None) is None and
+        return (self.source.aindices.get(dim) is None and
+                self.sink.aindices.get(dim) is None and
                 self.distance_mapper.get(dim, 0) == 0)
 
     @memoized_meth
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 4550b5b143..b480c9f113 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -141,7 +141,7 @@ def _weights(self):
         raise NotImplementedError
 
     @property
-    def _gdim(self):
+    def _gdims(self):
         return self.grid.dimensions
 
     @property
@@ -153,12 +153,15 @@ def _rdim(self):
         parent = self.sfunction.dimensions[-1]
         dims = [CustomDimension("r%s%s" % (self.sfunction.name, d.name),
                                 -self.r+1, self.r, 2*self.r, parent)
-                for d in self._gdim]
+                for d in self._gdims]
 
-        return DimensionTuple(*dims, getters=self._gdim)
+        return DimensionTuple(*dims, getters=self._gdims)
 
     def _augment_implicit_dims(self, implicit_dims):
-        return as_tuple(implicit_dims) + self.sfunction.dimensions
+        if self.sfunction._sparse_position == -1:
+            return self.sfunction.dimensions + as_tuple(implicit_dims)
+        else:
+            return as_tuple(implicit_dims) + self.sfunction.dimensions
 
     def _coeff_temps(self, implicit_dims):
         return []
@@ -178,7 +181,7 @@ def _interp_idx(self, variables, implicit_dims=None):
 
         # Coefficient symbol expression
         temps.extend(self._coeff_temps(implicit_dims))
-        for ((di, d), rd, p) in zip(enumerate(self._gdim), self._rdim, pos):
+        for ((di, d), rd, p) in zip(enumerate(self._gdims), self._rdim, pos):
             # Add conditional to avoid OOB
             lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
             ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False)
@@ -330,7 +333,7 @@ class LinearInterpolator(WeightedInterpolator):
     @property
     def _weights(self):
         c = [(1 - p) * (1 - r) + p * r
-             for (p, d, r) in zip(self._point_symbols, self._gdim, self._rdim)]
+             for (p, d, r) in zip(self._point_symbols, self._gdims, self._rdim)]
         return Mul(*c)
 
     @cached_property
@@ -345,7 +348,7 @@ def _coeff_temps(self, implicit_dims):
         pmap = self.sfunction._position_map
         poseq = [Eq(self._point_symbols[d], pos - floor(pos),
                     implicit_dims=implicit_dims)
-                 for (d, pos) in zip(self._gdim, pmap.keys())]
+                 for (d, pos) in zip(self._gdims, pmap.keys())]
         return poseq
 
 
diff --git a/devito/passes/clusters/aliases.py b/devito/passes/clusters/aliases.py
index 2546e948e3..e0783323fe 100644
--- a/devito/passes/clusters/aliases.py
+++ b/devito/passes/clusters/aliases.py
@@ -837,7 +837,7 @@ def lower_schedule(schedule, meta, sregistry, ftemps):
         # This prevents cases such as `floor(a*b)` with `a` and `b` floats
         # that would creat a temporary `int r = b` leading to erronous numerical results
         # Such cases happen with the positions for sparse functions for example.
-        dtype = sympy_dtype(pivot, meta.dtype) or meta.dtype
+        dtype = sympy_dtype(pivot, meta.dtype)
 
         if writeto:
             # The Dimensions defining the shape of Array
diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py
index 1343a33b8a..de87bb13a8 100644
--- a/devito/passes/iet/mpi.py
+++ b/devito/passes/iet/mpi.py
@@ -47,9 +47,11 @@ def _drop_halospots(iet):
 
     # If a HaloSpot is outside any iteration it is not needed
     for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
+        if iters:
+            continue
         for hs in halo_spots:
             for f, v in hs.fmapper.items():
-                if not iters and v.loc_indices:
+                if v.loc_indices:
                     mapper[hs].add(f)
 
     # Transform the IET introducing the "reduced" HaloSpots
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index c0b6f2d155..016726c001 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -426,7 +426,12 @@ def _make_guard(self, parregion):
 
     def _make_nested_partree(self, partree):
         # Apply heuristic
-        if self.nhyperthreads <= self.nested or partree.root.is_ParallelAtomic:
+        if self.nhyperthreads <= self.nested:
+            return partree
+
+        # Loop nest with atomic reductions are more likely to have less latency
+        # keep outer loop parallel
+        if partree.root.is_ParallelAtomic:
             return partree
 
         # Note: there might be multiple sub-trees amenable to nested parallelism,
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index 535f48670d..06ad8ed373 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -281,4 +281,5 @@ def sympy_dtype(expr, default):
             return default
         else:
             # Infer expression dtype from its arguments
-            return infer_dtype([sympy_dtype(a, default) for a in expr.args])
+            dtype = infer_dtype([sympy_dtype(a, default) for a in expr.args])
+            return dtype or default
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
index be1bd4edc9..539f75d593 100644
--- a/devito/tools/data_structures.py
+++ b/devito/tools/data_structures.py
@@ -66,7 +66,7 @@ def __getnewargs_ex__(self):
         # objects with varying number of attributes
         return (tuple(self), dict(self.__dict__))
 
-    def get(self, key, val):
+    def get(self, key, val=None):
         return self._getters.get(key, val)
 
 
@@ -605,6 +605,7 @@ class UnboundTuple(object):
     """
     A simple data structure that returns the last element forever once reached
     """
+
     def __init__(self, items):
         self.items = as_tuple(items)
         self.last = len(self.items)
diff --git a/devito/tools/dtypes_lowering.py b/devito/tools/dtypes_lowering.py
index c8fe8a3fa5..0b3cd53ebf 100644
--- a/devito/tools/dtypes_lowering.py
+++ b/devito/tools/dtypes_lowering.py
@@ -130,6 +130,9 @@ def dtype_to_mpitype(dtype):
 
 
 def dtype_to_mpidtype(dtype):
+    """
+    Map numpy type to MPI internal types for communication
+    """
     from devito.mpi import MPI
     return MPI._typedict[np.dtype(dtype).char]
 
diff --git a/devito/types/dense.py b/devito/types/dense.py
index 9fb96ec5cd..d9adfcedc3 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -1479,9 +1479,7 @@ def parent(self):
 
     @property
     def origin(self):
-        """
-        SubFunction have zero origin
-        """
+        # SubFunction have zero origin
         return DimensionTuple(*(0 for _ in range(self.ndim)), getters=self.dimensions)
 
 
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 6051584f9a..ca30e2217e 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -107,8 +107,6 @@ class Dimension(ArgProvider):
     is_Incr = False
     is_Block = False
 
-    indirect = False
-
     # Prioritize self's __add__ and __sub__ to construct AffineIndexAccessFunction
     _op_priority = sympy.Expr._op_priority + 1.
 
@@ -183,6 +181,14 @@ def min_name(self):
     def max_name(self):
         return "%s_M" % self.name
 
+    @property
+    def indirect(self):
+        return False
+
+    @property
+    def index(self):
+        return self if self.indirect is True else getattr(self, 'parent', self)
+
     @property
     def is_const(self):
         return False
@@ -456,7 +462,6 @@ class DerivedDimension(BasicDimension):
     """
 
     is_Derived = True
-    indirect = False
 
     __rargs__ = Dimension.__rargs__ + ('parent',)
     __rkwargs__ = ()
@@ -819,10 +824,6 @@ def condition(self):
     def indirect(self):
         return self._indirect
 
-    @property
-    def index(self):
-        return self if self.indirect is True else self.parent
-
     @cached_property
     def free_symbols(self):
         retval = set(super().free_symbols)
@@ -1216,7 +1217,7 @@ def __init_finalize__(self, name, symbolic_min=None, symbolic_max=None,
         self._symbolic_min = symbolic_min
         self._symbolic_max = symbolic_max
         self._symbolic_size = symbolic_size
-        self._parent = parent
+        self._parent = parent or BOTTOM
         super().__init_finalize__(name)
 
     @property
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 7dac77cac5..ba9631e677 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -145,30 +145,6 @@ def __subfunc_setup__(self, key, suffix, dtype=None):
 
         return sf
 
-    @property
-    def npoint(self):
-        return self.shape[self._sparse_position]
-
-    @property
-    def npoint_global(self):
-        """
-        Global `npoint`s. This only differs from `self.npoint` in an MPI context.
-
-        Issues
-        ------
-        * https://github.com/devitocodes/devito/issues/1498
-        """
-        return self._npoint
-
-    @property
-    def space_order(self):
-        """The space order."""
-        return self._space_order
-
-    @property
-    def r(self):
-        return self._radius
-
     @property
     def _sparse_dim(self):
         return self.dimensions[self._sparse_position]
@@ -199,6 +175,37 @@ def _coords_indices(self):
                 np.floor((self.coordinates_data - self.grid.origin) / self.grid.spacing)
             ).astype(int)
 
+    @property
+    def _support(self):
+        """
+        The grid points surrounding each sparse point within the radius of self's
+        injection/interpolation operators.
+        """
+        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
+        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
+        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
+                        axis=2)
+
+    @property
+    def _dist_datamap(self):
+        """
+        Mapper ``M : MPI rank -> required sparse data``.
+        """
+        return self.grid.distributor.glb_to_rank(self._support) or {}
+
+    @property
+    def npoint(self):
+        return self.shape[self._sparse_position]
+
+    @property
+    def space_order(self):
+        """The space order."""
+        return self._space_order
+
+    @property
+    def r(self):
+        return self._radius
+
     @property
     def gridpoints(self):
         try:
@@ -227,28 +234,6 @@ def coordinates_data(self):
         except AttributeError:
             return None
 
-    @property
-    def _support(self):
-        """
-        The grid points surrounding each sparse point within the radius of self's
-        injection/interpolation operators.
-        """
-        max_shape = np.array(self.grid.shape).reshape(1, self.grid.dim)
-        minmax = lambda arr: np.minimum(max_shape, np.maximum(0, arr))
-        return np.stack([minmax(self._coords_indices + s) for s in self._point_support],
-                        axis=2)
-
-    @property
-    def _dist_datamap(self):
-        """
-        Mapper ``M : MPI rank -> required sparse data``.
-        """
-        return self.grid.distributor.glb_to_rank(self._support) or {}
-
-    @cached_property
-    def dist_origin(self):
-        return self._dist_origin
-
     @cached_property
     def _pos_symbols(self):
         return [Symbol(name='pos%s' % d, dtype=np.int32)
@@ -285,6 +270,10 @@ def _dist_reorder_mask(self):
                      if d is not self._sparse_dim)
         return ret
 
+    @cached_property
+    def dist_origin(self):
+        return self._dist_origin
+
     def interpolate(self, *args, **kwargs):
         """
         Implement an interpolation operation from the grid onto the given sparse points
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index 217d7c1e83..32da3b22e3 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -1450,7 +1450,6 @@ def test_sparse_time_function(self):
         # Note the endpoint of the range is 12 because we inject at p.forward
         for i in range(1, 12):
             assert p.data[i].sum() == i - 1
-            print(p.data[i, 10, 10, 10])
             assert p.data[i, 10, 10, 10] == i - 1
         for i in range(12, 20):
             assert np.all(p.data[i] == 0)
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index faf514feba..62423b2c15 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -135,6 +135,24 @@ def test_precomputed_sparse_function(self, mode, pickle):
         assert sf.dtype == new_sf.dtype
         assert sf.npoint == new_sf.npoint == 3
 
+    def test_alias_sparse_function(self, pickle):
+        grid = Grid(shape=(3,))
+        sf = SparseFunction(name='sf', grid=grid, npoint=3, space_order=2,
+                            coordinates=[(0.,), (1.,), (2.,)])
+        sf.data[0] = 1.
+
+        # Create alias
+        f0 = sf._rebuild(name='f0', alias=True)
+        pkl_f0 = pickle.dumps(f0)
+        new_f0 = pickle.loads(pkl_f0)
+
+        assert f0.data is None and new_f0.data is None
+        assert f0.coordinates.data is None and new_f0.coordinates.data is None
+
+        assert sf.space_order == f0.space_order == new_f0.space_order
+        assert sf.dtype == f0.dtype == new_f0.dtype
+        assert sf.npoint == f0.npoint == new_f0.npoint
+
     def test_internal_symbols(self, pickle):
         s = dSymbol(name='s', dtype=np.float32)
         pkl_s = pickle.dumps(s)

From 1aa35df5c911ab3bd54a80b46a365c5b0bbabc9d Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 28 Jul 2023 08:55:09 -0400
Subject: [PATCH 44/90] api: move interp coefficient inside most inner loop

---
 devito/operations/interpolators.py |  2 +-
 devito/passes/iet/parpragma.py     |  7 ++-----
 devito/symbolics/printer.py        |  4 ++++
 tests/test_buffering.py            |  2 +-
 tests/test_dle.py                  | 30 ++++++++++++++++++------------
 tests/test_dse.py                  | 29 ++++++++++++++---------------
 tests/test_gpu_common.py           |  4 +++-
 tests/test_gpu_openacc.py          | 12 ++++++------
 tests/test_gpu_openmp.py           |  2 +-
 tests/test_mpi.py                  |  4 ++--
 tests/test_operator.py             |  4 ++--
 11 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index b480c9f113..e082848ffc 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -287,7 +287,7 @@ def _inject(self, field, expr, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self._augment_implicit_dims(implicit_dims)
+        implicit_dims = self._augment_implicit_dims(implicit_dims) + self._rdim
 
         # Make iterable to support inject((u, v), expr=expr)
         # or inject((u, v), expr=(expr1, expr2))
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 016726c001..1d1ca334d2 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -285,10 +285,6 @@ def _select_candidates(self, candidates):
                 if i.is_Vectorized:
                     break
 
-                # Also, we do not want to collapse small atomic reductions
-                if i.is_ParallelAtomic and i.dim.is_Custom:
-                    break
-
                 # Would there be enough work per parallel iteration?
                 nested = candidates[n+1:]
                 if nested:
@@ -299,7 +295,8 @@ def _select_candidates(self, candidates):
                     except TypeError:
                         pass
 
-                collapsable.append(i)
+                if not i.is_ParallelAtomic or nested:
+                    collapsable.append(i)
 
             # Give a score to this candidate, based on the number of fully-parallel
             # Iterations and their position (i.e. outermost to innermost) in the nest
diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py
index c47ef95bfc..8f7ef6a719 100644
--- a/devito/symbolics/printer.py
+++ b/devito/symbolics/printer.py
@@ -105,6 +105,10 @@ def _print_Mod(self, expr):
         args = ['(%s)' % self._print(a) for a in expr.args]
         return '%'.join(args)
 
+    def _print_Mul(self, expr):
+        term = super()._print_Mul(expr)
+        return term.replace("(-1)*", "-")
+
     def _print_Min(self, expr):
         if has_integer_args(*expr.args) and len(expr.args) == 2:
             return "MIN(%s)" % self._print(expr.args)[1:-1]
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index b7f59e61a5..16f98b4f94 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -272,7 +272,7 @@ def test_over_injection():
 
     # Check generated code
     assert len(retrieve_iteration_tree(op1)) == \
-        8 + bool(configuration['language'] != 'C')
+        7 + int(configuration['language'] != 'C')
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index bc6f3771e4..86a288ac00 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -187,14 +187,9 @@ def test_cache_blocking_structure_optrelax():
 
     op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-    bns, _ = assert_blocking(op, {'p_src0_blk0', 'x0_blk0', 'p_src1_blk0'})
+    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
 
     iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
-    assert len(iters) == 2
-    assert iters[0].dim.is_Block
-    assert iters[1].dim.is_Block
-
-    iters = FindNodes(Iteration).visit(bns['p_src1_blk0'])
     assert len(iters) == 5
     assert iters[0].dim.is_Block
     assert iters[1].dim.is_Block
@@ -291,7 +286,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t', 't,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
+    assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
                      't,p_s0_blk0,p_s,rsx,rsy')
 
 
@@ -750,13 +745,14 @@ def test_array_sum_reduction(self, so, dim):
         iterations = FindNodes(Iteration).visit(op)
         parallelized = iterations[dim+1]
         assert parallelized.pragmas
-        if parallelized is iterations[-1]:
+        if parallelized.dim is iterations[-1]:
             # With the `f[z] += u[t0][x + 1][y + 1][z + 1] + 1` expr, the innermost
             # `z` Iteration gets parallelized, nothing is collapsed, hence no
             # reduction is required
             assert "reduction" not in parallelized.pragmas[0].value
         elif Ompizer._support_array_reduction(configuration['compiler']):
-            assert "reduction(+:f[0:f_vec->size[0]])" in parallelized.pragmas[0].value
+            if "collapse" in parallelized.pragmas[0].value:
+                assert "reduction(+:f[0:f_vec->size[0]])" in parallelized.pragmas[0].value
         else:
             # E.g. old GCC's
             assert "atomic update" in str(iterations[-1])
@@ -817,8 +813,10 @@ def test_incs_no_atomic(self):
         # All loops get collapsed, but the `y` and `z` loops are PARALLEL_IF_ATOMIC,
         # hence an atomic pragma is expected
         op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True,
-                                                     'par-collapse-ncores': 1}))
-        assert 'collapse(3)' in str(op0)
+                                                     'par-collapse-ncores': 1,
+                                                     'par-collapse-work': 0}))
+
+        assert 'collapse(2)' in str(op0)
         assert 'atomic' in str(op0)
 
         # Now only `x` is parallelized
@@ -928,7 +926,6 @@ def test_simd_space_invariant(self):
         assert 'omp simd' in iterations[3].pragmas[0].value
 
         op.apply()
-        print(op._lib)
         assert np.isclose(np.linalg.norm(f.data), 37.1458, rtol=1e-5)
 
     def test_parallel_prec_inject(self):
@@ -955,6 +952,14 @@ def test_parallel_prec_inject(self):
         assert not iterations[0].pragmas
         assert 'omp for' in iterations[1].pragmas[0].value
 
+        op0 = Operator(eqns, opt=('advanced', {'openmp': True,
+                                               'par-collapse-ncores': 1,
+                                               'par-collapse-work': 1}))
+        iterations = FindNodes(Iteration).visit(op0)
+
+        assert not iterations[0].pragmas
+        assert 'omp for collapse(2)' in iterations[1].pragmas[0].value
+
 
 class TestNestedParallelism(object):
 
@@ -1007,6 +1012,7 @@ def test_collapsing(self):
 
         # Does it produce the right result
         op.apply(t_M=9)
+
         assert np.all(u.data[0] == 10)
 
         bns, _ = assert_blocking(op, {'x0_blk0'})
diff --git a/tests/test_dse.py b/tests/test_dse.py
index da07b61e48..730021c3d8 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -42,9 +42,9 @@ def test_scheduling_after_rewrite():
     trees = retrieve_iteration_tree(op)
 
     # Check loop nest structure
-    assert all(i.dim is j for i, j in zip(trees[1], grid.dimensions))  # time invariant
-    assert trees[2].root.dim is grid.time_dim
-    assert all(trees[2].root.dim is tree.root.dim for tree in trees[2:])
+    assert all(i.dim is j for i, j in zip(trees[0], grid.dimensions))  # time invariant
+    assert trees[1].root.dim is grid.time_dim
+    assert all(trees[1].root.dim is tree.root.dim for tree in trees[1:])
 
 
 @pytest.mark.parametrize('exprs,expected,min_cost', [
@@ -1665,7 +1665,7 @@ def test_drop_redundants_after_fusion(self, rotate):
         op = Operator(eqns, opt=('advanced', {'cire-rotate': rotate}))
 
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        assert len(arrays) == 4
+        assert len(arrays) == 2
         assert all(i._mem_heap and not i._mem_external for i in arrays)
 
     def test_full_shape_big_temporaries(self):
@@ -2689,10 +2689,9 @@ def test_fullopt(self):
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
         assert summary1[('section0', None)].ops == 9
-        assert summary1[('section1', None)].ops == 9
-        assert summary1[('section2', None)].ops == 31
-        assert summary1[('section3', None)].ops == 26
-        assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
+        assert summary1[('section1', None)].ops == 31
+        assert summary1[('section2', None)].ops == 88
+        assert np.isclose(summary1[('section1', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
         assert np.allclose(rec0.data, rec1.data, atol=10e-5)
@@ -2752,8 +2751,8 @@ def test_fullopt(self):
         assert np.allclose(self.tti_noopt[1].data, rec.data, atol=10e-1)
 
         # Check expected opcount/oi
-        assert summary[('section3', None)].ops == 92
-        assert np.isclose(summary[('section3', None)].oi, 2.074, atol=0.001)
+        assert summary[('section2', None)].ops == 92
+        assert np.isclose(summary[('section2', None)].oi, 2.074, atol=0.001)
 
         # With optimizations enabled, there should be exactly four BlockDimensions
         op = wavesolver.op_fwd()
@@ -2768,10 +2767,10 @@ def test_fullopt(self):
         # * all of the six Arrays are allocated on the heap
         # * with OpenMP:
         #   four Arrays are defined globally for the cos/sin temporaries
-        #   six Arrays are defined globally for the sparse positions temporaries
+        #   3 Arrays are defined globally for the sparse positions temporaries
         # and two additional bock-sized Arrays are defined locally
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        extra_arrays = 2+6
+        extra_arrays = 2+3
         assert len(arrays) == 4 + extra_arrays
         assert all(i._mem_heap and not i._mem_external for i in arrays)
         bns, pbs = assert_blocking(op, {'x0_blk0'})
@@ -2807,7 +2806,7 @@ def test_fullopt_w_mpi(self):
     def test_opcounts(self, space_order, expected):
         op = self.tti_operator(opt='advanced', space_order=space_order)
         sections = list(op.op_fwd()._profiler._sections.values())
-        assert sections[3].sops == expected
+        assert sections[2].sops == expected
 
     @switchconfig(profiling='advanced')
     @pytest.mark.parametrize('space_order,expected', [
@@ -2817,8 +2816,8 @@ def test_opcounts_adjoint(self, space_order, expected):
         wavesolver = self.tti_operator(opt=('advanced', {'openmp': False}))
         op = wavesolver.op_adj()
 
-        assert op._profiler._sections['section3'].sops == expected
-        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+6
+        assert op._profiler._sections['section2'].sops == expected
+        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3
 
 
 class TestTTIv2(object):
diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py
index a93d280fc7..031bd9181b 100644
--- a/tests/test_gpu_common.py
+++ b/tests/test_gpu_common.py
@@ -1403,7 +1403,9 @@ def test_empty_arrays(self):
         f = TimeFunction(name='f', grid=grid, space_order=0)
         f.data[:] = 1.
         sf1 = SparseTimeFunction(name='sf1', grid=grid, npoint=0, nt=10)
-        sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10)
+        sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10,
+                                 coordinates=sf1.coordinates,
+                                 dimensions=sf1.dimensions)
         assert sf1.size == 0
         assert sf2.size == 0
 
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index db92db3c83..9c4a0e885e 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -102,15 +102,15 @@ def test_tile_insteadof_collapse(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 4
 
-        assert trees[1][1].pragmas[0].value ==\
+        assert trees[0][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'
-        assert trees[2][1].pragmas[0].value ==\
+        assert trees[1][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4) present(u)'
         # Only the AFFINE Iterations are tiled
-        assert trees[4][1].pragmas[0].value ==\
-            'acc parallel loop present(src,src_coords,u) deviceptr(r1,r2,r3)'
+        assert trees[3][1].pragmas[0].value ==\
+            'acc parallel loop collapse(3) present(src,src_coords,u)'
 
     @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
                                           ((32, 4, 4), (8, 8, 8))])
@@ -130,7 +130,7 @@ def test_multiple_tile_sizes(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 4
 
         assert trees[1][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'
diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py
index 29866508d8..bc2de71708 100644
--- a/tests/test_gpu_openmp.py
+++ b/tests/test_gpu_openmp.py
@@ -265,7 +265,7 @@ def test_timeparallel_reduction(self):
         assert not tree.root.pragmas
         assert len(tree[1].pragmas) == 1
         assert tree[1].pragmas[0].value ==\
-            ('omp target teams distribute parallel for collapse(3)'
+            ('omp target teams distribute parallel for collapse(2)'
              ' reduction(+:f[0])')
 
 
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index d3a652c0b9..14ddbec249 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2499,8 +2499,8 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save):
         op_adj = solver.op_adj()
         adj_calls = FindNodes(Call).visit(op_adj)
 
-        # one halo, 2 * ndim memalign and free (pos temp src/rec)
-        sf_calls = 2 * len(shape) + 2 * len(shape)
+        # one halo, ndim memalign and free (pos temp rec)
+        sf_calls = 2 * len(shape)
         assert len(fwd_calls) == 1 + sf_calls
         assert len(adj_calls) == 1 + sf_calls
 
diff --git a/tests/test_operator.py b/tests/test_operator.py
index ed34f8aaf8..f38ac01942 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -1803,7 +1803,7 @@ def test_scheduling_sparse_functions(self):
         # `trees` than 6
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 5
         # Time loop not shared due to the WAR
         assert trees[0][0].dim is time and trees[0][0] is trees[1][0]  # this IS shared
         assert trees[1][0] is not trees[3][0]
@@ -1813,7 +1813,7 @@ def test_scheduling_sparse_functions(self):
         eqn2 = sf1.inject(u1.forward, expr=sf1)
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 5
         assert all(trees[0][0] is i[0] for i in trees)
 
     def test_scheduling_with_free_dims(self):

From d7d2040522a6eb4b054dd7d754d12b3414b4b752 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 31 Jul 2023 09:26:44 -0400
Subject: [PATCH 45/90] ci: new flake8 version fix

---
 .github/workflows/examples.yml |  5 -----
 devito/passes/iet/mpi.py       |  9 ---------
 devito/passes/iet/parpragma.py |  2 ++
 devito/symbolics/inspection.py |  4 ++--
 devito/types/basic.py          |  2 +-
 devito/types/dimension.py      | 10 +++++++++-
 devito/types/sparse.py         | 18 ++++++++++++++----
 tests/test_caching.py          |  7 +++----
 tests/test_gpu_openacc.py      |  4 ++--
 9 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml
index 8bc9c34f30..b4eb3d55a1 100644
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -51,11 +51,6 @@ jobs:
     - name: Tests in examples
       run: |
         py.test --cov --cov-config=.coveragerc --cov-report=xml examples/
-        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/acoustic/acoustic_example.py
-        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/viscoacoustic/viscoacoustic_example.py
-        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/tti/tti_example.py
-        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/elastic/elastic_example.py
-        py.test --cov --cov-config=.coveragerc --cov-report=xml examples/seismic/viscoelastic/viscoelastic_example.py
 
     - name: Seismic acoustic examples
       run: |
diff --git a/devito/passes/iet/mpi.py b/devito/passes/iet/mpi.py
index de87bb13a8..00d96213aa 100644
--- a/devito/passes/iet/mpi.py
+++ b/devito/passes/iet/mpi.py
@@ -45,15 +45,6 @@ def _drop_halospots(iet):
             if f in hs.fmapper and all(i.is_reduction for i in v):
                 mapper[hs].add(f)
 
-    # If a HaloSpot is outside any iteration it is not needed
-    for iters, halo_spots in MapNodes(Iteration, HaloSpot, 'groupby').visit(iet).items():
-        if iters:
-            continue
-        for hs in halo_spots:
-            for f, v in hs.fmapper.items():
-                if v.loc_indices:
-                    mapper[hs].add(f)
-
     # Transform the IET introducing the "reduced" HaloSpots
     subs = {hs: hs._rebuild(halo_scheme=hs.halo_scheme.drop(mapper[hs]))
             for hs in FindNodes(HaloSpot).visit(iet)}
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 1d1ca334d2..44ee6afd6c 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -295,6 +295,8 @@ def _select_candidates(self, candidates):
                     except TypeError:
                         pass
 
+                # Do not collapse the innermost loop if it is an atomic reduction;
+                # an atomic loop is only collapsed when nested candidates follow it
                 if not i.is_ParallelAtomic or nested:
                     collapsable.append(i)
 
diff --git a/devito/symbolics/inspection.py b/devito/symbolics/inspection.py
index 06ad8ed373..94279db4ab 100644
--- a/devito/symbolics/inspection.py
+++ b/devito/symbolics/inspection.py
@@ -267,8 +267,8 @@ def has_integer_args(*args):
 
 def sympy_dtype(expr, default):
     """
-    Try to infer the data type of the expression
-    returns the default if non is found
+    Infer the dtype of the expression,
+    or return `default` if it cannot be determined.
     """
     # Symbol/... without argument, check its dtype
     if len(expr.args) == 0:
diff --git a/devito/types/basic.py b/devito/types/basic.py
index b4620ca093..55fe1d07a1 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -830,7 +830,7 @@ def __new__(cls, *args, **kwargs):
             # Go straight through Basic, thus bypassing caching and machinery
             # in sympy.Application/Function that isn't really necessary
             # AbstractFunctions are unique by construction!
-            newobj = sympy.Basic.__new__(cls, *indices)
+            newobj = sympy.Basic.__new__(cls, *sympy.sympify(indices))
 
         # Initialization. The following attributes must be available
         # when executing __init_finalize__
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index ca30e2217e..76d9d9e60a 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -187,7 +187,7 @@ def indirect(self):
 
     @property
     def index(self):
-        return self if self.indirect is True else getattr(self, 'parent', self)
+        return self
 
     @property
     def is_const(self):
@@ -477,6 +477,10 @@ def __init_finalize__(self, name, parent):
     def parent(self):
         return self._parent
 
+    @property
+    def index(self):
+        return self if self.indirect else self.parent
+
     @property
     def root(self):
         return self._parent.root
@@ -1228,6 +1232,10 @@ def is_Derived(self):
     def parent(self):
         return self._parent
 
+    @property
+    def index(self):
+        return self.parent or self
+
     @property
     def root(self):
         if self.is_Derived:
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index ba9631e677..0ac006c26b 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -112,7 +112,7 @@ def __subfunc_setup__(self, key, suffix, dtype=None):
             # Fallback to default behaviour
             dtype = dtype or self.dtype
         else:
-            if not isinstance(key, np.ndarray):
+            if key is not None:
                 key = np.array(key)
 
             if (shape != key.shape[:2] and key.shape != (shape[1],)) and \
@@ -197,6 +197,16 @@ def _dist_datamap(self):
     def npoint(self):
         return self.shape[self._sparse_position]
 
+    @property
+    def npoint_global(self):
+        """
+        Global `npoint`s. This only differs from `self.npoint` in an MPI context.
+        Issues
+        ------
+        * https://github.com/devitocodes/devito/issues/1498
+        """
+        return self._npoint
+
     @property
     def space_order(self):
         """The space order."""
@@ -1656,9 +1666,9 @@ def __indices_setup__(cls, *args, **kwargs):
         """
         Return the default Dimension indices for a given data shape.
         """
-        Dimensions = kwargs.get('dimensions')
-        if Dimensions is None:
-            Dimensions = (kwargs['grid'].time_dim, Dimension(
+        dimensions = kwargs.get('dimensions')
+        if dimensions is None:
+            dimensions = (kwargs['grid'].time_dim, Dimension(
                 name='p_%s' % kwargs["name"]))
 
         if args:
diff --git a/tests/test_caching.py b/tests/test_caching.py
index 978a09e896..a11c6319a3 100644
--- a/tests/test_caching.py
+++ b/tests/test_caching.py
@@ -638,10 +638,9 @@ def test_sparse_function(self, operate_on_empty_cache):
 
         i = u.inject(expr=u, field=u)
 
-        # created: rxu, rxy (radius dimensions) and spacings and conditionals
-        # conditional sparse dim
+        # created: rux, ruy (radius dimensions) and spacings
         # posx, posy, px, py, u_coords (as indexified),
-        ncreated = 2+2+1+2+2+2+1
+        ncreated = 2+1+2+2+2+1
         # Note that injection is now lazy so no new symbols should be created
         assert len(_SymbolCache) == cur_cache_size
         i.evaluate
@@ -661,7 +660,7 @@ def test_sparse_function(self, operate_on_empty_cache):
         # in the first clear_cache they were still referenced by their "parent" objects
         # (e.g., ru* by ConditionalDimensions, through `condition`)
 
-        assert len(_SymbolCache) == init_cache_size + 10
+        assert len(_SymbolCache) == init_cache_size + 8
         clear_cache()
         # Now we should be back to the original state except for
         # pos* that belong to the abstract class
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index 9c4a0e885e..823d11854d 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -132,9 +132,9 @@ def test_multiple_tile_sizes(self, par_tile):
         trees = retrieve_iteration_tree(op)
         assert len(trees) == 4
 
-        assert trees[1][1].pragmas[0].value ==\
+        assert trees[0][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'
-        assert trees[2][1].pragmas[0].value ==\
+        assert trees[1][1].pragmas[0].value ==\
             'acc parallel loop tile(8,8) present(u)'
 
     def test_multi_tile_blocking_structure(self):

From 92a60b82114502f6d22d07366c725241d54d7298 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Fri, 25 Aug 2023 10:59:16 -0400
Subject: [PATCH 46/90] dsl: tweak reconstruction of sparse functions

---
 devito/types/sparse.py    | 10 ++++++++++
 requirements-optional.txt |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 0ac006c26b..d2eb1047b9 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -80,6 +80,16 @@ def __shape_setup__(cls, **kwargs):
             shape = (glb_npoint[grid.distributor.myrank],)
         return shape
 
+    def func(self, *args, **kwargs):
+        # Rebuild subfunctions first to avoid new data creation as we have to use `_data`
+        # as a reconstruction kwargs to avoid the circular dependency
+        # with the parent in SubFunction
+        # This is also necessary to avoid shape issues in the SubFunction with MPI
+        for s in self._sub_functions:
+            if getattr(self, s) is not None:
+                kwargs.update({s: getattr(self, s).func(*args, **kwargs)})
+        return super().func(*args, **kwargs)
+
     def __fd_setup__(self):
         """
         Dynamically add derivative short-cuts.
diff --git a/requirements-optional.txt b/requirements-optional.txt
index eaff31931b..b3f80f0d42 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -1,5 +1,5 @@
 matplotlib
 pandas
-pyrevolve
+pyrevolve==2.2.3
 scipy
 distributed
\ No newline at end of file

From 4d9abed4238b4ddf196a8efe68776b8679fa9480 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Tue, 5 Sep 2023 09:57:27 -0400
Subject: [PATCH 47/90] compiler: fix dimension sort determinism

---
 devito/ir/equations/algorithms.py  | 12 ++++++++++--
 devito/ir/support/utils.py         |  1 +
 devito/operations/interpolators.py | 11 +++++++----
 devito/types/sparse.py             | 10 +++++-----
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/devito/ir/equations/algorithms.py b/devito/ir/equations/algorithms.py
index 2606d2dd2d..0c0185055e 100644
--- a/devito/ir/equations/algorithms.py
+++ b/devito/ir/equations/algorithms.py
@@ -74,10 +74,18 @@ def handle_indexed(indexed):
     # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
     # `(x, time, xi)` might be returned instead, which would be non-sense
     for i in relations:
-        dims = [di for d in i for di in (d.index, d)]
+        dims = []
+        for d in i:
+            # Only add index if a different Dimension name to avoid dropping conditionals
+            # with the same name as the parent
+            if d.index.name == d.name:
+                dims.append(d)
+            else:
+                dims.extend([d.index, d])
+
         implicit_relations.update({tuple(filter_ordered(dims))})
 
-    ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations))
+    ordering = PartialOrderTuple(extra, relations=implicit_relations)
 
     return ordering
 
diff --git a/devito/ir/support/utils.py b/devito/ir/support/utils.py
index 5f08f48020..3750b08a0e 100644
--- a/devito/ir/support/utils.py
+++ b/devito/ir/support/utils.py
@@ -183,6 +183,7 @@ def detect_accesses(exprs):
     for e in as_tuple(exprs):
         other_dims.update(i for i in e.free_symbols if isinstance(i, Dimension))
         other_dims.update(e.implicit_dims)
+    other_dims = filter_sorted(other_dims)
     mapper[None] = Stencil([(i, 0) for i in other_dims])
 
     return mapper
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index e082848ffc..3f1ad5e3b6 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -176,11 +176,7 @@ def _interp_idx(self, variables, implicit_dims=None):
         """
         mapper = {}
         pos = self.sfunction._position_map.values()
-        # Temporaries for the position
-        temps = self._positions(implicit_dims)
 
-        # Coefficient symbol expression
-        temps.extend(self._coeff_temps(implicit_dims))
         for ((di, d), rd, p) in zip(enumerate(self._gdims), self._rdim, pos):
             # Add conditional to avoid OOB
             lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
@@ -188,10 +184,17 @@ def _interp_idx(self, variables, implicit_dims=None):
             cond = sympy.And(lb, ub, evaluate=False)
             mapper[d] = ConditionalDimension(rd.name, rd, condition=cond, indirect=True)
 
+        # Temporaries for the position
+        temps = self._positions(implicit_dims)
+
+        # Coefficient symbol expression
+        temps.extend(self._coeff_temps(implicit_dims))
+
         # Substitution mapper for variables
         idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) + p
                     for ((k, c), p) in zip(mapper.items(), pos)})
                     for v in variables}
+        idx_subs.update(dict(zip(self._rdim, mapper.values())))
 
         return idx_subs, temps
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index d2eb1047b9..a1aef68f5f 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -84,7 +84,7 @@ def func(self, *args, **kwargs):
         # Rebuild subfunctions first to avoid new data creation as we have to use `_data`
         # as a reconstruction kwargs to avoid the circular dependency
         # with the parent in SubFunction
-        # This is also necessary to avoid shaoe issue in the SubFunction with mpi
+        # This is also necessary to avoid shape issue in the SubFunction with mpi
         for s in self._sub_functions:
             if getattr(self, s) is not None:
                 kwargs.update({s: getattr(self, s).func(*args, **kwargs)})
@@ -724,7 +724,7 @@ class SparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to ``(npoint,)``.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -845,7 +845,7 @@ class SparseTimeFunction(AbstractSparseTimeFunction, SparseFunction):
         Discretisation order for time derivatives. Defaults to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to ``(nt, npoint)``.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -992,7 +992,7 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
         Discretisation order for space derivatives. Defaults to 0.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional
@@ -1156,7 +1156,7 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
         Discretisation order for time derivatives. Default to 1.
     shape : tuple of ints, optional
         Shape of the object. Defaults to `(npoint,)`.
-    Dimensions : tuple of Dimension, optional
+    dimensions : tuple of Dimension, optional
         Dimensions associated with the object. Only necessary if the SparseFunction
         defines a multi-dimensional tensor.
     dtype : data-type, optional

From 9cb54fc94b42a3b84b2f837d7334ed655b285246 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 1 Aug 2023 10:01:16 +0100
Subject: [PATCH 48/90] builtins: Support batched initialize_function

---
 devito/builtins/initializers.py | 158 +++++++++++++++++++-------------
 tests/test_builtins.py          |  17 ++++
 2 files changed, 112 insertions(+), 63 deletions(-)

diff --git a/devito/builtins/initializers.py b/devito/builtins/initializers.py
index db0cd640cc..0f1e9ea5c2 100644
--- a/devito/builtins/initializers.py
+++ b/devito/builtins/initializers.py
@@ -216,6 +216,66 @@ def fset(f, g):
     return f
 
 
+def _initialize_function(function, data, nbl, mapper=None, mode='constant'):
+    """
+    Construct the symbolic objects for `initialize_function`.
+    """
+    nbl, slices = nbl_to_padsize(nbl, function.ndim)
+    if isinstance(data, dv.Function):
+        function.data[slices] = data.data[:]
+    else:
+        function.data[slices] = data
+    lhs = []
+    rhs = []
+    options = []
+
+    if mode == 'reflect' and function.grid.distributor.is_parallel:
+        # Check that HALO size is appropriate
+        halo = function.halo
+        local_size = function.shape
+
+        def buff(i, j):
+            return [(i + k - 2*max(max(nbl))) for k in j]
+
+        b = [min(l) for l in (w for w in (buff(i, j) for i, j in zip(local_size, halo)))]
+        if any(np.array(b) < 0):
+            raise ValueError("Function `%s` halo is not sufficiently thick." % function)
+
+    for d, (nl, nr) in zip(function.space_dimensions, as_tuple(nbl)):
+        dim_l = dv.SubDimension.left(name='abc_%s_l' % d.name, parent=d, thickness=nl)
+        dim_r = dv.SubDimension.right(name='abc_%s_r' % d.name, parent=d, thickness=nr)
+        if mode == 'constant':
+            subsl = nl
+            subsr = d.symbolic_max - nr
+        elif mode == 'reflect':
+            subsl = 2*nl - 1 - dim_l
+            subsr = 2*(d.symbolic_max - nr) + 1 - dim_r
+        else:
+            raise ValueError("Mode not available")
+        lhs.append(function.subs({d: dim_l}))
+        lhs.append(function.subs({d: dim_r}))
+        rhs.append(function.subs({d: subsl}))
+        rhs.append(function.subs({d: subsr}))
+        options.extend([None, None])
+
+        if mapper and d in mapper.keys():
+            exprs = mapper[d]
+            lhs_extra = exprs['lhs']
+            rhs_extra = exprs['rhs']
+            lhs.extend(as_list(lhs_extra))
+            rhs.extend(as_list(rhs_extra))
+            options_extra = exprs.get('options', len(as_list(lhs_extra))*[None, ])
+            if isinstance(options_extra, list):
+                options.extend(options_extra)
+            else:
+                options.extend([options_extra])
+
+    if all(options is None for i in options):
+        options = None
+
+    return lhs, rhs, options
+
+
 def initialize_function(function, data, nbl, mapper=None, mode='constant',
                         name=None, pad_halo=True, **kwargs):
     """
@@ -225,9 +285,9 @@ def initialize_function(function, data, nbl, mapper=None, mode='constant',
 
     Parameters
     ----------
-    function : Function
+    function : Function or list of Functions
         The initialised object.
-    data : ndarray or Function
+    data : ndarray or Function or list of ndarray/Function
         The data used for initialisation.
     nbl : int or tuple of int or tuple of tuple of int
         Number of outer layers (such as absorbing layers for boundary damping).
@@ -286,73 +346,45 @@ def initialize_function(function, data, nbl, mapper=None, mode='constant',
           [2, 3, 3, 3, 3, 2],
           [2, 2, 2, 2, 2, 2]], dtype=int32)
     """
-    name = name or 'pad_%s' % function.name
-    if isinstance(function, dv.TimeFunction):
+    if isinstance(function, (list, tuple)):
+        if not isinstance(data, (list, tuple)):
+            raise TypeError("Expected a list of `data`")
+        elif len(function) != len(data):
+            raise ValueError("Expected %d `data` items, got %d" %
+                             (len(function), len(data)))
+
+        if mapper is not None:
+            raise NotImplementedError("Unsupported `mapper` with batching")
+
+        functions = function
+        datas = data
+    else:
+        functions = (function,)
+        datas = (data,)
+
+    if any(isinstance(f, dv.TimeFunction) for f in functions):
         raise NotImplementedError("TimeFunctions are not currently supported.")
 
     if nbl == 0:
-        if isinstance(data, dv.Function):
-            function.data[:] = data.data[:]
-        else:
-            function.data[:] = data[:]
-        if pad_halo:
-            pad_outhalo(function)
-        return
-
-    nbl, slices = nbl_to_padsize(nbl, function.ndim)
-    if isinstance(data, dv.Function):
-        function.data[slices] = data.data[:]
+        for f in functions:
+            if isinstance(data, dv.Function):
+                f.data[:] = data.data[:]
+            else:
+                f.data[:] = data[:]
     else:
-        function.data[slices] = data
-    lhs = []
-    rhs = []
-    options = []
-
-    if mode == 'reflect' and function.grid.distributor.is_parallel:
-        # Check that HALO size is appropriate
-        halo = function.halo
-        local_size = function.shape
-
-        def buff(i, j):
-            return [(i + k - 2*max(max(nbl))) for k in j]
+        lhss, rhss, optionss = [], [], []
+        for f, data in zip(functions, datas):
+            lhs, rhs, options = _initialize_function(f, data, nbl, mapper, mode)
 
-        b = [min(l) for l in (w for w in (buff(i, j) for i, j in zip(local_size, halo)))]
-        if any(np.array(b) < 0):
-            raise ValueError("Function `%s` halo is not sufficiently thick." % function)
+            lhss.extend(lhs)
+            rhss.extend(rhs)
+            optionss.extend(options)
 
-    for d, (nl, nr) in zip(function.space_dimensions, as_tuple(nbl)):
-        dim_l = dv.SubDimension.left(name='abc_%s_l' % d.name, parent=d, thickness=nl)
-        dim_r = dv.SubDimension.right(name='abc_%s_r' % d.name, parent=d, thickness=nr)
-        if mode == 'constant':
-            subsl = nl
-            subsr = d.symbolic_max - nr
-        elif mode == 'reflect':
-            subsl = 2*nl - 1 - dim_l
-            subsr = 2*(d.symbolic_max - nr) + 1 - dim_r
-        else:
-            raise ValueError("Mode not available")
-        lhs.append(function.subs({d: dim_l}))
-        lhs.append(function.subs({d: dim_r}))
-        rhs.append(function.subs({d: subsl}))
-        rhs.append(function.subs({d: subsr}))
-        options.extend([None, None])
-
-        if mapper and d in mapper.keys():
-            exprs = mapper[d]
-            lhs_extra = exprs['lhs']
-            rhs_extra = exprs['rhs']
-            lhs.extend(as_list(lhs_extra))
-            rhs.extend(as_list(rhs_extra))
-            options_extra = exprs.get('options', len(as_list(lhs_extra))*[None, ])
-            if isinstance(options_extra, list):
-                options.extend(options_extra)
-            else:
-                options.extend([options_extra])
-
-    if all(options is None for i in options):
-        options = None
+        assert len(lhss) == len(rhss) == len(optionss)
 
-    assign(lhs, rhs, options=options, name=name, **kwargs)
+        name = name or 'pad_%s' % '_'.join(f.name for f in functions)
+        assign(lhss, rhss, options=optionss, name=name, **kwargs)
 
     if pad_halo:
-        pad_outhalo(function)
+        for f in functions:
+            pad_outhalo(f)
diff --git a/tests/test_builtins.py b/tests/test_builtins.py
index 21c36e319b..e62c8d58db 100644
--- a/tests/test_builtins.py
+++ b/tests/test_builtins.py
@@ -327,6 +327,23 @@ def test_if_halo_mpi(self, nbl):
             expected = np.pad(a[na//2:, na//2:], [(0, 1+nbl), (0, 1+nbl)], 'edge')
             assert np.all(f._data_with_outhalo._local == expected)
 
+    def test_batching(self):
+        grid = Grid(shape=(12, 12))
+
+        a = np.arange(16).reshape((4, 4))
+
+        f = Function(name='f', grid=grid, dtype=np.int32)
+        g = Function(name='g', grid=grid, dtype=np.int32)
+        h = Function(name='h', grid=grid, dtype=np.int32)
+
+        initialize_function([f, g, h], [a, a, a], 4, mode='reflect')
+
+        for i in [f, g, h]:
+            assert np.all(a[:, ::-1] - np.array(i.data[4:8, 0:4]) == 0)
+            assert np.all(a[:, ::-1] - np.array(i.data[4:8, 8:12]) == 0)
+            assert np.all(a[::-1, :] - np.array(i.data[0:4, 4:8]) == 0)
+            assert np.all(a[::-1, :] - np.array(i.data[8:12, 4:8]) == 0)
+
 
 class TestBuiltinsResult(object):
 

From 4f62b4dd783412fbfb2d5a1ca2b70d8e53743e79 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 7 Sep 2023 14:08:49 -0400
Subject: [PATCH 49/90] api: fix indexification of staggered functions after
 dimension subs

---
 devito/builtins/initializers.py    |  4 ++--
 devito/operations/interpolators.py |  2 +-
 devito/types/basic.py              | 11 +++++++++--
 tests/test_builtins.py             |  4 ++--
 tests/test_symbolics.py            | 11 +++++++++++
 5 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/devito/builtins/initializers.py b/devito/builtins/initializers.py
index 0f1e9ea5c2..f338e194e1 100644
--- a/devito/builtins/initializers.py
+++ b/devito/builtins/initializers.py
@@ -366,7 +366,7 @@ def initialize_function(function, data, nbl, mapper=None, mode='constant',
         raise NotImplementedError("TimeFunctions are not currently supported.")
 
     if nbl == 0:
-        for f in functions:
+        for f, data in zip(functions, datas):
             if isinstance(data, dv.Function):
                 f.data[:] = data.data[:]
             else:
@@ -382,7 +382,7 @@ def initialize_function(function, data, nbl, mapper=None, mode='constant',
 
         assert len(lhss) == len(rhss) == len(optionss)
 
-        name = name or 'pad_%s' % '_'.join(f.name for f in functions)
+        name = name or 'initialize_%s' % '_'.join(f.name for f in functions)
         assign(lhss, rhss, options=optionss, name=name, **kwargs)
 
     if pad_halo:
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 3f1ad5e3b6..ab758ba982 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -191,7 +191,7 @@ def _interp_idx(self, variables, implicit_dims=None):
         temps.extend(self._coeff_temps(implicit_dims))
 
         # Substitution mapper for variables
-        idx_subs = {v: v.subs({k: c - v.origin.get(k, 0) + p
+        idx_subs = {v: v.subs({k: c + p
                     for ((k, c), p) in zip(mapper.items(), pos)})
                     for v in variables}
         idx_subs.update(dict(zip(self._rdim, mapper.values())))
diff --git a/devito/types/basic.py b/devito/types/basic.py
index 55fe1d07a1..c36496d195 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -1245,8 +1245,15 @@ def indexify(self, indices=None, subs=None):
         subs = [{**{d.spacing: 1, -d.spacing: -1}, **subs} for d in self.dimensions]
 
         # Indices after substitutions
-        indices = [sympy.sympify(a.subs(d, d - o).xreplace(s)) for a, d, o, s in
-                   zip(self.args, self.dimensions, self.origin, subs)]
+        indices = []
+        for a, d, o, s in zip(self.args, self.dimensions, self.origin, subs):
+            if d in a.free_symbols:
+                # Shift by origin d -> d - o.
+                indices.append(sympy.sympify(a.subs(d, d - o).xreplace(s)))
+            else:
+                # Dimension has been removed, e.g. u[10], plain shift by origin
+                indices.append(sympy.sympify(a - o).xreplace(s))
+
         indices = [i.xreplace({k: sympy.Integer(k) for k in i.atoms(sympy.Float)})
                    for i in indices]
 
diff --git a/tests/test_builtins.py b/tests/test_builtins.py
index e62c8d58db..4ffe02b552 100644
--- a/tests/test_builtins.py
+++ b/tests/test_builtins.py
@@ -333,8 +333,8 @@ def test_batching(self):
         a = np.arange(16).reshape((4, 4))
 
         f = Function(name='f', grid=grid, dtype=np.int32)
-        g = Function(name='g', grid=grid, dtype=np.int32)
-        h = Function(name='h', grid=grid, dtype=np.int32)
+        g = Function(name='g', grid=grid, dtype=np.float32)
+        h = Function(name='h', grid=grid, dtype=np.float64)
 
         initialize_function([f, g, h], [a, a, a], 4, mode='reflect')
 
diff --git a/tests/test_symbolics.py b/tests/test_symbolics.py
index 27d40c93c2..ce5bb3cdf2 100644
--- a/tests/test_symbolics.py
+++ b/tests/test_symbolics.py
@@ -132,6 +132,17 @@ def test_indexed():
     assert ub.indexed.free_symbols == {ub.indexed}
 
 
+def test_indexed_staggered():
+    grid = Grid(shape=(10, 10))
+    x, y = grid.dimensions
+    hx, hy = x.spacing, y.spacing
+
+    u = Function(name='u', grid=grid, staggered=(x, y))
+    u0 = u.subs({x: 1, y: 2})
+    assert u0.indices == (1 + hx / 2, 2 + hy / 2)
+    assert u0.indexify().indices == (1, 2)
+
+
 def test_bundle():
     grid = Grid(shape=(4, 4))
 

From cb919a6cbaa0cb12ffd68fc3e378955c56316398 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Sat, 26 Aug 2023 14:22:16 +0000
Subject: [PATCH 50/90] compiler: Simplify SubFunction

---
 devito/types/dense.py       | 18 ++++--------------
 devito/types/sparse.py      | 23 ++++++++++++++++-------
 tests/test_interpolation.py |  1 -
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/devito/types/dense.py b/devito/types/dense.py
index d9adfcedc3..4c912d2704 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -20,7 +20,7 @@
 from devito.finite_differences import Differentiable, generate_fd_shortcuts
 from devito.tools import (ReducerMap, as_tuple, c_restrict_void_p, flatten, is_integer,
                           memoized_meth, dtype_to_ctype, humanbytes)
-from devito.types.dimension import Dimension
+from devito.types.dimension import Dimension, DynamicDimension
 from devito.types.args import ArgProvider
 from devito.types.caching import CacheManager
 from devito.types.basic import AbstractFunction, Size
@@ -1449,16 +1449,10 @@ class SubFunction(Function):
     """
     A Function bound to a "parent" DiscreteFunction.
 
-    A SubFunction hands control of argument binding and halo exchange to its
-    parent DiscreteFunction.
+    A SubFunction hands control of argument binding and halo exchange to the
+    DiscreteFunction it's bound to.
     """
 
-    __rkwargs__ = Function.__rkwargs__ + ('parent',)
-
-    def __init_finalize__(self, *args, **kwargs):
-        super(SubFunction, self).__init_finalize__(*args, **kwargs)
-        self._parent = kwargs['parent']
-
     def __padding_setup__(self, **kwargs):
         # SubFunctions aren't expected to be used in time-consuming loops
         return tuple((0, 0) for i in range(self.ndim))
@@ -1470,12 +1464,8 @@ def _arg_values(self, **kwargs):
         if self.name in kwargs:
             raise RuntimeError("`%s` is a SubFunction, so it can't be assigned "
                                "a value dynamically" % self.name)
-        else:
-            return self._parent._arg_defaults(alias=self._parent).reduce_all()
 
-    @property
-    def parent(self):
-        return self._parent
+        return self._arg_defaults(alias=self)
 
     @property
     def origin(self):
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index a1aef68f5f..4244c001ab 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -1179,6 +1179,15 @@ class PrecomputedSparseTimeFunction(AbstractSparseTimeFunction,
                                        PrecomputedSparseFunction.__rkwargs__))
 
 
+# *** MatrixSparse*Function API
+# This is mostly legacy stuff which often escapes the devito's modus operandi
+
+class DynamicSubFunction(SubFunction):
+
+    def _arg_defaults(self, **kwargs):
+        return {}
+
+
 class MatrixSparseTimeFunction(AbstractSparseTimeFunction):
     """
     A specialised type of SparseTimeFunction where the interpolation is externally
@@ -1378,7 +1387,7 @@ def __init_finalize__(self, *args, **kwargs):
         else:
             nnz_size = 1
 
-        self._mrow = SubFunction(
+        self._mrow = DynamicSubFunction(
             name='mrow_%s' % self.name,
             dtype=np.int32,
             dimensions=(self.nnzdim,),
@@ -1387,7 +1396,7 @@ def __init_finalize__(self, *args, **kwargs):
             parent=self,
             allocator=self._allocator,
         )
-        self._mcol = SubFunction(
+        self._mcol = DynamicSubFunction(
             name='mcol_%s' % self.name,
             dtype=np.int32,
             dimensions=(self.nnzdim,),
@@ -1396,7 +1405,7 @@ def __init_finalize__(self, *args, **kwargs):
             parent=self,
             allocator=self._allocator,
         )
-        self._mval = SubFunction(
+        self._mval = DynamicSubFunction(
             name='mval_%s' % self.name,
             dtype=self.dtype,
             dimensions=(self.nnzdim,),
@@ -1413,8 +1422,8 @@ def __init_finalize__(self, *args, **kwargs):
         self.par_dim_to_nnz_dim = DynamicDimension('par_dim_to_nnz_%s' % self.name)
 
         # This map acts as an indirect sort of the sources according to their
-        # position along the parallelisation Dimension
-        self._par_dim_to_nnz_map = SubFunction(
+        # position along the parallelisation dimension
+        self._par_dim_to_nnz_map = DynamicSubFunction(
             name='par_dim_to_nnz_map_%s' % self.name,
             dtype=np.int32,
             dimensions=(self.par_dim_to_nnz_dim,),
@@ -1423,7 +1432,7 @@ def __init_finalize__(self, *args, **kwargs):
             space_order=0,
             parent=self,
         )
-        self._par_dim_to_nnz_m = SubFunction(
+        self._par_dim_to_nnz_m = DynamicSubFunction(
             name='par_dim_to_nnz_m_%s' % self.name,
             dtype=np.int32,
             dimensions=(self._par_dim,),
@@ -1432,7 +1441,7 @@ def __init_finalize__(self, *args, **kwargs):
             space_order=0,
             parent=self,
         )
-        self._par_dim_to_nnz_M = SubFunction(
+        self._par_dim_to_nnz_M = DynamicSubFunction(
             name='par_dim_to_nnz_M_%s' % self.name,
             dtype=np.int32,
             dimensions=(self._par_dim,),
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index dca94c8f40..9a0608454a 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -689,7 +689,6 @@ def test_msf_interpolate():
 
     eqn_inject = sf.inject(field=u, expr=sf)
     op2 = Operator(eqn_inject)
-
     op2(time_m=0, time_M=4)
 
     # There should be 4 points touched for each source point

From c4c839f6e328850a09fbf2ec084423cc439c8925 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 1 Aug 2023 12:39:43 +0000
Subject: [PATCH 51/90] compiler: Tweak pow_to_mul & factorize

---
 devito/passes/clusters/factorization.py | 17 +++++++------
 devito/symbolics/queries.py             |  9 ++++---
 tests/test_dse.py                       | 32 +++++++++++++++++++++----
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/devito/passes/clusters/factorization.py b/devito/passes/clusters/factorization.py
index f023762402..1353bf89b3 100644
--- a/devito/passes/clusters/factorization.py
+++ b/devito/passes/clusters/factorization.py
@@ -142,13 +142,16 @@ def run(expr):
                     terms.append(i)
 
             # Collect common funcs
-            w_funcs = Add(*w_funcs, evaluate=False)
-            w_funcs = collect(w_funcs, funcs, evaluate=False)
-            try:
-                terms.extend([Mul(k, collect_const(v), evaluate=False)
-                              for k, v in w_funcs.items()])
-            except AttributeError:
-                assert w_funcs == 0
+            if len(w_funcs) > 1:
+                w_funcs = Add(*w_funcs, evaluate=False)
+                w_funcs = collect(w_funcs, funcs, evaluate=False)
+                try:
+                    terms.extend([Mul(k, collect_const(v), evaluate=False)
+                                  for k, v in w_funcs.items()])
+                except AttributeError:
+                    assert w_funcs == 0
+            else:
+                terms.extend(w_funcs)
 
             # Collect common pows
             w_pows = Add(*w_pows, evaluate=False)
diff --git a/devito/symbolics/queries.py b/devito/symbolics/queries.py
index ec86ae7809..c4002508cb 100644
--- a/devito/symbolics/queries.py
+++ b/devito/symbolics/queries.py
@@ -20,7 +20,8 @@
 # * Number
 # * Symbol
 # * Indexed
-extra_leaves = (FieldFromPointer, FieldFromComposite, IndexedBase, AbstractObject)
+extra_leaves = (FieldFromPointer, FieldFromComposite, IndexedBase, AbstractObject,
+                IndexedPointer)
 
 
 def q_symbol(expr):
@@ -31,7 +32,9 @@ def q_symbol(expr):
 
 
 def q_leaf(expr):
-    return expr.is_Atom or expr.is_Indexed or isinstance(expr, extra_leaves)
+    return (expr.is_Atom or
+            expr.is_Indexed or
+            isinstance(expr, extra_leaves))
 
 
 def q_indexed(expr):
@@ -51,7 +54,7 @@ def q_derivative(expr):
 def q_terminal(expr):
     return (expr.is_Symbol or
             expr.is_Indexed or
-            isinstance(expr, extra_leaves + (IndexedPointer,)))
+            isinstance(expr, extra_leaves))
 
 
 def q_routine(expr):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 730021c3d8..2aefe69ed4 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2,21 +2,27 @@
 import pytest
 from cached_property import cached_property
 
+from sympy import Mul  # noqa
+
 from conftest import (skipif, EVAL, _R, assert_structure, assert_blocking,  # noqa
                       get_params, get_arrays, check_array)
-from devito import (NODE, Eq, Inc, Constant, Function, TimeFunction, SparseTimeFunction,  # noqa
-                    Dimension, SubDimension, ConditionalDimension, DefaultDimension, Grid,
-                    Operator, norm, grad, div, dimensions, switchconfig, configuration,
-                    centered, first_derivative, solve, transpose, Abs, cos, sin, sqrt)
+from devito import (NODE, Eq, Inc, Constant, Function, TimeFunction,  # noqa
+                    SparseTimeFunction, Dimension, SubDimension,
+                    ConditionalDimension, DefaultDimension, Grid, Operator,
+                    norm, grad, div, dimensions, switchconfig, configuration,
+                    centered, first_derivative, solve, transpose, Abs, cos,
+                    sin, sqrt)
 from devito.exceptions import InvalidArgument, InvalidOperator
 from devito.finite_differences.differentiable import diffify
 from devito.ir import (Conditional, DummyEq, Expression, Iteration, FindNodes,
                        FindSymbols, ParallelIteration, retrieve_iteration_tree)
 from devito.passes.clusters.aliases import collect
+from devito.passes.clusters.factorization import collect_nested
 from devito.passes.clusters.cse import Temp, _cse
 from devito.passes.iet.parpragma import VExpanded
 from devito.symbolics import (INT, FLOAT, DefFunction, FieldFromPointer,  # noqa
-                              Keyword, SizeOf, estimate_cost, pow_to_mul, indexify)
+                              IndexedPointer, Keyword, SizeOf, estimate_cost,
+                              pow_to_mul, indexify)
 from devito.tools import as_tuple, generator
 from devito.types import Array, Scalar, Symbol
 
@@ -161,6 +167,9 @@ def test_cse(exprs, expected, min_cost):
     ('fa[x]**(-s)', 'fa[x]**(-s)'),
     ('-2/(s**2)', '-2/(s*s)'),
     ('-fa[x]', '-fa[x]'),
+    ('Mul(SizeOf("char"), '
+     '-IndexedPointer(FieldFromPointer("size", fa._C_symbol), x), evaluate=False)',
+     'sizeof(char)*(-fa_vec->size[x])'),
 ])
 def test_pow_to_mul(expr, expected):
     grid = Grid((4, 5))
@@ -173,6 +182,19 @@ def test_pow_to_mul(expr, expected):
     assert str(pow_to_mul(eval(expr))) == expected
 
 
+@pytest.mark.parametrize('expr,expected', [
+    ('s - SizeOf("int")*fa[x]', 's - fa[x]*sizeof(int)'),
+])
+def test_factorize(expr, expected):
+    grid = Grid((4, 5))
+    x, y = grid.dimensions
+
+    s = Scalar(name='s')  # noqa
+    fa = Function(name='fa', grid=grid, dimensions=(x,), shape=(4,))  # noqa
+
+    assert str(collect_nested(eval(expr))) == expected
+
+
 @pytest.mark.parametrize('expr,expected,estimate', [
     ('Eq(t0, 3)', 0, False),
     ('Eq(t0, 4.5)', 0, False),

From eafda08dc304e85b47d9b65b994c7136615d3d63 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 7 Sep 2023 08:21:13 -0400
Subject: [PATCH 52/90] api: reconstruct sparse with subfunc rather than its
 data

---
 devito/types/sparse.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 4244c001ab..f036a68c9c 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -791,7 +791,7 @@ class SparseFunction(AbstractSparseFunction):
 
     _sub_functions = ('coordinates',)
 
-    __rkwargs__ = AbstractSparseFunction.__rkwargs__ + ('coordinates_data',)
+    __rkwargs__ = AbstractSparseFunction.__rkwargs__ + ('coordinates',)
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)
@@ -1014,8 +1014,8 @@ class PrecomputedSparseFunction(AbstractSparseFunction):
     _sub_functions = ('gridpoints', 'coordinates', 'interpolation_coeffs')
 
     __rkwargs__ = (AbstractSparseFunction.__rkwargs__ +
-                   ('r', 'gridpoints_data', 'coordinates_data',
-                    'interpolation_coeffs_data'))
+                   ('r', 'gridpoints', 'coordinates',
+                    'interpolation_coeffs'))
 
     def __init_finalize__(self, *args, **kwargs):
         super().__init_finalize__(*args, **kwargs)

From cd59050c325388d7343dfc2fcea95be21481fbcf Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 7 Sep 2023 08:21:48 -0400
Subject: [PATCH 53/90] compiler: remove mul print tweak

---
 devito/symbolics/printer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py
index 8f7ef6a719..c47ef95bfc 100644
--- a/devito/symbolics/printer.py
+++ b/devito/symbolics/printer.py
@@ -105,10 +105,6 @@ def _print_Mod(self, expr):
         args = ['(%s)' % self._print(a) for a in expr.args]
         return '%'.join(args)
 
-    def _print_Mul(self, expr):
-        term = super()._print_Mul(expr)
-        return term.replace("(-1)*", "-")
-
     def _print_Min(self, expr):
         if has_integer_args(*expr.args) and len(expr.args) == 2:
             return "MIN(%s)" % self._print(expr.args)[1:-1]

From 68c29d47a829cbc751a6b54c08ef2d664e92eb5a Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 7 Sep 2023 09:53:00 -0400
Subject: [PATCH 54/90] api: fix subfunction handling (subs/rebuild/...)

---
 devito/symbolics/printer.py                   |   4 +
 devito/types/dense.py                         |   3 +-
 devito/types/sparse.py                        | 113 ++++++++----------
 examples/seismic/inversion/inversion_utils.py |  22 ++--
 tests/test_interpolation.py                   |   4 +-
 tests/test_pickle.py                          |   2 +-
 tests/{test_msparse.py => test_sparse.py}     |  64 +++++++++-
 7 files changed, 128 insertions(+), 84 deletions(-)
 rename tests/{test_msparse.py => test_sparse.py} (84%)

diff --git a/devito/symbolics/printer.py b/devito/symbolics/printer.py
index c47ef95bfc..8f7ef6a719 100644
--- a/devito/symbolics/printer.py
+++ b/devito/symbolics/printer.py
@@ -105,6 +105,10 @@ def _print_Mod(self, expr):
         args = ['(%s)' % self._print(a) for a in expr.args]
         return '%'.join(args)
 
+    def _print_Mul(self, expr):
+        term = super()._print_Mul(expr)
+        return term.replace("(-1)*", "-")
+
     def _print_Min(self, expr):
         if has_integer_args(*expr.args) and len(expr.args) == 2:
             return "MIN(%s)" % self._print(expr.args)[1:-1]
diff --git a/devito/types/dense.py b/devito/types/dense.py
index 4c912d2704..ec371b662c 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -20,7 +20,7 @@
 from devito.finite_differences import Differentiable, generate_fd_shortcuts
 from devito.tools import (ReducerMap, as_tuple, c_restrict_void_p, flatten, is_integer,
                           memoized_meth, dtype_to_ctype, humanbytes)
-from devito.types.dimension import Dimension, DynamicDimension
+from devito.types.dimension import Dimension
 from devito.types.args import ArgProvider
 from devito.types.caching import CacheManager
 from devito.types.basic import AbstractFunction, Size
@@ -1040,6 +1040,7 @@ def __indices_setup__(cls, *args, **kwargs):
             dimensions = grid.dimensions
 
         if args:
+            assert len(args) == len(dimensions)
             return tuple(dimensions), tuple(args)
 
         # Staggered indices
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index f036a68c9c..42ecdc4a9a 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -61,11 +61,9 @@ def __indices_setup__(cls, *args, **kwargs):
             dimensions = (Dimension(name='p_%s' % kwargs["name"]),)
 
         if args:
-            indices = args
+            return tuple(dimensions), tuple(args)
         else:
-            indices = dimensions
-
-        return dimensions, indices
+            return dimensions, dimensions
 
     @classmethod
     def __shape_setup__(cls, **kwargs):
@@ -80,16 +78,6 @@ def __shape_setup__(cls, **kwargs):
             shape = (glb_npoint[grid.distributor.myrank],)
         return shape
 
-    def func(self, *args, **kwargs):
-        # Rebuild subfunctions first to avoid new data creation as we have to use `_data`
-        # as a reconstruction kwargs to avoid the circular dependency
-        # with the parent in SubFunction
-        # This is also necessary to avoid shape issue in the SubFunction with mpi
-        for s in self._sub_functions:
-            if getattr(self, s) is not None:
-                kwargs.update({s: getattr(self, s).func(*args, **kwargs)})
-        return super().func(*args, **kwargs)
-
     def __fd_setup__(self):
         """
         Dynamically add derivative short-cuts.
@@ -108,24 +96,39 @@ def __distributor_setup__(self, **kwargs):
         )
 
     def __subfunc_setup__(self, key, suffix, dtype=None):
+        # Shape and dimensions from args
+        name = '%s_%s' % (self.name, suffix)
+
+        if key is not None and not isinstance(key, SubFunction):
+            key = np.array(key)
+
+        if key is not None:
+            dimensions = (self._sparse_dim, Dimension(name='d'))
+            if key.ndim > 2:
+                dimensions = (self._sparse_dim, Dimension(name='d'),
+                              *mkdims("i", n=key.ndim-2))
+            else:
+                dimensions = (self._sparse_dim, Dimension(name='d'))
+            shape = (self.npoint, self.grid.dim, *key.shape[2:])
+        else:
+            dimensions = (self._sparse_dim, Dimension(name='d'))
+            shape = (self.npoint, self.grid.dim)
+
+        # Check if already a SubFunction
         if isinstance(key, SubFunction):
-            return key
+            # Need to rebuild so the dimensions match the parent SparseFunction
+            indices = (self.indices[self._sparse_position], *key.indices[1:])
+            return key._rebuild(*indices, name=name, shape=shape,
+                                alias=self.alias, halo=None)
         elif key is not None and not isinstance(key, Iterable):
             raise ValueError("`%s` must be either SubFunction "
                              "or iterable (e.g., list, np.ndarray)" % key)
 
-        name = '%s_%s' % (self.name, suffix)
-        dimensions = (self._sparse_dim, Dimension(name='d'))
-        shape = (self.npoint, self.grid.dim)
-
         if key is None:
             # Fallback to default behaviour
             dtype = dtype or self.dtype
         else:
-            if key is not None:
-                key = np.array(key)
-
-            if (shape != key.shape[:2] and key.shape != (shape[1],)) and \
+            if (shape != key.shape and key.shape != (shape[1],)) and \
                     self._distributor.nprocs == 1:
                 raise ValueError("Incompatible shape for %s, `%s`; expected `%s`" %
                                  (suffix, key.shape[:2], shape))
@@ -136,12 +139,8 @@ def __subfunc_setup__(self, key, suffix, dtype=None):
             else:
                 dtype = dtype or self.dtype
 
-        if key is not None and key.ndim > 2:
-            shape = (*shape, *key.shape[2:])
-            dimensions = (*dimensions, *mkdims("i", n=key.ndim-2))
-
         sf = SubFunction(
-            name=name, parent=self, dtype=dtype, dimensions=dimensions,
+            name=name, dtype=dtype, dimensions=dimensions,
             shape=shape, space_order=0, initializer=key, alias=self.alias,
             distributor=self._distributor
         )
@@ -657,20 +656,6 @@ def time_dim(self):
         """The time Dimension."""
         return self._time_dim
 
-    @classmethod
-    def __indices_setup__(cls, *args, **kwargs):
-        dimensions = as_tuple(kwargs.get('dimensions'))
-        if not dimensions:
-            dimensions = (kwargs['grid'].time_dim,
-                          Dimension(name='p_%s' % kwargs["name"]))
-
-        if args:
-            indices = args
-        else:
-            indices = dimensions
-
-        return dimensions, indices
-
     @classmethod
     def __shape_setup__(cls, **kwargs):
         shape = kwargs.get('shape')
@@ -686,6 +671,18 @@ def __shape_setup__(cls, **kwargs):
 
         return tuple(shape)
 
+    @classmethod
+    def __indices_setup__(cls, *args, **kwargs):
+        dimensions = as_tuple(kwargs.get('dimensions'))
+        if not dimensions:
+            dimensions = (kwargs['grid'].time_dim,
+                          Dimension(name='p_%s' % kwargs["name"]),)
+
+        if args:
+            return tuple(dimensions), tuple(args)
+        else:
+            return dimensions, dimensions
+
     @property
     def nt(self):
         return self.shape[self._time_position]
@@ -1032,13 +1029,14 @@ def __init_finalize__(self, *args, **kwargs):
         if r <= 0:
             raise ValueError('`r` must be > 0')
         # Make sure radius matches the coefficients size
-        nr = interpolation_coeffs.shape[-1]
-        if nr // 2 != r:
-            if nr == r:
-                r = r // 2
-            else:
-                raise ValueError("Interpolation coefficients shape %d do "
-                                 "not match specified radius %d" % (r, nr))
+        if interpolation_coeffs is not None:
+            nr = interpolation_coeffs.shape[-1]
+            if nr // 2 != r:
+                if nr == r:
+                    r = r // 2
+                else:
+                    raise ValueError("Interpolation coefficients shape %d do "
+                                     "not match specified radius %d" % (nr, r))
         self._radius = r
 
         if coordinates is not None and gridpoints is not None:
@@ -1680,23 +1678,6 @@ def inject(self, field, expr, u_t=None, p_t=None):
 
         return out
 
-    @classmethod
-    def __indices_setup__(cls, *args, **kwargs):
-        """
-        Return the default Dimension indices for a given data shape.
-        """
-        dimensions = kwargs.get('dimensions')
-        if dimensions is None:
-            dimensions = (kwargs['grid'].time_dim, Dimension(
-                name='p_%s' % kwargs["name"]))
-
-        if args:
-            indices = args
-        else:
-            indices = dimensions
-
-        return dimensions, indices
-
     @classmethod
     def __shape_setup__(cls, **kwargs):
         # This happens before __init__, so we have to get 'npoint'
diff --git a/examples/seismic/inversion/inversion_utils.py b/examples/seismic/inversion/inversion_utils.py
index 7f4784ae8c..9f5709b315 100644
--- a/examples/seismic/inversion/inversion_utils.py
+++ b/examples/seismic/inversion/inversion_utils.py
@@ -7,19 +7,15 @@ def compute_residual(res, dobs, dsyn):
     """
     Computes the data residual dsyn - dobs into residual
     """
-    if res.grid.distributor.is_parallel:
-        # If we run with MPI, we have to compute the residual via an operator
-        # First make sure we can take the difference and that receivers are at the
-        # same position
-        assert np.allclose(dobs.coordinates.data[:], dsyn.coordinates.data)
-        assert np.allclose(res.coordinates.data[:], dsyn.coordinates.data)
-        # Create a difference operator
-        diff_eq = Eq(res, dsyn.subs({dsyn.dimensions[-1]: res.dimensions[-1]}) -
-                     dobs.subs({dobs.dimensions[-1]: res.dimensions[-1]}))
-        Operator(diff_eq)()
-    else:
-        # A simple data difference is enough in serial
-        res.data[:] = dsyn.data[:] - dobs.data[:]
+    # If we run with MPI, we have to compute the residual via an operator
+    # First make sure we can take the difference and that receivers are at the
+    # same position
+    assert np.allclose(dobs.coordinates.data[:], dsyn.coordinates.data)
+    assert np.allclose(res.coordinates.data[:], dsyn.coordinates.data)
+    # Create a difference operator
+    diff_eq = Eq(res, dsyn.subs({dsyn.dimensions[-1]: res.dimensions[-1]}) -
+                 dobs.subs({dobs.dimensions[-1]: res.dimensions[-1]}))
+    Operator(diff_eq)()
 
     return res
 
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 9a0608454a..3a22ca1db7 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -242,8 +242,8 @@ def test_precomputed_injection_time(r):
     sf = PrecomputedSparseTimeFunction(name='s', grid=m.grid, r=r, npoint=len(coords),
                                        gridpoints=gridpoints, nt=nt,
                                        interpolation_coeffs=interpolation_coeffs)
-
-    expr = sf.inject(m, Float(1.))
+    sf.data.fill(1.)
+    expr = sf.inject(m, sf)
 
     op = Operator(expr)
 
diff --git a/tests/test_pickle.py b/tests/test_pickle.py
index 62423b2c15..16f44bdaed 100644
--- a/tests/test_pickle.py
+++ b/tests/test_pickle.py
@@ -111,7 +111,7 @@ def test_precomputed_sparse_function(self, mode, pickle):
 
         sf = PrecomputedSparseTimeFunction(
             name='sf', grid=grid, r=2, npoint=3, nt=5,
-            interpolation_coeffs=np.ndarray(shape=(3, 2, 2)), **kw
+            interpolation_coeffs=np.random.randn(3, 2, 2), **kw
         )
         sf.data[2, 1] = 5.
 
diff --git a/tests/test_msparse.py b/tests/test_sparse.py
similarity index 84%
rename from tests/test_msparse.py
rename to tests/test_sparse.py
index 5cbfde848a..04545d7cc6 100644
--- a/tests/test_msparse.py
+++ b/tests/test_sparse.py
@@ -4,7 +4,13 @@
 import numpy as np
 import scipy.sparse
 
-from devito import Grid, TimeFunction, Eq, Operator, MatrixSparseTimeFunction
+from devito import Grid, TimeFunction, Eq, Operator, Dimension
+from devito import (SparseFunction, SparseTimeFunction, PrecomputedSparseFunction,
+                    PrecomputedSparseTimeFunction, MatrixSparseTimeFunction)
+
+
+_sptypes = [SparseFunction, SparseTimeFunction,
+            PrecomputedSparseFunction, PrecomputedSparseTimeFunction]
 
 
 class TestMatrixSparseTimeFunction(object):
@@ -394,5 +400,61 @@ def test_mpi(self):
             assert sf.data[0, 0] == -3.0  # 1 * (1 * 1) * 1 + (-1) * (2 * 2) * 1
 
 
+class TestSparseFunction(object):
+
+    @pytest.mark.parametrize('sptype', _sptypes)
+    def test_rebuild(self, sptype):
+        grid = Grid((3, 3, 3))
+        # Base object
+        sp = sptype(name="s", grid=grid, npoint=1, nt=11, r=2,
+                    interpolation_coeffs=np.random.randn(1, 3, 2),
+                    coordinates=np.random.randn(1, 3))
+
+        # Check subfunction setup
+        for subf in sp._sub_functions:
+            if getattr(sp, subf) is not None:
+                assert getattr(sp, subf).name.startswith("s_")
+
+        # Rebuild with different name, this should drop the function
+        # and create new data
+        sp2 = sp._rebuild(name="sr")
+
+        # Check new subfunction
+        for subf in sp2._sub_functions:
+            if getattr(sp2, subf) is not None:
+                assert getattr(sp2, subf).name.startswith("sr_")
+                assert np.all(getattr(sp2, subf).data == 0)
+
+        # Rebuild with different name as an alias
+        sp2 = sp._rebuild(name="sr2", alias=True)
+        for subf in sp2._sub_functions:
+            if getattr(sp2, subf) is not None:
+                assert getattr(sp2, subf).name.startswith("sr2_")
+                assert getattr(sp2, subf).data is None
+
+    @pytest.mark.parametrize('sptype', _sptypes)
+    def test_subs(self, sptype):
+        grid = Grid((3, 3, 3))
+        # Base object
+        sp = sptype(name="s", grid=grid, npoint=1, nt=11, r=2,
+                    interpolation_coeffs=np.random.randn(1, 3, 2),
+                    coordinates=np.random.randn(1, 3))
+
+        # Check subfunction setup
+        for subf in sp._sub_functions:
+            if getattr(sp, subf) is not None:
+                assert getattr(sp, subf).dimensions[0] == sp._sparse_dim
+
+        # Do substitution on sparse dimension
+        new_spdim = Dimension(name="newsp")
+
+        sps = sp._subs(sp._sparse_dim, new_spdim)
+        assert sps.indices[sp._sparse_position] == new_spdim
+        for subf in sps._sub_functions:
+            if getattr(sps, subf) is not None:
+                assert getattr(sps, subf).indices[0] == new_spdim
+                assert np.all(getattr(sps, subf).data == getattr(sp, subf).data)
+
+
 if __name__ == "__main__":
     TestMatrixSparseTimeFunction().test_mpi()

From 5eaad80ae52c0af7d41e58bbbb395278a01f6de9 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 11 Sep 2023 20:06:30 -0400
Subject: [PATCH 55/90] CI: remove deprecated asv option

---
 .github/workflows/asv.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/asv.yml b/.github/workflows/asv.yml
index f1941d82df..ec75cb81cc 100644
--- a/.github/workflows/asv.yml
+++ b/.github/workflows/asv.yml
@@ -58,7 +58,7 @@ jobs:
 
     - name: Run benchmarks
       run: |
-        asv run -v --strict --show-stderr --config benchmarks/regression/asv.conf.json --cpu-affinity 0-7 --machine i7-6700K
+        asv run -v --show-stderr --config benchmarks/regression/asv.conf.json --cpu-affinity 0-7 --machine i7-6700K
 
     - name: Checkout asv-results branch
       uses: actions/checkout@v3

From 76a3c4b30512fe67ad265fae32efa0faa95e1a82 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Mon, 11 Sep 2023 09:35:15 -0400
Subject: [PATCH 56/90] api: always use conditional dimension for interpolation
 radius dim

---
 devito/core/autotuning.py          |  7 +++++--
 devito/operations/interpolators.py | 28 +++++++++++++++----------
 devito/operator/operator.py        |  5 +++--
 devito/tools/data_structures.py    |  9 ++++++++
 devito/tools/utils.py              |  9 +++++++-
 devito/types/basic.py              |  4 +++-
 devito/types/dense.py              |  2 +-
 devito/types/dimension.py          | 33 +++++++++++++++++++-----------
 tests/test_operator.py             |  6 ++++--
 9 files changed, 71 insertions(+), 32 deletions(-)

diff --git a/devito/core/autotuning.py b/devito/core/autotuning.py
index 603a78efd6..16b146721a 100644
--- a/devito/core/autotuning.py
+++ b/devito/core/autotuning.py
@@ -209,8 +209,11 @@ def init_time_bounds(stepper, at_args, args):
     else:
         at_args[dim.max_name] = at_args[dim.min_name] + options['squeezer']
         if dim.size_name in args:
-            # May need to shrink to avoid OOB accesses
-            at_args[dim.max_name] = min(at_args[dim.max_name], args[dim.max_name])
+            if isinstance(args[dim.size_name], range):
+                pass
+            else:
+                # May need to shrink to avoid OOB accesses
+                at_args[dim.max_name] = min(at_args[dim.max_name], args[dim.max_name])
         if at_args[dim.min_name] > at_args[dim.max_name]:
             warning("too few time iterations; skipping")
             return False
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index ab758ba982..92bc392392 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -155,7 +155,19 @@ def _rdim(self):
                                 -self.r+1, self.r, 2*self.r, parent)
                 for d in self._gdims]
 
-        return DimensionTuple(*dims, getters=self._gdims)
+        # Make radius dimension conditional to avoid OOB
+        rdims = []
+        pos = self.sfunction._position_map.values()
+
+        for (d, rd, p) in zip(self._gdims, dims, pos):
+            # Add conditional to avoid OOB
+            lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
+            ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False)
+            cond = sympy.And(lb, ub, evaluate=False)
+            rdims.append(ConditionalDimension(rd.name, rd, condition=cond,
+                                              indirect=True))
+
+        return DimensionTuple(*rdims, getters=self._gdims)
 
     def _augment_implicit_dims(self, implicit_dims):
         if self.sfunction._sparse_position == -1:
@@ -177,13 +189,6 @@ def _interp_idx(self, variables, implicit_dims=None):
         mapper = {}
         pos = self.sfunction._position_map.values()
 
-        for ((di, d), rd, p) in zip(enumerate(self._gdims), self._rdim, pos):
-            # Add conditional to avoid OOB
-            lb = sympy.And(rd + p >= d.symbolic_min - self.r, evaluate=False)
-            ub = sympy.And(rd + p <= d.symbolic_max + self.r, evaluate=False)
-            cond = sympy.And(lb, ub, evaluate=False)
-            mapper[d] = ConditionalDimension(rd.name, rd, condition=cond, indirect=True)
-
         # Temporaries for the position
         temps = self._positions(implicit_dims)
 
@@ -191,10 +196,10 @@ def _interp_idx(self, variables, implicit_dims=None):
         temps.extend(self._coeff_temps(implicit_dims))
 
         # Substitution mapper for variables
+        mapper = self._rdim._getters
         idx_subs = {v: v.subs({k: c + p
                     for ((k, c), p) in zip(mapper.items(), pos)})
                     for v in variables}
-        idx_subs.update(dict(zip(self._rdim, mapper.values())))
 
         return idx_subs, temps
 
@@ -290,7 +295,7 @@ def _inject(self, field, expr, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self._augment_implicit_dims(implicit_dims) + self._rdim
+        implicit_dims = self._augment_implicit_dims(implicit_dims)
 
         # Make iterable to support inject((u, v), expr=expr)
         # or inject((u, v), expr=(expr1, expr2))
@@ -380,5 +385,6 @@ def interpolation_coeffs(self):
     @property
     def _weights(self):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        return Mul(*[self.interpolation_coeffs.subs({ddim: ri, cdim: rd-rd.symbolic_min})
+        return Mul(*[self.interpolation_coeffs.subs({ddim: ri,
+                                                     cdim: rd-rd.parent.symbolic_min})
                      for (ri, rd) in enumerate(self._rdim)])
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index d1bee9fa66..b91d51727f 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -24,7 +24,7 @@
 from devito.symbolics import estimate_cost
 from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_tuple, flatten,
                           filter_sorted, frozendict, is_integer, split, timed_pass,
-                          timed_region)
+                          timed_region, contains_val)
 from devito.types import Grid, Evaluable
 
 __all__ = ['Operator']
@@ -526,6 +526,7 @@ def _prepare_arguments(self, autotune=None, **kwargs):
         edges = [(i, i.parent) for i in self.dimensions
                  if i.is_Derived and i.parent in set(nodes)]
         toposort = DAG(nodes, edges).topological_sort()
+
         futures = {}
         for d in reversed(toposort):
             if set(d._arg_names).intersection(kwargs):
@@ -560,7 +561,7 @@ def _prepare_arguments(self, autotune=None, **kwargs):
                     # a TimeFunction `usave(t_sub, x, y)`, an override for `fact` is
                     # supplied w/o overriding `usave`; that's legal
                     pass
-                elif is_integer(args[k]) and args[k] not in as_tuple(v):
+                elif is_integer(args[k]) and not contains_val(args[k], v):
                     raise ValueError("Default `%s` is incompatible with other args as "
                                      "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                      "forgot to override `%s`?" %
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
index 539f75d593..3afe7197eb 100644
--- a/devito/tools/data_structures.py
+++ b/devito/tools/data_structures.py
@@ -110,6 +110,7 @@ def unique(self, key):
             Key for which to retrieve a unique value.
         """
         candidates = self.getall(key)
+        candidates = [c for c in candidates if c is not None]
 
         def compare_to_first(v):
             first = candidates[0]
@@ -122,12 +123,20 @@ def compare_to_first(v):
                     return first in v
             elif isinstance(first, Set):
                 return v in first
+            elif isinstance(v, range):
+                if isinstance(first, range):
+                    return first.stop > v.start and v.stop > first.start
+                else:
+                    return first >= v.start and first < v.stop
+            elif isinstance(first, range):
+                return v >= first.start and v < first.stop
             else:
                 return first == v
 
         if len(candidates) == 1:
             return candidates[0]
         elif all(map(compare_to_first, candidates)):
+            # return first non-range
             return candidates[0]
         else:
             raise ValueError("Unable to find unique value for key %s, candidates: %s"
diff --git a/devito/tools/utils.py b/devito/tools/utils.py
index 16f0987930..d99bf34b25 100644
--- a/devito/tools/utils.py
+++ b/devito/tools/utils.py
@@ -12,7 +12,7 @@
            'roundm', 'powerset', 'invert', 'flatten', 'single_or', 'filter_ordered',
            'as_mapper', 'filter_sorted', 'pprint', 'sweep', 'all_equal', 'as_list',
            'indices_to_slices', 'indices_to_sections', 'transitive_closure',
-           'humanbytes']
+           'humanbytes', 'contains_val']
 
 
 def prod(iterable, initial=1):
@@ -75,6 +75,13 @@ def is_integer(value):
     return isinstance(value, (int, np.integer, sympy.Integer))
 
 
+def contains_val(val, items):
+    try:
+        return val in items
+    except TypeError:
+        return val == items
+
+
 def generator():
     """
     Return a function ``f`` that generates integer numbers starting at 0
diff --git a/devito/types/basic.py b/devito/types/basic.py
index c36496d195..400232faed 100644
--- a/devito/types/basic.py
+++ b/devito/types/basic.py
@@ -838,13 +838,15 @@ def __new__(cls, *args, **kwargs):
         newobj._dimensions = dimensions
         newobj._shape = cls.__shape_setup__(**kwargs)
         newobj._dtype = cls.__dtype_setup__(**kwargs)
-        newobj.__init_finalize__(*args, **kwargs)
 
         # All objects created off an existing AbstractFunction `f` (e.g.,
         # via .func, or .subs, such as `f(x + 1)`) keep a reference to `f`
         # through the `function` field
         newobj.function = function or newobj
 
+        # Initialization
+        newobj.__init_finalize__(*args, **kwargs)
+
         return newobj
 
     def __init__(self, *args, **kwargs):
diff --git a/devito/types/dense.py b/devito/types/dense.py
index ec371b662c..2a13fa91af 100644
--- a/devito/types/dense.py
+++ b/devito/types/dense.py
@@ -94,7 +94,7 @@ def __init_finalize__(self, *args, function=None, **kwargs):
             # a reference to the user-provided buffer
             self._initializer = None
             if len(initializer) > 0:
-                self.data_with_halo[:] = initializer
+                self.data_with_halo[:] = initializer[:]
             else:
                 # This is a corner case -- we might get here, for example, when
                 # running with MPI and some processes get 0-size arrays after
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 76d9d9e60a..6044f01469 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -298,13 +298,19 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs):
         # may represent sets of legal values. If that's the case, here we just
         # pick one. Note that we sort for determinism
         try:
-            loc_minv = sorted(loc_minv).pop(0)
-        except TypeError:
-            pass
+            loc_minv = loc_minv.start
+        except AttributeError:
+            try:
+                loc_minv = sorted(loc_minv).pop(0)
+            except TypeError:
+                pass
         try:
-            loc_maxv = sorted(loc_maxv).pop(0)
-        except TypeError:
-            pass
+            loc_maxv = loc_maxv.start
+        except AttributeError:
+            try:
+                loc_maxv = sorted(loc_maxv).pop(0)
+            except TypeError:
+                pass
 
         return {self.min_name: loc_minv, self.max_name: loc_maxv}
 
@@ -853,8 +859,7 @@ def _arg_defaults(self, _min=None, size=None, alias=None):
             factor = defaults[dim._factor.name] = dim._factor.data
         except AttributeError:
             factor = dim._factor
-        defaults[dim.parent.max_name] = \
-            frozenset(range(factor*(size - 1), factor*(size)))
+        defaults[dim.parent.max_name] = range(1, factor*(size))
 
         return defaults
 
@@ -977,8 +982,9 @@ def symbolic_incr(self):
     def bound_symbols(self):
         return set(self.parent.bound_symbols)
 
-    def _arg_defaults(self, **kwargs):
-        return {}
+    def _arg_defaults(self, alias=None, **kwargs):
+        dim = alias or self
+        return {dim.parent.size_name: range(self.symbolic_size, np.iinfo(np.int64).max)}
 
     def _arg_values(self, *args, **kwargs):
         return {}
@@ -1446,7 +1452,7 @@ def symbolic_max(self):
     def _arg_names(self):
         return (self.min_name, self.max_name, self.name) + self.parent._arg_names
 
-    def _arg_defaults(self, _min=None, **kwargs):
+    def _arg_defaults(self, _min=None, size=None, **kwargs):
         """
         A map of default argument values defined by this dimension.
 
@@ -1460,7 +1466,10 @@ def _arg_defaults(self, _min=None, **kwargs):
         A SteppingDimension does not know its max point and therefore
         does not have a size argument.
         """
-        return {self.parent.min_name: _min}
+        args = {self.parent.min_name: _min}
+        if size:
+            args[self.parent.size_name] = range(size-1, np.iinfo(np.int32).max)
+        return args
 
     def _arg_values(self, *args, **kwargs):
         """
diff --git a/tests/test_operator.py b/tests/test_operator.py
index f38ac01942..4f8228bc24 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -707,6 +707,8 @@ def verify_arguments(self, arguments, expected):
             if isinstance(v, (Function, SparseFunction)):
                 condition = v._C_as_ndarray(arguments[name])[v._mask_domain] == v.data
                 condition = condition.all()
+            elif isinstance(arguments[name], range):
+                condition = arguments[name].start <= v < arguments[name].stop
             else:
                 condition = arguments[name] == v
 
@@ -1803,7 +1805,7 @@ def test_scheduling_sparse_functions(self):
         # `trees` than 6
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 5
+        assert len(trees) == 6
         # Time loop not shared due to the WAR
         assert trees[0][0].dim is time and trees[0][0] is trees[1][0]  # this IS shared
         assert trees[1][0] is not trees[3][0]
@@ -1813,7 +1815,7 @@ def test_scheduling_sparse_functions(self):
         eqn2 = sf1.inject(u1.forward, expr=sf1)
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 5
+        assert len(trees) == 6
         assert all(trees[0][0] is i[0] for i in trees)
 
     def test_scheduling_with_free_dims(self):

From fa1a9b7eb772676f67fe452f6afa4d4451fbadc4 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 15 Sep 2023 09:19:22 -0400
Subject: [PATCH 57/90] api: process injected expression dimensions in case
 it's not the sparse function

---
 devito/builtins/initializers.py    |  7 ++++++-
 devito/core/autotuning.py          |  4 +---
 devito/operations/interpolators.py | 30 ++++++++++++++++++++----------
 devito/operator/operator.py        |  8 ++++++++
 devito/passes/iet/langbase.py      |  3 +++
 devito/passes/iet/parpragma.py     |  1 +
 devito/tools/data_structures.py    |  3 +++
 devito/types/dimension.py          | 15 ++++++---------
 tests/test_buffering.py            |  2 +-
 tests/test_dimension.py            |  3 +--
 tests/test_dle.py                  | 11 ++++++++---
 tests/test_dse.py                  | 28 +++++++++++++++-------------
 tests/test_interpolation.py        | 26 ++++++++++++++++++++++++++
 tests/test_mpi.py                  |  4 ++--
 14 files changed, 101 insertions(+), 44 deletions(-)

diff --git a/devito/builtins/initializers.py b/devito/builtins/initializers.py
index f338e194e1..83bad735fa 100644
--- a/devito/builtins/initializers.py
+++ b/devito/builtins/initializers.py
@@ -77,7 +77,12 @@ def assign(f, rhs=0, options=None, name='assign', assign_halo=False, **kwargs):
                                          symbolic_max=d.symbolic_max + h.right)
         eqs = [eq.xreplace(subs) for eq in eqs]
 
-    dv.Operator(eqs, name=name, **kwargs)()
+    op = dv.Operator(eqs, name=name, **kwargs)
+    try:
+        op()
+    except ValueError:
+        # Corner case such as assign(u, v) with v a Buffered TimeFunction
+        op(time_M=f._time_size)
 
 
 def smooth(f, g, axis=None):
diff --git a/devito/core/autotuning.py b/devito/core/autotuning.py
index 16b146721a..f30c020ef7 100644
--- a/devito/core/autotuning.py
+++ b/devito/core/autotuning.py
@@ -209,9 +209,7 @@ def init_time_bounds(stepper, at_args, args):
     else:
         at_args[dim.max_name] = at_args[dim.min_name] + options['squeezer']
         if dim.size_name in args:
-            if isinstance(args[dim.size_name], range):
-                pass
-            else:
+            if not isinstance(args[dim.size_name], range):
                 # May need to shrink to avoid OOB accesses
                 at_args[dim.max_name] = min(at_args[dim.max_name], args[dim.max_name])
         if at_args[dim.min_name] > at_args[dim.max_name]:
diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index 92bc392392..dae96d8dfe 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -169,11 +169,17 @@ def _rdim(self):
 
         return DimensionTuple(*rdims, getters=self._gdims)
 
-    def _augment_implicit_dims(self, implicit_dims):
+    def _augment_implicit_dims(self, implicit_dims, extras=None):
+        if extras is not None:
+            extra = set([i for v in extras for i in v.dimensions]) - set(self._gdims)
+            extra = tuple(extra)
+        else:
+            extra = tuple()
+
         if self.sfunction._sparse_position == -1:
-            return self.sfunction.dimensions + as_tuple(implicit_dims)
+            return self.sfunction.dimensions + as_tuple(implicit_dims) + extra
         else:
-            return as_tuple(implicit_dims) + self.sfunction.dimensions
+            return as_tuple(implicit_dims) + self.sfunction.dimensions + extra
 
     def _coeff_temps(self, implicit_dims):
         return []
@@ -252,8 +258,6 @@ def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
             interpolation expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self._augment_implicit_dims(implicit_dims)
-
         # Derivatives must be evaluated before the introduction of indirect accesses
         try:
             _expr = expr.evaluate
@@ -263,6 +267,9 @@ def _interpolate(self, expr, increment=False, self_subs={}, implicit_dims=None):
 
         variables = list(retrieve_function_carriers(_expr))
 
+        # Implicit dimensions
+        implicit_dims = self._augment_implicit_dims(implicit_dims)
+
         # List of indirection indices for all adjacent grid points
         idx_subs, temps = self._interp_idx(variables, implicit_dims=implicit_dims)
 
@@ -295,8 +302,6 @@ def _inject(self, field, expr, implicit_dims=None):
             injection expression, but that should be honored when constructing
             the operator.
         """
-        implicit_dims = self._augment_implicit_dims(implicit_dims)
-
         # Make iterable to support inject((u, v), expr=expr)
         # or inject((u, v), expr=(expr1, expr2))
         fields, exprs = as_tuple(field), as_tuple(expr)
@@ -315,6 +320,10 @@ def _inject(self, field, expr, implicit_dims=None):
             _exprs = exprs
 
         variables = list(v for e in _exprs for v in retrieve_function_carriers(e))
+
+        # Implicit dimensions
+        implicit_dims = self._augment_implicit_dims(implicit_dims, variables)
+
         variables = variables + list(fields)
 
         # List of indirection indices for all adjacent grid points
@@ -385,6 +394,7 @@ def interpolation_coeffs(self):
     @property
     def _weights(self):
         ddim, cdim = self.interpolation_coeffs.dimensions[1:]
-        return Mul(*[self.interpolation_coeffs.subs({ddim: ri,
-                                                     cdim: rd-rd.parent.symbolic_min})
-                     for (ri, rd) in enumerate(self._rdim)])
+        mappers = [{ddim: ri, cdim: rd-rd.parent.symbolic_min}
+                   for (ri, rd) in enumerate(self._rdim)]
+        return Mul(*[self.interpolation_coeffs.subs(mapper)
+                     for mapper in mappers])
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index b91d51727f..609c69295f 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -566,6 +566,7 @@ def _prepare_arguments(self, autotune=None, **kwargs):
                                      "`%s=%s`, while `%s=%s` is expected. Perhaps you "
                                      "forgot to override `%s`?" %
                                      (p, k, v, k, args[k], p))
+
         args = kwargs['args'] = args.reduce_all()
 
         # DiscreteFunctions may be created from CartesianDiscretizations, which in
@@ -573,6 +574,10 @@ def _prepare_arguments(self, autotune=None, **kwargs):
         discretizations = {getattr(kwargs[p.name], 'grid', None) for p in overrides}
         discretizations.update({getattr(p, 'grid', None) for p in defaults})
         discretizations.discard(None)
+        # Remove subgrids if multiple grids
+        if len(discretizations) > 1:
+            discretizations = {g for g in discretizations
+                               if not any(d.is_Derived for d in g.dimensions)}
         for i in discretizations:
             args.update(i._arg_values(**kwargs))
 
@@ -585,6 +590,9 @@ def _prepare_arguments(self, autotune=None, **kwargs):
             if configuration['mpi']:
                 raise ValueError("Multiple Grids found")
         try:
+            # Take biggest grid, i.e. discard grids with subdimensions
+            grids = {g for g in grids if not any(d.is_Sub for d in g.dimensions)}
+            # First grid as there is no heuristic on how to choose from the leftovers
             grid = grids.pop()
         except KeyError:
             grid = None
diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
index 36ce348ac4..4a4f6ac465 100644
--- a/devito/passes/iet/langbase.py
+++ b/devito/passes/iet/langbase.py
@@ -214,6 +214,9 @@ def DeviceIteration(self):
     def Prodder(self):
         return self.lang.Prodder
 
+    def _is_offloadable(self, *args, **kwargs):
+        return False
+
 
 class DeviceAwareMixin(object):
 
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 44ee6afd6c..4af585f86a 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -304,6 +304,7 @@ def _select_candidates(self, candidates):
             # Iterations and their position (i.e. outermost to innermost) in the nest
             score = (
                 int(root.is_ParallelNoAtomic),
+                -int(self._is_offloadable(root))*(n0 + 1),  # Outermost offloadable
                 int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1),
                 int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1),
                 -(n0 + 1)  # The outermost, the better
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
index 3afe7197eb..01a9a3f4bd 100644
--- a/devito/tools/data_structures.py
+++ b/devito/tools/data_structures.py
@@ -137,6 +137,9 @@ def compare_to_first(v):
             return candidates[0]
         elif all(map(compare_to_first, candidates)):
             # return first non-range
+            for c in candidates:
+                if not isinstance(c, range):
+                    return c
             return candidates[0]
         else:
             raise ValueError("Unable to find unique value for key %s, candidates: %s"
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 6044f01469..2d11ccb220 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -298,14 +298,14 @@ def _arg_values(self, interval, grid=None, args=None, **kwargs):
         # may represent sets of legal values. If that's the case, here we just
         # pick one. Note that we sort for determinism
         try:
-            loc_minv = loc_minv.start
+            loc_minv = loc_minv.stop
         except AttributeError:
             try:
                 loc_minv = sorted(loc_minv).pop(0)
             except TypeError:
                 pass
         try:
-            loc_maxv = loc_maxv.start
+            loc_maxv = loc_maxv.stop
         except AttributeError:
             try:
                 loc_maxv = sorted(loc_maxv).pop(0)
@@ -859,7 +859,8 @@ def _arg_defaults(self, _min=None, size=None, alias=None):
             factor = defaults[dim._factor.name] = dim._factor.data
         except AttributeError:
             factor = dim._factor
-        defaults[dim.parent.max_name] = range(1, factor*(size))
+
+        defaults[dim.parent.max_name] = range(1, factor*size - 1)
 
         return defaults
 
@@ -983,8 +984,7 @@ def bound_symbols(self):
         return set(self.parent.bound_symbols)
 
     def _arg_defaults(self, alias=None, **kwargs):
-        dim = alias or self
-        return {dim.parent.size_name: range(self.symbolic_size, np.iinfo(np.int64).max)}
+        return {}
 
     def _arg_values(self, *args, **kwargs):
         return {}
@@ -1466,10 +1466,7 @@ def _arg_defaults(self, _min=None, size=None, **kwargs):
         A SteppingDimension does not know its max point and therefore
         does not have a size argument.
         """
-        args = {self.parent.min_name: _min}
-        if size:
-            args[self.parent.size_name] = range(size-1, np.iinfo(np.int32).max)
-        return args
+        return {self.parent.min_name: _min}
 
     def _arg_values(self, *args, **kwargs):
         """
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 16f98b4f94..ba200d220c 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -272,7 +272,7 @@ def test_over_injection():
 
     # Check generated code
     assert len(retrieve_iteration_tree(op1)) == \
-        7 + int(configuration['language'] != 'C')
+        8 + int(configuration['language'] != 'C')
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index 32da3b22e3..9d41dddf48 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -1515,7 +1515,7 @@ def test_issue_1927(self, factor):
 
         op = Operator(Eq(f, 1))
 
-        assert op.arguments()['time_M'] == 4*(save-1)  # == min legal endpoint
+        assert op.arguments()['time_M'] == 4*save-1  # == min legal endpoint
 
         # Also no issues when supplying an override
         assert op.arguments(time_M=10)['time_M'] == 10
@@ -1530,7 +1530,6 @@ def test_issue_1927_v2(self):
         i = Dimension(name='i')
 
         ci = ConditionalDimension(name='ci', parent=i, factor=factor)
-
         g = Function(name='g', shape=(size,), dimensions=(i,))
         f = Function(name='f', shape=(int(size/factor),), dimensions=(ci,))
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 86a288ac00..3b9883e665 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -187,9 +187,14 @@ def test_cache_blocking_structure_optrelax():
 
     op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
+    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0', 'p_src1_blk0'})
 
     iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
+    assert len(iters) == 2
+    assert iters[0].dim.is_Block
+    assert iters[1].dim.is_Block
+
+    iters = FindNodes(Iteration).visit(bns['p_src1_blk0'])
     assert len(iters) == 5
     assert iters[0].dim.is_Block
     assert iters[1].dim.is_Block
@@ -286,7 +291,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
+    assert_structure(op, ['t', 't,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
                      't,p_s0_blk0,p_s,rsx,rsy')
 
 
@@ -958,7 +963,7 @@ def test_parallel_prec_inject(self):
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas
-        assert 'omp for collapse(2)' in iterations[1].pragmas[0].value
+        assert 'omp for' in iterations[1].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 2aefe69ed4..1e18157c77 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -48,9 +48,9 @@ def test_scheduling_after_rewrite():
     trees = retrieve_iteration_tree(op)
 
     # Check loop nest structure
-    assert all(i.dim is j for i, j in zip(trees[0], grid.dimensions))  # time invariant
-    assert trees[1].root.dim is grid.time_dim
-    assert all(trees[1].root.dim is tree.root.dim for tree in trees[1:])
+    assert all(i.dim is j for i, j in zip(trees[1], grid.dimensions))  # time invariant
+    assert trees[2].root.dim is grid.time_dim
+    assert all(trees[2].root.dim is tree.root.dim for tree in trees[2:])
 
 
 @pytest.mark.parametrize('exprs,expected,min_cost', [
@@ -1687,7 +1687,7 @@ def test_drop_redundants_after_fusion(self, rotate):
         op = Operator(eqns, opt=('advanced', {'cire-rotate': rotate}))
 
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        assert len(arrays) == 2
+        assert len(arrays) == 4
         assert all(i._mem_heap and not i._mem_external for i in arrays)
 
     def test_full_shape_big_temporaries(self):
@@ -2711,9 +2711,11 @@ def test_fullopt(self):
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
         assert summary1[('section0', None)].ops == 9
-        assert summary1[('section1', None)].ops == 31
-        assert summary1[('section2', None)].ops == 88
-        assert np.isclose(summary1[('section1', None)].oi, 1.767, atol=0.001)
+        assert summary1[('section1', None)].ops == 9
+        assert summary1[('section2', None)].ops == 31
+        assert summary1[('section3', None)].ops == 26
+        assert summary1[('section4', None)].ops == 22
+        assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
         assert np.allclose(rec0.data, rec1.data, atol=10e-5)
@@ -2773,8 +2775,8 @@ def test_fullopt(self):
         assert np.allclose(self.tti_noopt[1].data, rec.data, atol=10e-1)
 
         # Check expected opcount/oi
-        assert summary[('section2', None)].ops == 92
-        assert np.isclose(summary[('section2', None)].oi, 2.074, atol=0.001)
+        assert summary[('section3', None)].ops == 92
+        assert np.isclose(summary[('section3', None)].oi, 2.074, atol=0.001)
 
         # With optimizations enabled, there should be exactly four BlockDimensions
         op = wavesolver.op_fwd()
@@ -2792,7 +2794,7 @@ def test_fullopt(self):
         #   3 Arrays are defined globally for the sparse positions temporaries
         # and two additional bock-sized Arrays are defined locally
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        extra_arrays = 2+3
+        extra_arrays = 2+3+3
         assert len(arrays) == 4 + extra_arrays
         assert all(i._mem_heap and not i._mem_external for i in arrays)
         bns, pbs = assert_blocking(op, {'x0_blk0'})
@@ -2828,7 +2830,7 @@ def test_fullopt_w_mpi(self):
     def test_opcounts(self, space_order, expected):
         op = self.tti_operator(opt='advanced', space_order=space_order)
         sections = list(op.op_fwd()._profiler._sections.values())
-        assert sections[2].sops == expected
+        assert sections[3].sops == expected
 
     @switchconfig(profiling='advanced')
     @pytest.mark.parametrize('space_order,expected', [
@@ -2838,8 +2840,8 @@ def test_opcounts_adjoint(self, space_order, expected):
         wavesolver = self.tti_operator(opt=('advanced', {'openmp': False}))
         op = wavesolver.op_adj()
 
-        assert op._profiler._sections['section2'].sops == expected
-        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3
+        assert op._profiler._sections['section3'].sops == expected
+        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3+3
 
 
 class TestTTIv2(object):
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index 3a22ca1db7..c7a15665a4 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -734,3 +734,29 @@ class SparseFirst(SparseFunction):
     op(time_M=10)
     expected = 10*11/2  # n (n+1)/2
     assert np.allclose(s.data, expected)
+
+
+def test_inject_function():
+    nt = 11
+
+    grid = Grid(shape=(5, 5))
+    u = TimeFunction(name="u", grid=grid, time_order=2)
+    src = SparseTimeFunction(name="src", grid=grid, nt=nt, npoint=1,
+                             coordinates=[[0.5, 0.5]])
+
+    nfreq = 5
+    freq_dim = DefaultDimension(name="freq", default_value=nfreq)
+    omega = Function(name="omega", dimensions=(freq_dim,), shape=(nfreq,), grid=grid)
+    omega.data.fill(1.)
+
+    inj = src.inject(field=u.forward, expr=omega)
+
+    op = Operator([inj])
+
+    op(time_M=0)
+    assert u.data[1, 2, 2] == nfreq
+    assert np.all(u.data[0] == 0)
+    assert np.all(u.data[2] == 0)
+    for i in [0, 1, 3, 4]:
+        for j in [0, 1, 3, 4]:
+            assert u.data[1, i, j] == 0
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 14ddbec249..2860fc726e 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2499,8 +2499,8 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save):
         op_adj = solver.op_adj()
         adj_calls = FindNodes(Call).visit(op_adj)
 
-        # one halo, ndim memalign and free (pos temp rec)
-        sf_calls = 2 * len(shape)
+        # one halo, ndim memalign and free (pos temp rec/src)
+        sf_calls = 2 * len(shape) * 2
         assert len(fwd_calls) == 1 + sf_calls
         assert len(adj_calls) == 1 + sf_calls
 

From 87d8d0e1f25c7dc4f6d75d0af2d92c5163f6a05c Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Mon, 18 Sep 2023 08:25:17 -0400
Subject: [PATCH 58/90] compiler: remove atomic collapse hack

---
 devito/passes/iet/parpragma.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 4af585f86a..9d69e12df7 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -295,9 +295,6 @@ def _select_candidates(self, candidates):
                     except TypeError:
                         pass
 
-                # At least one inner loop (nested) or
-                # we do not collapse most inner loop if it is an atomic reduction
-                if not i.is_ParallelAtomic or nested:
                     collapsable.append(i)
 
             # Give a score to this candidate, based on the number of fully-parallel
@@ -429,11 +426,6 @@ def _make_nested_partree(self, partree):
         if self.nhyperthreads <= self.nested:
             return partree
 
-        # Loop nest with atomic reductions are more likely to have less latency
-        # keep outer loop parallel
-        if partree.root.is_ParallelAtomic:
-            return partree
-
         # Note: there might be multiple sub-trees amenable to nested parallelism,
         # hence we loop over all of them
         #

From f7ab007e0a735952ca9d40509fe7ac9ec222f55c Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Mon, 18 Sep 2023 11:56:45 -0400
Subject: [PATCH 59/90] compiler: prevent halo to be moved outside their
 iteration space

---
 devito/ir/stree/algorithms.py  |  6 ++++++
 devito/mpi/halo_scheme.py      |  4 ++++
 devito/passes/iet/langbase.py  | 13 ++++++++++---
 devito/passes/iet/parpragma.py | 10 ++++++++--
 tests/test_dle.py              | 10 ++++++----
 tests/test_gpu_openacc.py      | 16 ++++++++--------
 tests/test_gpu_openmp.py       |  2 +-
 tests/test_mpi.py              |  3 ++-
 8 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py
index 58e8e844e6..d8bbb4958a 100644
--- a/devito/ir/stree/algorithms.py
+++ b/devito/ir/stree/algorithms.py
@@ -147,6 +147,12 @@ def preprocess(clusters, options=None, **kwargs):
             found = []
             for c1 in list(queue):
                 distributed_aindices = c1.halo_scheme.distributed_aindices
+                h_indices = set().union(*[(d, d.root)
+                                          for d in c1.halo_scheme.loc_indices])
+
+                # Skip if the Halo exchange would end up outside its iteration space
+                if h_indices and not h_indices & dims:
+                    continue
 
                 diff = dims - distributed_aindices
                 intersection = dims & distributed_aindices
diff --git a/devito/mpi/halo_scheme.py b/devito/mpi/halo_scheme.py
index 0204c171e6..970e84633d 100644
--- a/devito/mpi/halo_scheme.py
+++ b/devito/mpi/halo_scheme.py
@@ -361,6 +361,10 @@ def distributed(self):
     def distributed_aindices(self):
         return set().union(*[i.dims for i in self.fmapper.values()])
 
+    @cached_property
+    def loc_indices(self):
+        return set().union(*[i.loc_indices.keys() for i in self.fmapper.values()])
+
     @cached_property
     def arguments(self):
         return self.dimensions | set(flatten(self.honored.values()))
diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
index 4a4f6ac465..457d8476c3 100644
--- a/devito/passes/iet/langbase.py
+++ b/devito/passes/iet/langbase.py
@@ -214,8 +214,8 @@ def DeviceIteration(self):
     def Prodder(self):
         return self.lang.Prodder
 
-    def _is_offloadable(self, *args, **kwargs):
-        return False
+    def _n_device_pointers(self, *args, **kwargs):
+        return 0
 
 
 class DeviceAwareMixin(object):
@@ -328,6 +328,12 @@ def _(iet):
 
         return _initialize(iet)
 
+    def _n_device_pointers(self, iet):
+        functions = FindSymbols().visit(iet)
+        devfuncs = [f for f in functions if f.is_Array and f._mem_local]
+
+        return len(devfuncs)
+
     def _is_offloadable(self, iet):
         """
         True if the IET computation is offloadable to device, False otherwise.
@@ -339,7 +345,8 @@ def _is_offloadable(self, iet):
         functions = FindSymbols().visit(iet)
         buffers = [f for f in functions if f.is_Array and f._mem_mapped]
         hostfuncs = [f for f in functions if not is_on_device(f, self.gpu_fit)]
-        return not (buffers and hostfuncs)
+
+        return not (hostfuncs and buffers)
 
 
 class Sections(tuple):
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 9d69e12df7..34ca370a60 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -295,13 +295,13 @@ def _select_candidates(self, candidates):
                     except TypeError:
                         pass
 
-                    collapsable.append(i)
+                collapsable.append(i)
 
             # Give a score to this candidate, based on the number of fully-parallel
             # Iterations and their position (i.e. outermost to innermost) in the nest
             score = (
                 int(root.is_ParallelNoAtomic),
-                -int(self._is_offloadable(root))*(n0 + 1),  # Outermost offloadable
+                self._n_device_pointers(root),  # Outermost offloadable
                 int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1),
                 int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1),
                 -(n0 + 1)  # The outermost, the better
@@ -375,6 +375,12 @@ def _make_partree(self, candidates, nthreads=None):
                                           ncollapsed=ncollapsed, nthreads=nthreads,
                                           **root.args)
             prefix = []
+        elif all(i.is_ParallelRelaxed for i in candidates) and nthreads is not None:
+            body = self.HostIteration(schedule='static',
+                                      parallel=nthreads is not self.nthreads_nested,
+                                      ncollapsed=ncollapsed, nthreads=nthreads,
+                                      **root.args)
+            prefix = []
         else:
             # pragma ... for ... schedule(..., expr)
             assert nthreads is None
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 3b9883e665..df3c4adfa5 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -291,7 +291,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t', 't,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
+    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
                      't,p_s0_blk0,p_s,rsx,rsy')
 
 
@@ -821,12 +821,13 @@ def test_incs_no_atomic(self):
                                                      'par-collapse-ncores': 1,
                                                      'par-collapse-work': 0}))
 
-        assert 'collapse(2)' in str(op0)
+        assert 'collapse(3)' in str(op0)
         assert 'atomic' in str(op0)
 
         # Now only `x` is parallelized
         op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)],
                        opt=('advanced', {'openmp': True, 'par-collapse-ncores': 1}))
+
         assert 'omp for' in str(op1)
         assert 'collapse' not in str(op1)
         assert 'atomic' not in str(op1)
@@ -951,11 +952,12 @@ def test_parallel_prec_inject(self):
         eqns = sf.inject(field=u.forward, expr=sf * dt**2)
 
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
-                                               'par-collapse-ncores': 1}))
+                                               'par-collapse-ncores': 20}))
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas
         assert 'omp for' in iterations[1].pragmas[0].value
+        assert 'collapse' not in iterations[1].pragmas[0].value
 
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
                                                'par-collapse-ncores': 1,
@@ -963,7 +965,7 @@ def test_parallel_prec_inject(self):
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas
-        assert 'omp for' in iterations[1].pragmas[0].value
+        assert 'omp for collapse' in iterations[2].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index 823d11854d..db92db3c83 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -102,15 +102,15 @@ def test_tile_insteadof_collapse(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
 
-        assert trees[0][1].pragmas[0].value ==\
-            'acc parallel loop tile(32,4,4) present(u)'
         assert trees[1][1].pragmas[0].value ==\
+            'acc parallel loop tile(32,4,4) present(u)'
+        assert trees[2][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4) present(u)'
         # Only the AFFINE Iterations are tiled
-        assert trees[3][1].pragmas[0].value ==\
-            'acc parallel loop collapse(3) present(src,src_coords,u)'
+        assert trees[4][1].pragmas[0].value ==\
+            'acc parallel loop present(src,src_coords,u) deviceptr(r1,r2,r3)'
 
     @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
                                           ((32, 4, 4), (8, 8, 8))])
@@ -130,11 +130,11 @@ def test_multiple_tile_sizes(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 4
+        assert len(trees) == 6
 
-        assert trees[0][1].pragmas[0].value ==\
-            'acc parallel loop tile(32,4,4) present(u)'
         assert trees[1][1].pragmas[0].value ==\
+            'acc parallel loop tile(32,4,4) present(u)'
+        assert trees[2][1].pragmas[0].value ==\
             'acc parallel loop tile(8,8) present(u)'
 
     def test_multi_tile_blocking_structure(self):
diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py
index bc2de71708..29866508d8 100644
--- a/tests/test_gpu_openmp.py
+++ b/tests/test_gpu_openmp.py
@@ -265,7 +265,7 @@ def test_timeparallel_reduction(self):
         assert not tree.root.pragmas
         assert len(tree[1].pragmas) == 1
         assert tree[1].pragmas[0].value ==\
-            ('omp target teams distribute parallel for collapse(2)'
+            ('omp target teams distribute parallel for collapse(3)'
              ' reduction(+:f[0])')
 
 
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 2860fc726e..51facd7a7c 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2558,7 +2558,8 @@ def test_adjoint_F_no_omp(self):
     # TestDecomposition().test_reshape_left_right()
     # TestOperatorSimple().test_trivial_eq_2d()
     # TestFunction().test_halo_exchange_bilateral()
-    TestSparseFunction().test_sparse_coords()
+    # TestSparseFunction().test_sparse_coords()
     # TestSparseFunction().test_precomputed_sparse(2)
     # TestOperatorAdvanced().test_fission_due_to_antidep()
+    TestOperatorAdvanced().test_injection_wodup_wtime()
     # TestIsotropicAcoustic().test_adjoint_F(1)

From 441de0f148a57619d61ec80468965e289f2cc0cd Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 19 Sep 2023 09:51:08 -0400
Subject: [PATCH 60/90] compiler: improve interpolation parallelism

---
 devito/operations/interpolators.py |  5 +++++
 tests/test_buffering.py            |  2 +-
 tests/test_dle.py                  | 11 ++---------
 tests/test_dse.py                  | 29 ++++++++++++++---------------
 tests/test_gpu_openacc.py          | 18 ++++++++++--------
 tests/test_interpolation.py        |  3 ++-
 tests/test_mpi.py                  |  2 +-
 tests/test_operator.py             |  4 ++--
 8 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/devito/operations/interpolators.py b/devito/operations/interpolators.py
index dae96d8dfe..3d2dcb7466 100644
--- a/devito/operations/interpolators.py
+++ b/devito/operations/interpolators.py
@@ -305,6 +305,7 @@ def _inject(self, field, expr, implicit_dims=None):
         # Make iterable to support inject((u, v), expr=expr)
         # or inject((u, v), expr=(expr1, expr2))
         fields, exprs = as_tuple(field), as_tuple(expr)
+
         # Provide either one expr per field or on expr for all fields
         if len(fields) > 1:
             if len(exprs) == 1:
@@ -323,6 +324,10 @@ def _inject(self, field, expr, implicit_dims=None):
 
         # Implicit dimensions
         implicit_dims = self._augment_implicit_dims(implicit_dims, variables)
+        # Move all temporaries inside the inner loop to improve parallelism.
+        # This can only be done for injection, as interpolation needs a
+        # temporary summing variable that would prevent collapsing
+        implicit_dims = implicit_dims + tuple(r.parent for r in self._rdim)
 
         variables = variables + list(fields)
 
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index ba200d220c..16f98b4f94 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -272,7 +272,7 @@ def test_over_injection():
 
     # Check generated code
     assert len(retrieve_iteration_tree(op1)) == \
-        8 + int(configuration['language'] != 'C')
+        7 + int(configuration['language'] != 'C')
     buffers = [i for i in FindSymbols().visit(op1) if i.is_Array]
     assert len(buffers) == 1
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index df3c4adfa5..8d58827a61 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -187,19 +187,12 @@ def test_cache_blocking_structure_optrelax():
 
     op = Operator(eqns, opt=('advanced', {'blockrelax': True}))
 
-    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0', 'p_src1_blk0'})
+    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})
 
     iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
-    assert len(iters) == 2
-    assert iters[0].dim.is_Block
-    assert iters[1].dim.is_Block
-
-    iters = FindNodes(Iteration).visit(bns['p_src1_blk0'])
     assert len(iters) == 5
     assert iters[0].dim.is_Block
     assert iters[1].dim.is_Block
-    for i in range(2, 5):
-        assert not iters[i].dim.is_Block
 
 
 def test_cache_blocking_structure_optrelax_customdim():
@@ -965,7 +958,7 @@ def test_parallel_prec_inject(self):
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas
-        assert 'omp for collapse' in iterations[2].pragmas[0].value
+        assert 'omp for collapse' in iterations[1].pragmas[0].value
 
 
 class TestNestedParallelism(object):
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 1e18157c77..728f8f9357 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -48,9 +48,9 @@ def test_scheduling_after_rewrite():
     trees = retrieve_iteration_tree(op)
 
     # Check loop nest structure
-    assert all(i.dim is j for i, j in zip(trees[1], grid.dimensions))  # time invariant
-    assert trees[2].root.dim is grid.time_dim
-    assert all(trees[2].root.dim is tree.root.dim for tree in trees[2:])
+    assert all(i.dim is j for i, j in zip(trees[0], grid.dimensions))  # time invariant
+    assert trees[1].root.dim is grid.time_dim
+    assert all(trees[1].root.dim is tree.root.dim for tree in trees[1:])
 
 
 @pytest.mark.parametrize('exprs,expected,min_cost', [
@@ -1687,7 +1687,7 @@ def test_drop_redundants_after_fusion(self, rotate):
         op = Operator(eqns, opt=('advanced', {'cire-rotate': rotate}))
 
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        assert len(arrays) == 4
+        assert len(arrays) == 2
         assert all(i._mem_heap and not i._mem_external for i in arrays)
 
     def test_full_shape_big_temporaries(self):
@@ -2711,11 +2711,10 @@ def test_fullopt(self):
         assert np.isclose(summary0[('section0', None)].oi, 2.851, atol=0.001)
 
         assert summary1[('section0', None)].ops == 9
-        assert summary1[('section1', None)].ops == 9
-        assert summary1[('section2', None)].ops == 31
-        assert summary1[('section3', None)].ops == 26
-        assert summary1[('section4', None)].ops == 22
-        assert np.isclose(summary1[('section2', None)].oi, 1.767, atol=0.001)
+        assert summary1[('section1', None)].ops == 31
+        assert summary1[('section2', None)].ops == 88
+        assert summary1[('section3', None)].ops == 22
+        assert np.isclose(summary1[('section1', None)].oi, 1.767, atol=0.001)
 
         assert np.allclose(u0.data, u1.data, atol=10e-5)
         assert np.allclose(rec0.data, rec1.data, atol=10e-5)
@@ -2775,8 +2774,8 @@ def test_fullopt(self):
         assert np.allclose(self.tti_noopt[1].data, rec.data, atol=10e-1)
 
         # Check expected opcount/oi
-        assert summary[('section3', None)].ops == 92
-        assert np.isclose(summary[('section3', None)].oi, 2.074, atol=0.001)
+        assert summary[('section2', None)].ops == 92
+        assert np.isclose(summary[('section2', None)].oi, 2.074, atol=0.001)
 
         # With optimizations enabled, there should be exactly four BlockDimensions
         op = wavesolver.op_fwd()
@@ -2794,7 +2793,7 @@ def test_fullopt(self):
         #   3 Arrays are defined globally for the sparse positions temporaries
         # and two additional bock-sized Arrays are defined locally
         arrays = [i for i in FindSymbols().visit(op) if i.is_Array]
-        extra_arrays = 2+3+3
+        extra_arrays = 2+3
         assert len(arrays) == 4 + extra_arrays
         assert all(i._mem_heap and not i._mem_external for i in arrays)
         bns, pbs = assert_blocking(op, {'x0_blk0'})
@@ -2830,7 +2829,7 @@ def test_fullopt_w_mpi(self):
     def test_opcounts(self, space_order, expected):
         op = self.tti_operator(opt='advanced', space_order=space_order)
         sections = list(op.op_fwd()._profiler._sections.values())
-        assert sections[3].sops == expected
+        assert sections[2].sops == expected
 
     @switchconfig(profiling='advanced')
     @pytest.mark.parametrize('space_order,expected', [
@@ -2840,8 +2839,8 @@ def test_opcounts_adjoint(self, space_order, expected):
         wavesolver = self.tti_operator(opt=('advanced', {'openmp': False}))
         op = wavesolver.op_adj()
 
-        assert op._profiler._sections['section3'].sops == expected
-        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3+3
+        assert op._profiler._sections['section2'].sops == expected
+        assert len([i for i in FindSymbols().visit(op) if i.is_Array]) == 7+3
 
 
 class TestTTIv2(object):
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index db92db3c83..3085ad85c9 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -102,15 +102,15 @@ def test_tile_insteadof_collapse(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 4
 
-        assert trees[1][1].pragmas[0].value ==\
+        assert trees[0][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'
-        assert trees[2][1].pragmas[0].value ==\
+        assert trees[1][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4) present(u)'
         # Only the AFFINE Iterations are tiled
-        assert trees[4][1].pragmas[0].value ==\
-            'acc parallel loop present(src,src_coords,u) deviceptr(r1,r2,r3)'
+        assert trees[3][1].pragmas[0].value ==\
+            'acc parallel loop collapse(4) present(src,src_coords,u)'
 
     @pytest.mark.parametrize('par_tile', [((32, 4, 4), (8, 8)), ((32, 4), (8, 8)),
                                           ((32, 4, 4), (8, 8, 8))])
@@ -130,12 +130,14 @@ def test_multiple_tile_sizes(self, par_tile):
                       opt=('advanced', {'par-tile': par_tile}))
 
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 4
 
-        assert trees[1][1].pragmas[0].value ==\
+        assert trees[0][1].pragmas[0].value ==\
             'acc parallel loop tile(32,4,4) present(u)'
-        assert trees[2][1].pragmas[0].value ==\
+        assert trees[1][1].pragmas[0].value ==\
             'acc parallel loop tile(8,8) present(u)'
+        assert trees[3][1].pragmas[0].value ==\
+            'acc parallel loop collapse(4) present(src,src_coords,u)'
 
     def test_multi_tile_blocking_structure(self):
         grid = Grid(shape=(8, 8, 8))
diff --git a/tests/test_interpolation.py b/tests/test_interpolation.py
index c7a15665a4..97d86c1759 100644
--- a/tests/test_interpolation.py
+++ b/tests/test_interpolation.py
@@ -5,7 +5,7 @@
 from sympy import Float
 
 from devito import (Grid, Operator, Dimension, SparseFunction, SparseTimeFunction,
-                    Function, TimeFunction, DefaultDimension, Eq,
+                    Function, TimeFunction, DefaultDimension, Eq, switchconfig,
                     PrecomputedSparseFunction, PrecomputedSparseTimeFunction,
                     MatrixSparseTimeFunction)
 from examples.seismic import (demo_model, TimeAxis, RickerSource, Receiver,
@@ -736,6 +736,7 @@ class SparseFirst(SparseFunction):
     assert np.allclose(s.data, expected)
 
 
+@switchconfig(safe_math=True)
 def test_inject_function():
     nt = 11
 
diff --git a/tests/test_mpi.py b/tests/test_mpi.py
index 51facd7a7c..ab7092ba1a 100644
--- a/tests/test_mpi.py
+++ b/tests/test_mpi.py
@@ -2500,7 +2500,7 @@ def test_adjoint_codegen(self, shape, kernel, space_order, save):
         adj_calls = FindNodes(Call).visit(op_adj)
 
         # one halo, ndim memalign and free (pos temp rec/src)
-        sf_calls = 2 * len(shape) * 2
+        sf_calls = 2 * len(shape)
         assert len(fwd_calls) == 1 + sf_calls
         assert len(adj_calls) == 1 + sf_calls
 
diff --git a/tests/test_operator.py b/tests/test_operator.py
index 4f8228bc24..3064565e3c 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -1805,7 +1805,7 @@ def test_scheduling_sparse_functions(self):
         # `trees` than 6
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 5
         # Time loop not shared due to the WAR
         assert trees[0][0].dim is time and trees[0][0] is trees[1][0]  # this IS shared
         assert trees[1][0] is not trees[3][0]
@@ -1815,7 +1815,7 @@ def test_scheduling_sparse_functions(self):
         eqn2 = sf1.inject(u1.forward, expr=sf1)
         op = Operator([eqn1] + eqn2 + [eqn3] + eqn4, opt=('noop', {'openmp': False}))
         trees = retrieve_iteration_tree(op)
-        assert len(trees) == 6
+        assert len(trees) == 5
         assert all(trees[0][0] is i[0] for i in trees)
 
     def test_scheduling_with_free_dims(self):

From ab160ddd1b3ec6810d77f8296704744967d6854f Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 20 Sep 2023 08:08:00 -0400
Subject: [PATCH 61/90] compiler: remove redundant subgrid check and cleanup

---
 devito/ir/stree/algorithms.py  |  6 +++---
 devito/operator/operator.py    |  4 +---
 devito/passes/iet/langbase.py  | 12 +++++-------
 devito/passes/iet/parpragma.py |  5 ++---
 devito/types/dimension.py      |  4 ++--
 5 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/devito/ir/stree/algorithms.py b/devito/ir/stree/algorithms.py
index d8bbb4958a..0f7e46bcfd 100644
--- a/devito/ir/stree/algorithms.py
+++ b/devito/ir/stree/algorithms.py
@@ -147,10 +147,10 @@ def preprocess(clusters, options=None, **kwargs):
             found = []
             for c1 in list(queue):
                 distributed_aindices = c1.halo_scheme.distributed_aindices
-                h_indices = set().union(*[(d, d.root)
-                                          for d in c1.halo_scheme.loc_indices])
+                h_indices = set().union(*[d._defines for d in c1.halo_scheme.loc_indices])
 
-                # Skip if the Halo echange would end up outside its need iteration space
+                # Skip if the halo exchange would end up outside
+                # its iteration space
                 if h_indices and not h_indices & dims:
                     continue
 
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 609c69295f..eb8b793f22 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -578,6 +578,7 @@ def _prepare_arguments(self, autotune=None, **kwargs):
         if len(discretizations) > 1:
             discretizations = {g for g in discretizations
                                if not any(d.is_Derived for d in g.dimensions)}
+
         for i in discretizations:
             args.update(i._arg_values(**kwargs))
 
@@ -590,9 +591,6 @@ def _prepare_arguments(self, autotune=None, **kwargs):
             if configuration['mpi']:
                 raise ValueError("Multiple Grids found")
         try:
-            # Take biggest grid, i.e discard grids with subdimensions
-            grids = {g for g in grids if not any(d.is_Sub for d in g.dimensions)}
-            # First grid as there is no heuristic on how to choose from the leftovers
             grid = grids.pop()
         except KeyError:
             grid = None
diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
index 457d8476c3..2acccba648 100644
--- a/devito/passes/iet/langbase.py
+++ b/devito/passes/iet/langbase.py
@@ -214,8 +214,8 @@ def DeviceIteration(self):
     def Prodder(self):
         return self.lang.Prodder
 
-    def _n_device_pointers(self, *args, **kwargs):
-        return 0
+    def _device_pointers(self, *args, **kwargs):
+        return {}
 
 
 class DeviceAwareMixin(object):
@@ -328,11 +328,10 @@ def _(iet):
 
         return _initialize(iet)
 
-    def _n_device_pointers(self, iet):
+    def _device_pointers(self, iet):
         functions = FindSymbols().visit(iet)
         devfuncs = [f for f in functions if f.is_Array and f._mem_local]
-
-        return len(devfuncs)
+        return set(devfuncs)
 
     def _is_offloadable(self, iet):
         """
@@ -345,8 +344,7 @@ def _is_offloadable(self, iet):
         functions = FindSymbols().visit(iet)
         buffers = [f for f in functions if f.is_Array and f._mem_mapped]
         hostfuncs = [f for f in functions if not is_on_device(f, self.gpu_fit)]
-
-        return not (hostfuncs and buffers)
+        return not (buffers and hostfuncs)
 
 
 class Sections(tuple):
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 34ca370a60..b6476192b2 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -301,7 +301,7 @@ def _select_candidates(self, candidates):
             # Iterations and their position (i.e. outermost to innermost) in the nest
             score = (
                 int(root.is_ParallelNoAtomic),
-                self._n_device_pointers(root),  # Outermost offloadable
+                len(self._device_pointers(root)),  # Outermost offloadable
                 int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1),
                 int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1),
                 -(n0 + 1)  # The outermost, the better
@@ -375,7 +375,7 @@ def _make_partree(self, candidates, nthreads=None):
                                           ncollapsed=ncollapsed, nthreads=nthreads,
                                           **root.args)
             prefix = []
-        elif all(i.is_ParallelRelaxed for i in candidates) and nthreads is not None:
+        elif nthreads is not None:
             body = self.HostIteration(schedule='static',
                                       parallel=nthreads is not self.nthreads_nested,
                                       ncollapsed=ncollapsed, nthreads=nthreads,
@@ -383,7 +383,6 @@ def _make_partree(self, candidates, nthreads=None):
             prefix = []
         else:
             # pragma ... for ... schedule(..., expr)
-            assert nthreads is None
             nthreads = self.nthreads_nonaffine
             chunk_size = Symbol(name='chunk_size')
             body = self.HostIteration(ncollapsed=ncollapsed, chunk_size=chunk_size,
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 2d11ccb220..43865c72f5 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -983,7 +983,7 @@ def symbolic_incr(self):
     def bound_symbols(self):
         return set(self.parent.bound_symbols)
 
-    def _arg_defaults(self, alias=None, **kwargs):
+    def _arg_defaults(self, **kwargs):
         return {}
 
     def _arg_values(self, *args, **kwargs):
@@ -1452,7 +1452,7 @@ def symbolic_max(self):
     def _arg_names(self):
         return (self.min_name, self.max_name, self.name) + self.parent._arg_names
 
-    def _arg_defaults(self, _min=None, size=None, **kwargs):
+    def _arg_defaults(self, _min=None, **kwargs):
         """
         A map of default argument values defined by this dimension.
 

From 97f1cc876d063a5a3aba9ce41678b013e1529be4 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 20 Sep 2023 09:43:26 -0400
Subject: [PATCH 62/90] api: make SubDimension's side easy to check

---
 devito/types/dimension.py | 43 +++++++++++++++++++++++++++------------
 tests/test_dimension.py   |  9 +++++++-
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index 43865c72f5..d7b25e382d 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -586,7 +586,7 @@ def left(cls, name, parent, thickness, local=True):
         return cls(name, parent,
                    left=parent.symbolic_min,
                    right=parent.symbolic_min+lst-1,
-                   thickness=((lst, thickness), (rst, 0)),
+                   thickness=((lst, thickness), (rst, None)),
                    local=local)
 
     @classmethod
@@ -595,7 +595,7 @@ def right(cls, name, parent, thickness, local=True):
         return cls(name, parent,
                    left=parent.symbolic_max-rst+1,
                    right=parent.symbolic_max,
-                   thickness=((lst, 0), (rst, thickness)),
+                   thickness=((lst, None), (rst, thickness)),
                    local=local)
 
     @classmethod
@@ -628,6 +628,18 @@ def local(self):
     def thickness(self):
         return self._thickness
 
+    @property
+    def is_left(self):
+        return self.thickness.right[1] is None
+
+    @property
+    def is_right(self):
+        return self.thickness.left[1] is None
+
+    @property
+    def is_middle(self):
+        return not self.is_left and not self.is_right
+
     @cached_property
     def bound_symbols(self):
         # Add thickness symbols
@@ -701,7 +713,7 @@ def _arg_values(self, interval, grid=None, **kwargs):
         # However, arguments from the user are considered global
         # So overriding the thickness to a nonzero value should not cause
         # boundaries to exist between ranks where they did not before
-        requested_ltkn, requested_rtkn = (
+        r_ltkn, r_rtkn = (
             kwargs.get(k.name, v) for k, v in self.thickness
         )
 
@@ -710,19 +722,24 @@ def _arg_values(self, interval, grid=None, **kwargs):
             if self.local:
                 # dimension is of type ``left``/right`` - compute the 'offset'
                 # and then add 1 to get the appropriate thickness
-                ltkn = grid.distributor.glb_to_loc(self.root, requested_ltkn-1, LEFT)
-                rtkn = grid.distributor.glb_to_loc(self.root, requested_rtkn-1, RIGHT)
-                ltkn = ltkn+1 if ltkn is not None else 0
-                rtkn = rtkn+1 if rtkn is not None else 0
+                if r_ltkn is not None:
+                    ltkn = grid.distributor.glb_to_loc(self.root, r_ltkn-1, LEFT)
+                    ltkn = ltkn+1 if ltkn is not None else 0
+                else:
+                    ltkn = 0
+
+                if r_rtkn is not None:
+                    rtkn = grid.distributor.glb_to_loc(self.root, r_rtkn-1, RIGHT)
+                    rtkn = rtkn+1 if rtkn is not None else 0
+                else:
+                    rtkn = 0
             else:
                 # dimension is of type ``middle``
-                ltkn = grid.distributor.glb_to_loc(self.root, requested_ltkn,
-                                                   LEFT) or 0
-                rtkn = grid.distributor.glb_to_loc(self.root, requested_rtkn,
-                                                   RIGHT) or 0
+                ltkn = grid.distributor.glb_to_loc(self.root, r_ltkn, LEFT) or 0
+                rtkn = grid.distributor.glb_to_loc(self.root, r_rtkn, RIGHT) or 0
         else:
-            ltkn = requested_ltkn
-            rtkn = requested_rtkn
+            ltkn = r_ltkn or 0
+            rtkn = r_rtkn or 0
 
         return {i.name: v for i, v in zip(self._thickness_map, (ltkn, rtkn))}
 
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index 9d41dddf48..a54e160b38 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -243,6 +243,10 @@ def test_subdim_middle(self, opt):
         xi = SubDimension.middle(name='xi', parent=x,
                                  thickness_left=1,
                                  thickness_right=1)
+        assert xi.is_middle
+        assert not xi.is_left
+        assert not xi.is_right
+
         eqs = [Eq(u.forward, u + 1)]
         eqs = [e.subs(x, xi) for e in eqs]
 
@@ -261,6 +265,8 @@ def test_symbolic_size(self):
         thickness = 4
 
         xleft = SubDimension.left(name='xleft', parent=x, thickness=thickness)
+        assert xleft.is_left
+        assert not xleft.is_middle
         assert xleft.symbolic_size == xleft.thickness.left[0]
 
         xi = SubDimension.middle(name='xi', parent=x,
@@ -289,7 +295,8 @@ def test_bcs(self, opt):
         xi = SubDimension.middle(name='xi', parent=x,
                                  thickness_left=thickness, thickness_right=thickness)
         xright = SubDimension.right(name='xright', parent=x, thickness=thickness)
-
+        assert xright.is_right
+        assert not xright.is_middle
         yi = SubDimension.middle(name='yi', parent=y,
                                  thickness_left=thickness, thickness_right=thickness)
 

From 20b248135292f28f553e076a0bde8a60904ae958 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 20 Sep 2023 19:50:10 -0400
Subject: [PATCH 63/90] compiler: fix arg processing for empty arg update

---
 devito/tools/data_structures.py |  2 ++
 tests/test_dimension.py         | 21 ++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
index 01a9a3f4bd..61bb7f5257 100644
--- a/devito/tools/data_structures.py
+++ b/devito/tools/data_structures.py
@@ -111,6 +111,8 @@ def unique(self, key):
         """
         candidates = self.getall(key)
         candidates = [c for c in candidates if c is not None]
+        if not candidates:
+            return None
 
         def compare_to_first(v):
             first = candidates[0]
diff --git a/tests/test_dimension.py b/tests/test_dimension.py
index a54e160b38..7532602190 100644
--- a/tests/test_dimension.py
+++ b/tests/test_dimension.py
@@ -9,7 +9,7 @@
                     SparseFunction, SparseTimeFunction, Eq, Operator, Constant,
                     Dimension, DefaultDimension, SubDimension, switchconfig,
                     SubDomain, Lt, Le, Gt, Ge, Ne, Buffer, sin, SpaceDimension,
-                    CustomDimension, dimensions, configuration)
+                    CustomDimension, dimensions, configuration, norm, Inc, sum)
 from devito.ir.iet import (Conditional, Expression, Iteration, FindNodes,
                            FindSymbols, retrieve_iteration_tree)
 from devito.symbolics import indexify, retrieve_functions, IntDiv, INT
@@ -1636,6 +1636,25 @@ def test_array_shared_w_topofuse(self):
 
         assert_structure(op, ['i,x,y', 'i', 'i,x,y', 'i,x,y'], 'i,x,y,x,y,x,y')
 
+    def test_cond_notime(self):
+        grid = Grid(shape=(10, 10))
+        time = grid.time_dim
+
+        time_under = ConditionalDimension(name='timeu', parent=time, factor=5)
+        nt = 10
+
+        u = TimeFunction(name='u', grid=grid, space_order=2)
+        usaved = TimeFunction(name='usaved', grid=grid, space_order=2,
+                              time_dim=time_under, save=nt//5+1)
+        g = Function(name='g', grid=grid)
+
+        op = Operator([Eq(usaved, u)])
+        op(time_m=1, time_M=nt-1, dt=1)
+
+        op = Operator([Inc(g, usaved)])
+        op(time_m=1, time_M=nt-1, dt=1)
+        assert norm(g, order=1) == norm(sum(usaved, dims=time_under), order=1)
+
 
 class TestCustomDimension(object):
 

From e01780026998faa1c7b4ed986ce6d73649704655 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Tue, 26 Sep 2023 09:21:54 -0400
Subject: [PATCH 64/90] compiler: prevent radius dependent temps for sparse
 operations

---
 devito/ir/clusters/algorithms.py |  2 +-
 tests/test_dse.py                | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
index 2c0bdacc0c..dcea8e2f1c 100644
--- a/devito/ir/clusters/algorithms.py
+++ b/devito/ir/clusters/algorithms.py
@@ -458,7 +458,7 @@ def normalize_reductions(cluster, sregistry, options):
 
     processed = []
     for e in cluster.exprs:
-        if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
+        if e.is_Reduction and (e.lhs.is_Indexed or cluster.is_sparse):
             # Transform `e` such that we reduce into a scalar (ultimately via
             # atomic ops, though this part is carried out by a much later pass)
             # For example, given `i = m[p_src]` (i.e., indirection array), turn:
diff --git a/tests/test_dse.py b/tests/test_dse.py
index 728f8f9357..e1ae16eb69 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -24,7 +24,7 @@
                               IndexedPointer, Keyword, SizeOf, estimate_cost,
                               pow_to_mul, indexify)
 from devito.tools import as_tuple, generator
-from devito.types import Array, Scalar, Symbol
+from devito.types import Array, Scalar, Symbol, PrecomputedSparseTimeFunction
 
 from examples.seismic.acoustic import AcousticWaveSolver
 from examples.seismic import demo_model, AcquisitionGeometry
@@ -2664,6 +2664,18 @@ def test_dtype_aliases(self):
         assert FindNodes(Expression).visit(op)[0].dtype == np.float32
         assert np.all(fo.data[:-1, :-1] == 8)
 
+    def test_sparse_const(self):
+        grid = Grid((11, 11, 11))
+
+        u = TimeFunction(name="u", grid=grid)
+        src = PrecomputedSparseTimeFunction(name="src", grid=grid, npoint=1, nt=11,
+                                            r=2, interpolation_coeffs=np.ones((1, 3, 2)))
+        op = Operator(src.interpolate(u))
+
+        cond = FindNodes(Conditional).visit(op)
+        assert len(cond) == 1
+        assert all(e.is_scalar for e in cond[0].args['then_body'][0].exprs)
+
 
 class TestIsoAcoustic(object):
 

From ebf4b78ecfbb84fd2a630e77a8fdbc28380f0622 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Wed, 27 Sep 2023 09:38:50 -0400
Subject: [PATCH 65/90] compiler: prevent temporary for local reductions

---
 devito/builtins/utils.py         |  2 +-
 devito/ir/clusters/algorithms.py |  5 +++--
 devito/ir/clusters/cluster.py    | 13 ++++++++++++-
 devito/ir/equations/equation.py  |  5 +++++
 devito/types/sparse.py           |  4 ++--
 tests/test_dle.py                | 26 +++++++++++++++++++++++++-
 tests/test_dse.py                |  9 ++++++++-
 7 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/devito/builtins/utils.py b/devito/builtins/utils.py
index 214aac104e..70f590d5de 100644
--- a/devito/builtins/utils.py
+++ b/devito/builtins/utils.py
@@ -34,7 +34,7 @@ def __init__(self, *functions, op=dv.mpi.MPI.SUM, dtype=None):
         self.op = op
 
     def __enter__(self):
-        i = dv.Dimension(name='i',)
+        i = dv.Dimension(name='mri',)
         self.n = dv.Function(name='n', shape=(1,), dimensions=(i,),
                              grid=self.grid, dtype=self.dtype)
         self.n.data[0] = 0
diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
index dcea8e2f1c..6b246f1f16 100644
--- a/devito/ir/clusters/algorithms.py
+++ b/devito/ir/clusters/algorithms.py
@@ -458,7 +458,7 @@ def normalize_reductions(cluster, sregistry, options):
 
     processed = []
     for e in cluster.exprs:
-        if e.is_Reduction and (e.lhs.is_Indexed or cluster.is_sparse):
+        if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
             # Transform `e` such that we reduce into a scalar (ultimately via
             # atomic ops, though this part is carried out by a much later pass)
             # For example, given `i = m[p_src]` (i.e., indirection array), turn:
@@ -471,7 +471,8 @@ def normalize_reductions(cluster, sregistry, options):
             processed.extend([e.func(v, e.rhs, operation=None),
                               e.func(e.lhs, v)])
 
-        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
+        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce \
+                and not cluster.is_sparse:
             # Transform `e` into what is in essence an explicit map-reduce
             # For example, turn:
             # `s += f(u[x], v[x], ...)`
diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index 0dc3200b4f..ef839e2222 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -230,7 +230,18 @@ def is_dense(self):
 
     @property
     def is_sparse(self):
-        return not self.is_dense
+        """
+        A cluster is sparse if it represents a sparse operation, i.e. if both
+
+            * The cluster contains sparse functions
+            * The cluster uses dense functions
+
+        If only the first case is true, the cluster only contains operations on the sparse
+        function itself without indirection and therefore only contains dense operations.
+        """
+        return (any(f.is_SparseFunction for f in self.functions) and
+                len([f for f in self.functions
+                     if (f.is_Function and not f.is_SparseFunction)]) > 0)
 
     @property
     def is_halo_touch(self):
diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py
index a360132c1b..d416c22766 100644
--- a/devito/ir/equations/equation.py
+++ b/devito/ir/equations/equation.py
@@ -73,6 +73,11 @@ def apply(self, func):
         kwargs['conditionals'] = {k: func(v) for k, v in self.conditionals.items()}
         return self.func(*args, **kwargs)
 
+    def __repr__(self):
+        if not self.is_Reduction:
+            return super().__repr__()
+        else:
+            return '%s = %s(%s, %s)' % (self.lhs, self.operation, self.lhs, self.rhs)
     # Pickling support
     __reduce_ex__ = Pickable.__reduce_ex__
 
diff --git a/devito/types/sparse.py b/devito/types/sparse.py
index 42ecdc4a9a..90816fd7ad 100644
--- a/devito/types/sparse.py
+++ b/devito/types/sparse.py
@@ -338,11 +338,11 @@ def guard(self, expr=None):
                                   condition=condition, indirect=True)
 
         if expr is None:
-            out = self.indexify().xreplace({self._sparse_dim: cd})
+            out = self.indexify()._subs(self._sparse_dim, cd)
         else:
             functions = {f for f in retrieve_function_carriers(expr)
                          if f.is_SparseFunction}
-            out = indexify(expr).xreplace({f._sparse_dim: cd for f in functions})
+            out = indexify(expr).subs({f._sparse_dim: cd for f in functions})
 
         return out, temps
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 8d58827a61..9c297e0dca 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -11,7 +11,7 @@
                     configuration, dimensions, info, cos)
 from devito.exceptions import InvalidArgument
 from devito.ir.iet import (Iteration, FindNodes, IsPerfectIteration,
-                           retrieve_iteration_tree)
+                           retrieve_iteration_tree, Expression)
 from devito.passes.iet.languages.openmp import Ompizer, OmpRegion
 from devito.tools import as_tuple
 from devito.types import Scalar
@@ -765,6 +765,30 @@ def test_array_sum_reduction(self, so, dim):
 
         assert np.allclose(f.data, 18)
 
+    def test_reduction_local(self):
+        grid = Grid((11, 11))
+        d = Dimension("i")
+        n = Function(name="n", dimensions=(d,), shape=(1,))
+        u = Function(name="u", grid=grid)
+        u.data.fill(1.)
+
+        op = Operator(Inc(n[0], u))
+        op()
+
+        cond = FindNodes(Expression).visit(op)
+        iterations = FindNodes(Iteration).visit(op)
+        # Should not create any temporary for the reduction
+        assert len(cond) == 1
+        if configuration['language'] == 'C':
+            pass
+        elif Ompizer._support_array_reduction(configuration['compiler']):
+            assert "reduction(+:n[0])" in iterations[0].pragmas[0].value
+        else:
+            # E.g. old GCC's
+            assert "atomic update" in str(iterations[-1])
+
+        assert n.data[0] == 11*11
+
     def test_array_max_reduction(self):
         """
         Test generation of OpenMP sum-reduction clauses involving Function's.
diff --git a/tests/test_dse.py b/tests/test_dse.py
index e1ae16eb69..c111104506 100644
--- a/tests/test_dse.py
+++ b/tests/test_dse.py
@@ -2669,13 +2669,20 @@ def test_sparse_const(self):
 
         u = TimeFunction(name="u", grid=grid)
         src = PrecomputedSparseTimeFunction(name="src", grid=grid, npoint=1, nt=11,
-                                            r=2, interpolation_coeffs=np.ones((1, 3, 2)))
+                                            r=2, interpolation_coeffs=np.ones((1, 3, 2)),
+                                            gridpoints=[[5, 5, 5]])
+        u.data.fill(1.)
+
         op = Operator(src.interpolate(u))
 
         cond = FindNodes(Conditional).visit(op)
         assert len(cond) == 1
+        assert len(cond[0].args['then_body'][0].exprs) == 1
         assert all(e.is_scalar for e in cond[0].args['then_body'][0].exprs)
 
+        op()
+        assert np.all(src.data == 8)
+
 
 class TestIsoAcoustic(object):
 

From 47bf87c3922287304678ef170760000ad35e30f8 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 28 Sep 2023 08:33:06 -0400
Subject: [PATCH 66/90] compiler: split normalization into sparse and dense

---
 devito/ir/clusters/algorithms.py | 52 ++++++++++++++++++++------------
 devito/ir/clusters/cluster.py    |  2 +-
 devito/ir/equations/equation.py  |  3 ++
 devito/ir/support/properties.py  |  7 +++++
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py
index 6b246f1f16..19d31d5173 100644
--- a/devito/ir/clusters/algorithms.py
+++ b/devito/ir/clusters/algorithms.py
@@ -7,7 +7,7 @@
 
 from devito.exceptions import InvalidOperator
 from devito.ir.support import (Any, Backward, Forward, IterationSpace,
-                               PARALLEL_IF_ATOMIC, pull_dims)
+                               pull_dims)
 from devito.ir.clusters.analysis import analyze
 from devito.ir.clusters.cluster import Cluster, ClusterGroup
 from devito.ir.clusters.visitors import Queue, QueueStateful, cluster_pass
@@ -402,7 +402,8 @@ def normalize(clusters, **kwargs):
     sregistry = kwargs['sregistry']
 
     clusters = normalize_nested_indexeds(clusters, sregistry)
-    clusters = normalize_reductions(clusters, sregistry, options)
+    clusters = normalize_reductions_dense(clusters, sregistry, options)
+    clusters = normalize_reductions_sparse(clusters, sregistry, options)
 
     return clusters
 
@@ -444,35 +445,22 @@ def pull_indexeds(expr, subs, mapper, parent=None):
     return cluster.rebuild(processed)
 
 
-@cluster_pass(mode='all')
-def normalize_reductions(cluster, sregistry, options):
+@cluster_pass(mode='dense')
+def normalize_reductions_dense(cluster, sregistry, options):
     """
     Extract the right-hand sides of reduction Eq's in to temporaries.
     """
     opt_mapify_reduce = options['mapify-reduce']
 
-    dims = [d for d, v in cluster.properties.items() if PARALLEL_IF_ATOMIC in v]
+    dims = [d for d in cluster.properties.dimensions
+            if cluster.properties.is_parallel_atomic(d)]
 
     if not dims:
         return cluster
 
     processed = []
     for e in cluster.exprs:
-        if e.is_Reduction and e.lhs.is_Indexed and cluster.is_sparse:
-            # Transform `e` such that we reduce into a scalar (ultimately via
-            # atomic ops, though this part is carried out by a much later pass)
-            # For example, given `i = m[p_src]` (i.e., indirection array), turn:
-            # `u[t, i] += f(u[t, i], src, ...)`
-            # into
-            # `s = f(u[t, i], src, ...)`
-            # `u[t, i] += s`
-            name = sregistry.make_name()
-            v = Symbol(name=name, dtype=e.dtype)
-            processed.extend([e.func(v, e.rhs, operation=None),
-                              e.func(e.lhs, v)])
-
-        elif e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce \
-                and not cluster.is_sparse:
+        if e.is_Reduction and e.lhs.is_Symbol and opt_mapify_reduce:
             # Transform `e` into what is in essence an explicit map-reduce
             # For example, turn:
             # `s += f(u[x], v[x], ...)`
@@ -485,7 +473,31 @@ def normalize_reductions(cluster, sregistry, options):
             a = Array(name=name, dtype=e.dtype, dimensions=dims)
             processed.extend([Eq(a.indexify(), e.rhs),
                               e.func(e.lhs, a.indexify())])
+        else:
+            processed.append(e)
+
+    return cluster.rebuild(processed)
 
+
+@cluster_pass(mode='sparse')
+def normalize_reductions_sparse(cluster, sregistry, options):
+    """
+    Extract the right-hand sides of reduction Eq's in to temporaries.
+    """
+    processed = []
+    for e in cluster.exprs:
+        if e.is_Reduction and e.lhs.is_Indexed:
+            # Transform `e` such that we reduce into a scalar (ultimately via
+            # atomic ops, though this part is carried out by a much later pass)
+            # For example, given `i = m[p_src]` (i.e., indirection array), turn:
+            # `u[t, i] += f(u[t, i], src, ...)`
+            # into
+            # `s = f(u[t, i], src, ...)`
+            # `u[t, i] += s`
+            name = sregistry.make_name()
+            v = Symbol(name=name, dtype=e.dtype)
+            processed.extend([e.func(v, e.rhs, operation=None),
+                              e.func(e.lhs, v)])
         else:
             processed.append(e)
 
diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py
index ef839e2222..2143331aba 100644
--- a/devito/ir/clusters/cluster.py
+++ b/devito/ir/clusters/cluster.py
@@ -228,7 +228,7 @@ def is_dense(self):
                 not self.is_halo_touch and
                 all(a.is_regular for a in self.scope.accesses))
 
-    @property
+    @cached_property
     def is_sparse(self):
         """
         A cluster is sparse if it represent a sparse operation i.e if both
diff --git a/devito/ir/equations/equation.py b/devito/ir/equations/equation.py
index d416c22766..ecef8b82fb 100644
--- a/devito/ir/equations/equation.py
+++ b/devito/ir/equations/equation.py
@@ -76,8 +76,11 @@ def apply(self, func):
     def __repr__(self):
         if not self.is_Reduction:
             return super().__repr__()
+        elif self.operation is OpInc:
+            return '%s += %s' % (self.lhs, self.rhs)
         else:
             return '%s = %s(%s, %s)' % (self.lhs, self.operation, self.lhs, self.rhs)
+
     # Pickling support
     __reduce_ex__ = Pickable.__reduce_ex__
 
diff --git a/devito/ir/support/properties.py b/devito/ir/support/properties.py
index f4ab873575..c3c808356e 100644
--- a/devito/ir/support/properties.py
+++ b/devito/ir/support/properties.py
@@ -139,6 +139,10 @@ class Properties(frozendict):
     A mapper {Dimension -> {properties}}.
     """
 
+    @property
+    def dimensions(self):
+        return tuple(self)
+
     def add(self, dims, properties=None):
         m = dict(self)
         for d in as_tuple(dims):
@@ -205,6 +209,9 @@ def is_parallel(self, dims):
         return any(len(self[d] & {PARALLEL, PARALLEL_INDEP}) > 0
                    for d in as_tuple(dims))
 
+    def is_parallel_atomic(self, dims):
+        return any(len(self[d] & {PARALLEL_IF_ATOMIC}) > 0 for d in as_tuple(dims))
+
     def is_parallel_relaxed(self, dims):
         return any(len(self[d] & PARALLELS) > 0 for d in as_tuple(dims))
 

From ca762180b5fed78cb8843741460ad2f49b65197a Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 27 Sep 2023 10:06:54 -0400
Subject: [PATCH 67/90] api: cleanup subdimension and subdomain

---
 devito/passes/clusters/implicit.py | 29 +++++++++++++++++++----------
 devito/types/grid.py               |  3 ++-
 tests/test_subdomains.py           | 23 ++++++++++++-----------
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/devito/passes/clusters/implicit.py b/devito/passes/clusters/implicit.py
index 78454c1f1a..42e528d904 100644
--- a/devito/passes/clusters/implicit.py
+++ b/devito/passes/clusters/implicit.py
@@ -107,20 +107,25 @@ def callback(self, clusters, prefix):
             exprs, dims, sub_iterators = make_implicit_exprs(d.msd, c)
 
             # The IterationSpace induced by the MultiSubDomain
-            intervals = [Interval(i, 0, 0) for i in dims]
-            relations = (ispace0.itdimensions + dims, dims + ispace1.itdimensions)
-            ispaceN = IterationSpace(
-                IntervalGroup(intervals, relations=relations), sub_iterators
-            )
+            if dims:
+                intervals = [Interval(i) for i in dims]
+                relations = (ispace0.itdimensions + dims, dims + ispace1.itdimensions)
+                ispaceN = IterationSpace(
+                    IntervalGroup(intervals, relations=relations), sub_iterators
+                )
+                ispace = IterationSpace.union(ispace0, ispaceN)
+            else:
+                ispaceN = None
+                ispace = ispace0
 
-            ispace = IterationSpace.union(ispace0, ispaceN)
             properties = {i.dim: {SEQUENTIAL} for i in ispace}
-            if len(ispaceN) == 0:
+
+            if not ispaceN:
                 # Special case: we can factorize the thickness assignments
                 # once and for all at the top of the current IterationInterval,
                 # and reuse them for one or more (potentially non-consecutive)
                 # `clusters`
-                if ispaceN not in seen:
+                if d not in seen:
                     # Retain the guards and the syncs along the outer Dimensions
                     retained = {None} | set(c.ispace[:n-1].dimensions)
 
@@ -143,8 +148,12 @@ def callback(self, clusters, prefix):
                     )
                     tip = nxt
 
-            ispace = IterationSpace.union(c.ispace, ispaceN)
-            processed.append(c.rebuild(ispace=ispace))
+            if ispaceN:
+                ispace = IterationSpace.union(c.ispace, ispaceN)
+                processed.append(c.rebuild(ispace=ispace))
+            else:
+                processed.append(c)
+            seen.add(d)
 
         return processed
 
diff --git a/devito/types/grid.py b/devito/types/grid.py
index faecb25236..de91284441 100644
--- a/devito/types/grid.py
+++ b/devito/types/grid.py
@@ -725,7 +725,8 @@ def __subdomain_finalize__(self, grid, counter=0, **kwargs):
 
         # Create the SubDomainSet SubDimensions
         self._dimensions = tuple(
-            MultiSubDimension('%si' % d.name, d, self) for d in grid.dimensions
+            MultiSubDimension('%si%d' % (d.name, counter), d, self)
+            for d in grid.dimensions
         )
 
         # Compute the SubDomainSet shapes
diff --git a/tests/test_subdomains.py b/tests/test_subdomains.py
index f7a8c3df4d..984f2c29ef 100644
--- a/tests/test_subdomains.py
+++ b/tests/test_subdomains.py
@@ -447,7 +447,7 @@ class DummySubdomains(SubDomainSet):
         # Make sure it jit-compiles
         op.cfunction
 
-        assert_structure(op, ['x,y', 't,n0', 't,n0,xi,yi'], 'x,y,t,n0,xi,yi')
+        assert_structure(op, ['x,y', 't,n0', 't,n0,xi2,yi2'], 'x,y,t,n0,xi2,yi2')
 
     def test_issue_1761_b(self):
         """
@@ -486,8 +486,8 @@ class DummySubdomains2(SubDomainSet):
         op.cfunction
 
         assert_structure(op,
-                         ['x,y', 't,n0', 't,n0,xi,yi', 't,n1', 't,n1,xi,yi'],
-                         'x,y,t,n0,xi,yi,n1,xi,yi')
+                         ['x,y', 't,n0', 't,n0,xi2,yi2', 't,n1', 't,n1,xi3,yi3'],
+                         'x,y,t,n0,xi2,yi2,n1,xi3,yi3')
 
     def test_issue_1761_c(self):
         """
@@ -522,9 +522,9 @@ class DummySubdomains2(SubDomainSet):
         # Make sure it jit-compiles
         op.cfunction
 
-        assert_structure(op, ['x,y', 't,n0', 't,n0,xi,yi',
-                              't,n1', 't,n1,xi,yi', 't,n0', 't,n0,xi,yi'],
-                         'x,y,t,n0,xi,yi,n1,xi,yi,n0,xi,yi')
+        assert_structure(op, ['x,y', 't,n0', 't,n0,xi2,yi2',
+                              't,n1', 't,n1,xi3,yi3', 't,n0', 't,n0,xi2,yi2'],
+                         'x,y,t,n0,xi2,yi2,n1,xi3,yi3,n0,xi2,yi2')
 
     def test_issue_1761_d(self):
         """
@@ -549,7 +549,8 @@ class Dummy(SubDomainSet):
         # Make sure it jit-compiles
         op.cfunction
 
-        assert_structure(op, ['t,n0', 't,n0,xi,yi', 't,n0,xi,yi'], 't,n0,xi,yi,xi,yi')
+        assert_structure(op, ['t,n0', 't,n0,xi2,yi2', 't,n0,xi2,yi2'],
+                         't,n0,xi2,yi2,xi2,yi2')
 
     def test_guarding(self):
 
@@ -576,8 +577,8 @@ class Dummy(SubDomainSet):
         # Make sure it jit-compiles
         op.cfunction
 
-        assert_structure(op, ['t', 't,n0', 't,n0,xi,yi', 't,n0', 't,n0,xi,yi'],
-                         't,n0,xi,yi,n0,xi,yi')
+        assert_structure(op, ['t', 't,n0', 't,n0,xi2,yi2', 't,n0', 't,n0,xi2,yi2'],
+                         't,n0,xi2,yi2,n0,xi2,yi2')
 
     def test_3D(self):
 
@@ -597,8 +598,8 @@ class Dummy(SubDomainSet):
         # Make sure it jit-compiles
         op.cfunction
 
-        assert_structure(op, ['t,n0', 't,n0,xi0_blk0,yi0_blk0,xi,yi,zi'],
-                         't,n0,xi0_blk0,yi0_blk0,xi,yi,zi')
+        assert_structure(op, ['t,n0', 't,n0,xi20_blk0,yi20_blk0,xi2,yi2,zi2'],
+                         't,n0,xi20_blk0,yi20_blk0,xi2,yi2,zi2')
 
     def test_sequential_implicit(self):
         """

From 7887f8672bec6b75f318877c763877f6b0b0ae6c Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Thu, 28 Sep 2023 18:08:33 -0400
Subject: [PATCH 68/90] ci: switch icx to self hosted

---
 .github/workflows/pytest-core-mpi.yml   | 11 +++++++++--
 .github/workflows/pytest-core-nompi.yml |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pytest-core-mpi.yml b/.github/workflows/pytest-core-mpi.yml
index 0811d51289..ddb9fd2846 100644
--- a/.github/workflows/pytest-core-mpi.yml
+++ b/.github/workflows/pytest-core-mpi.yml
@@ -56,10 +56,17 @@ jobs:
 
   test-mpi-docker:
       name: pytest-mpi
-      runs-on: ubuntu-latest
+      runs-on: ${{ matrix.os }}
       strategy:
         matrix:
-          arch: [gcc, icx]
+          name: [gcc, icx]
+          include:
+            - name: gcc
+              arch: gcc
+              os: ubuntu-latest
+            - name: icx
+              arch: icx
+              os: ["self-hosted", "docker"]
       
       steps:
       - name: Checkout devito
diff --git a/.github/workflows/pytest-core-nompi.yml b/.github/workflows/pytest-core-nompi.yml
index 63c65d8813..5370d47118 100644
--- a/.github/workflows/pytest-core-nompi.yml
+++ b/.github/workflows/pytest-core-nompi.yml
@@ -100,7 +100,7 @@ jobs:
 
         - name: pytest-docker-py39-icx-omp
           python-version: '3.9'
-          os: ubuntu-22.04
+          os: ["self-hosted", "docker"]
           arch: "icx"
           language: "openmp"
           sympy: "1.12"

From bd54b78d65809a9a9c1e359b637dd9f0a9faede8 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Fri, 29 Sep 2023 08:52:52 -0400
Subject: [PATCH 69/90] compiler: fix corner case reduction

---
 devito/ir/clusters/visitors.py |  4 ++--
 tests/test_dle.py              | 28 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/devito/ir/clusters/visitors.py b/devito/ir/clusters/visitors.py
index 95062bb427..f9bb36d6e3 100644
--- a/devito/ir/clusters/visitors.py
+++ b/devito/ir/clusters/visitors.py
@@ -200,9 +200,9 @@ def __init__(self, func, mode='dense'):
         self.func = func
 
         if mode == 'dense':
-            self.cond = lambda c: c.is_dense
+            self.cond = lambda c: c.is_dense or not c.is_sparse
         elif mode == 'sparse':
-            self.cond = lambda c: not c.is_dense
+            self.cond = lambda c: c.is_sparse
         else:
             self.cond = lambda c: True
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 9c297e0dca..f78ff17585 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -14,7 +14,7 @@
                            retrieve_iteration_tree, Expression)
 from devito.passes.iet.languages.openmp import Ompizer, OmpRegion
 from devito.tools import as_tuple
-from devito.types import Scalar
+from devito.types import Scalar, Symbol
 
 
 def get_blocksizes(op, opt, grid, blockshape, level=0):
@@ -789,6 +789,32 @@ def test_reduction_local(self):
 
         assert n.data[0] == 11*11
 
+    def test_mapify_reduction_sparse(self):
+        grid = Grid((11, 11))
+        s = SparseTimeFunction(name="s", grid=grid, npoint=1, nt=11)
+        s.data.fill(1.)
+        r = Symbol(name="r", dtype=np.float32)
+        n0 = Function(name="n0", dimensions=(Dimension("noi"),), shape=(1,))
+
+        eqns = [Eq(r, 0), Inc(r, s*s), Eq(n0[0], r)]
+        op0 = Operator(eqns)
+        op1 = Operator(eqns, opt=('advanced', {'mapify-reduce': True}))
+        
+        expr0 = FindNodes(Expression).visit(op0)
+        assert len(expr0) == 3
+        assert expr0[1].is_reduction
+
+        expr1 = FindNodes(Expression).visit(op1)
+        assert len(expr1) == 4
+        assert expr1[1].expr.lhs.indices == s.indices
+        assert expr1[2].expr.rhs.is_Indexed
+        assert expr1[2].is_reduction
+        
+        op0()
+        assert n0.data[0] == 11
+        op1()
+        assert n0.data[0] == 11  
+
     def test_array_max_reduction(self):
         """
         Test generation of OpenMP sum-reduction clauses involving Function's.

From ce145902d1a28461f94beab9b952b831b962c720 Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Fri, 29 Sep 2023 08:59:18 -0400
Subject: [PATCH 70/90] ci: add configuration check

---
 .github/workflows/pytest-core-mpi.yml   | 2 +-
 .github/workflows/pytest-core-nompi.yml | 6 +++++-
 tests/test_buffering.py                 | 2 +-
 tests/test_dle.py                       | 6 +++---
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pytest-core-mpi.yml b/.github/workflows/pytest-core-mpi.yml
index ddb9fd2846..581fc94e9c 100644
--- a/.github/workflows/pytest-core-mpi.yml
+++ b/.github/workflows/pytest-core-mpi.yml
@@ -66,7 +66,7 @@ jobs:
               os: ubuntu-latest
             - name: icx
               arch: icx
-              os: ["self-hosted", "docker"]
+              os: ubuntu-latest
       
       steps:
       - name: Checkout devito
diff --git a/.github/workflows/pytest-core-nompi.yml b/.github/workflows/pytest-core-nompi.yml
index 5370d47118..1cbf3d510d 100644
--- a/.github/workflows/pytest-core-nompi.yml
+++ b/.github/workflows/pytest-core-nompi.yml
@@ -100,7 +100,7 @@ jobs:
 
         - name: pytest-docker-py39-icx-omp
           python-version: '3.9'
-          os: ["self-hosted", "docker"]
+          os: ubuntu-latest
           arch: "icx"
           language: "openmp"
           sympy: "1.12"
@@ -160,6 +160,10 @@ jobs:
         pip install -e .[tests]
         pip install sympy==${{matrix.sympy}}
 
+    - name: Check configuration
+      run: |
+        ${{ env.RUN_CMD }} python3 -c "from devito import configuration; print(''.join(['%s: %s \n' % (k, v) for (k, v) in configuration.items()]))"
+
     - name: Test with pytest
       run: |
         ${{ env.RUN_CMD }} ${{ matrix.arch }} --version
diff --git a/tests/test_buffering.py b/tests/test_buffering.py
index 16f98b4f94..d3c8667025 100644
--- a/tests/test_buffering.py
+++ b/tests/test_buffering.py
@@ -268,7 +268,7 @@ def test_over_injection():
             rec.interpolate(expr=u.forward))
 
     op0 = Operator(eqns, opt='noop')
-    op1 = Operator(eqns, opt='buffering')
+    op1 = Operator(eqns, opt=('buffering', {'par-collapse-work': 0}))
 
     # Check generated code
     assert len(retrieve_iteration_tree(op1)) == \
diff --git a/tests/test_dle.py b/tests/test_dle.py
index f78ff17585..9dcd6d3d6e 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -799,7 +799,7 @@ def test_mapify_reduction_sparse(self):
         eqns = [Eq(r, 0), Inc(r, s*s), Eq(n0[0], r)]
         op0 = Operator(eqns)
         op1 = Operator(eqns, opt=('advanced', {'mapify-reduce': True}))
-        
+
         expr0 = FindNodes(Expression).visit(op0)
         assert len(expr0) == 3
         assert expr0[1].is_reduction
@@ -809,11 +809,11 @@ def test_mapify_reduction_sparse(self):
         assert expr1[1].expr.lhs.indices == s.indices
         assert expr1[2].expr.rhs.is_Indexed
         assert expr1[2].is_reduction
-        
+
         op0()
         assert n0.data[0] == 11
         op1()
-        assert n0.data[0] == 11  
+        assert n0.data[0] == 11
 
     def test_array_max_reduction(self):
         """

From c3abf22435dc70b7660c6edfdb1fa8dcbf77c2be Mon Sep 17 00:00:00 2001
From: Andrew <AndrewCheng827827@gmail.com>
Date: Fri, 22 Sep 2023 11:42:55 +0100
Subject: [PATCH 71/90] compiler: Fix issue 2194

---
 devito/ir/support/basic.py | 13 +++++++++-
 tests/test_operator.py     | 52 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/devito/ir/support/basic.py b/devito/ir/support/basic.py
index 98ba9da51a..0ca1da8796 100644
--- a/devito/ir/support/basic.py
+++ b/devito/ir/support/basic.py
@@ -426,7 +426,18 @@ def distance(self, other):
             else:
                 v = i - j
                 if v.is_Number and v.is_finite:
-                    return Vector(S.ImaginaryUnit)
+                    if i.is_Number and j.is_Number:
+                        return Vector(S.ImaginaryUnit)
+                    else:
+                        # For example:
+                        # self=W<u,[0,y]> and other=R<u,[0,y+1]>
+                        ret.append(v)
+
+                # Writing (reading) over an entire dimension, reading (writing)
+                # from one point. For example:
+                # self=R<u,[1,2]> and other=W<u,[1, y+1]>
+                elif (not i.is_Number or not j.is_Number):
+                    ret.append(S.Infinity)
 
         return Vector(*ret)
 
diff --git a/tests/test_operator.py b/tests/test_operator.py
index 3064565e3c..b2188d007c 100644
--- a/tests/test_operator.py
+++ b/tests/test_operator.py
@@ -1936,6 +1936,58 @@ def test_topofuse_w_numeric_dim(self):
 
         assert_structure(op, ['r,i', 'r'], 'r,i')
 
+    @pytest.mark.parametrize('eqns, expected, exp_trees, exp_iters', [
+        (['Eq(u[0, x], 1)',
+            'Eq(u[1, x], u[0, x + h_x] + u[0, x - h_x] - 2*u[0, x])'],
+            np.array([[1., 1., 1.], [-1., 0., -1.]]),
+            ['x', 'x'], 'x,x')
+    ])
+    def test_2194(self, eqns, expected, exp_trees, exp_iters):
+        grid = Grid(shape=(3, ))
+        u = TimeFunction(name='u', grid=grid)
+        x = grid.dimensions[0]
+        h_x = x.spacing  # noqa: F841
+
+        for i, e in enumerate(list(eqns)):
+            eqns[i] = eval(e)
+
+        op = Operator(eqns)
+        assert_structure(op, exp_trees, exp_iters)
+
+        op.apply()
+        assert(np.all(u.data[:] == expected[:]))
+
+    @pytest.mark.parametrize('eqns, expected, exp_trees, exp_iters', [
+        (['Eq(u[0, y], 1)', 'Eq(u[1, y], u[0, y + 1])'],
+            np.array([[1., 1.], [1., 0.]]),
+            ['y', 'y'], 'y,y'),
+        (['Eq(u[0, y], 1)', 'Eq(u[1, y], u[0, 2])'],
+            np.array([[1., 1.], [0., 0.]]),
+            ['y', 'y'], 'y,y'),
+        (['Eq(u[0, y], 1)', 'Eq(u[1, y], u[0, 1])'],
+            np.array([[1., 1.], [1., 1.]]),
+            ['y', 'y'], 'y,y'),
+        (['Eq(u[0, y], 1)', 'Eq(u[1, y], u[0, y + 1])'],
+            np.array([[1., 1.], [1., 0.]]),
+            ['y', 'y'], 'y,y'),
+        (['Eq(u[0, 1], 1)', 'Eq(u[x, y], u[0, y])'],
+            np.array([[0., 1.], [0., 1.]]),
+            ['xy'], 'x,y')
+    ])
+    def test_2194_v2(self, eqns, expected, exp_trees, exp_iters):
+        grid = Grid(shape=(2, 2))
+        u = Function(name='u', grid=grid)
+        x, y = grid.dimensions
+
+        for i, e in enumerate(list(eqns)):
+            eqns[i] = eval(e)
+
+        op = Operator(eqns)
+        assert_structure(op, exp_trees, exp_iters)
+
+        op.apply()
+        assert(np.all(u.data[:] == expected[:]))
+
 
 class TestInternals(object):
 

From 798d2fffee15fa54fa7a9b145de236e98e98600f Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 4 Oct 2023 08:05:39 -0400
Subject: [PATCH 72/90] documentation: update ci for new website setup

---
 .github/workflows/documentation.yml    |  27 +-
 docs/Makefile                          | 225 -------------
 docs/make.bat                          | 281 ----------------
 docs/source/_static/devito_logo.png    | Bin 6475 -> 0 bytes
 docs/source/_static/devito_style.css   |  75 -----
 docs/source/_templates/layout.html     |   4 -
 docs/source/builtins.rst               |  12 -
 docs/source/conf.py                    | 430 -------------------------
 docs/source/constant.rst               |   7 -
 docs/source/dimension.rst              |   7 -
 docs/source/download.rst               | 132 --------
 docs/source/equation.rst               |   7 -
 docs/source/finite-difference.rst      |   6 -
 docs/source/function.rst               |   9 -
 docs/source/grid.rst                   |   7 -
 docs/source/grids.rst                  |   8 -
 docs/source/index.rst                  |  38 ---
 docs/source/operator.rst               |   7 -
 docs/source/precsparsefunction.rst     |   7 -
 docs/source/precsparsetimefunction.rst |   7 -
 docs/source/sparsefunction.rst         |   7 -
 docs/source/sparsetimefunction.rst     |   7 -
 docs/source/subdomain.rst              |   7 -
 docs/source/symbolic.rst               |  18 --
 docs/source/tensorfunction.rst         |   9 -
 docs/source/tensortimefunction.rst     |   9 -
 docs/source/timefunction.rst           |   9 -
 docs/source/userapi.rst                |  12 -
 docs/source/vectorfunction.rst         |   9 -
 docs/source/vectortimefunction.rst     |   9 -
 30 files changed, 5 insertions(+), 1387 deletions(-)
 delete mode 100644 docs/Makefile
 delete mode 100644 docs/make.bat
 delete mode 100644 docs/source/_static/devito_logo.png
 delete mode 100644 docs/source/_static/devito_style.css
 delete mode 100644 docs/source/_templates/layout.html
 delete mode 100644 docs/source/builtins.rst
 delete mode 100644 docs/source/conf.py
 delete mode 100644 docs/source/constant.rst
 delete mode 100644 docs/source/dimension.rst
 delete mode 100644 docs/source/download.rst
 delete mode 100644 docs/source/equation.rst
 delete mode 100644 docs/source/finite-difference.rst
 delete mode 100644 docs/source/function.rst
 delete mode 100644 docs/source/grid.rst
 delete mode 100644 docs/source/grids.rst
 delete mode 100644 docs/source/index.rst
 delete mode 100644 docs/source/operator.rst
 delete mode 100644 docs/source/precsparsefunction.rst
 delete mode 100644 docs/source/precsparsetimefunction.rst
 delete mode 100644 docs/source/sparsefunction.rst
 delete mode 100644 docs/source/sparsetimefunction.rst
 delete mode 100644 docs/source/subdomain.rst
 delete mode 100644 docs/source/symbolic.rst
 delete mode 100644 docs/source/tensorfunction.rst
 delete mode 100644 docs/source/tensortimefunction.rst
 delete mode 100644 docs/source/timefunction.rst
 delete mode 100644 docs/source/userapi.rst
 delete mode 100644 docs/source/vectorfunction.rst
 delete mode 100644 docs/source/vectortimefunction.rst

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 9e7bb1d39d..b6503299c8 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -7,31 +7,14 @@ on:
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        # TODO: 3.x should be fine, but for now we enforce 3.10 to work around
-        # the "fatal error: longintrepr.h: No such file or directory" error
-        # triggered by revolve/cython in 3.11
-        python-version: '3.10'
-    - name: Install Sphinx
-      run: |
-        python -m pip install --upgrade pip
-        pip install sphinx sphinx_rtd_theme
-        pip install -e .
-
-    - name: Generate documentation
-      working-directory: docs
-      run: make html
 
-    - name: Deploy
-      uses: peaceiris/actions-gh-pages@v3
+    - name: Repository Dispatch
+      uses: peter-evans/repository-dispatch@v2
       with:
-        personal_token: ${{ secrets.PERSONAL_TOKEN }}
-        publish_branch: gh-pages
-        publish_dir: ./docs/_build/html
+        token: ${{ secrets.PERSONAL_TOKEN }}
+        repository: devitocodes/devitoproject.org
+        event-type: deploy-docs
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index f830cd685c..0000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,225 +0,0 @@
-# Makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line.
-SPHINXOPTS    =
-SPHINXBUILD   = sphinx-build
-PAPER         =
-BUILDDIR      = _build
-
-# Internal variables.
-PAPEROPT_a4     = -D latex_paper_size=a4
-PAPEROPT_letter = -D latex_paper_size=letter
-ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
-# the i18n builder cannot share the environment and doctrees with the others
-I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
-
-.PHONY: help
-help:
-	@echo "Please use \`make <target>' where <target> is one of"
-	@echo "  html       to make standalone HTML files"
-	@echo "  dirhtml    to make HTML files named index.html in directories"
-	@echo "  singlehtml to make a single large HTML file"
-	@echo "  pickle     to make pickle files"
-	@echo "  json       to make JSON files"
-	@echo "  htmlhelp   to make HTML files and a HTML help project"
-	@echo "  qthelp     to make HTML files and a qthelp project"
-	@echo "  applehelp  to make an Apple Help Book"
-	@echo "  devhelp    to make HTML files and a Devhelp project"
-	@echo "  epub       to make an epub"
-	@echo "  epub3      to make an epub3"
-	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
-	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
-	@echo "  text       to make text files"
-	@echo "  man        to make manual pages"
-	@echo "  texinfo    to make Texinfo files"
-	@echo "  info       to make Texinfo files and run them through makeinfo"
-	@echo "  gettext    to make PO message catalogs"
-	@echo "  changes    to make an overview of all changed/added/deprecated items"
-	@echo "  xml        to make Docutils-native XML files"
-	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
-	@echo "  linkcheck  to check all external links for integrity"
-	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
-	@echo "  coverage   to run coverage check of the documentation (if enabled)"
-	@echo "  dummy      to check syntax errors of document sources"
-
-.PHONY: clean
-clean:
-	rm -rf $(BUILDDIR)/*
-
-.PHONY: html
-html:
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
-
-.PHONY: dirhtml
-dirhtml:
-	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-.PHONY: singlehtml
-singlehtml:
-	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
-	@echo
-	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-.PHONY: pickle
-pickle:
-	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-	@echo
-	@echo "Build finished; now you can process the pickle files."
-
-.PHONY: json
-json:
-	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-	@echo
-	@echo "Build finished; now you can process the JSON files."
-
-.PHONY: htmlhelp
-htmlhelp:
-	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-	@echo
-	@echo "Build finished; now you can run HTML Help Workshop with the" \
-	      ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-.PHONY: qthelp
-qthelp:
-	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-	@echo
-	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
-	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Devito.qhcp"
-	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Devito.qhc"
-
-.PHONY: applehelp
-applehelp:
-	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
-	@echo
-	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
-	@echo "N.B. You won't be able to view it unless you put it in" \
-	      "~/Library/Documentation/Help or install it in your application" \
-	      "bundle."
-
-.PHONY: devhelp
-devhelp:
-	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
-	@echo
-	@echo "Build finished."
-	@echo "To view the help file:"
-	@echo "# mkdir -p $$HOME/.local/share/devhelp/Devito"
-	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Devito"
-	@echo "# devhelp"
-
-.PHONY: epub
-epub:
-	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
-	@echo
-	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-.PHONY: epub3
-epub3:
-	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
-	@echo
-	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
-
-.PHONY: latex
-latex:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo
-	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
-	@echo "Run \`make' in that directory to run these through (pdf)latex" \
-	      "(use \`make latexpdf' here to do that automatically)."
-
-.PHONY: latexpdf
-latexpdf:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through pdflatex..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: latexpdfja
-latexpdfja:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through platex and dvipdfmx..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: text
-text:
-	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
-	@echo
-	@echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-.PHONY: man
-man:
-	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
-	@echo
-	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-.PHONY: texinfo
-texinfo:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo
-	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
-	@echo "Run \`make' in that directory to run these through makeinfo" \
-	      "(use \`make info' here to do that automatically)."
-
-.PHONY: info
-info:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo "Running Texinfo files through makeinfo..."
-	make -C $(BUILDDIR)/texinfo info
-	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-.PHONY: gettext
-gettext:
-	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
-	@echo
-	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-.PHONY: changes
-changes:
-	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
-	@echo
-	@echo "The overview file is in $(BUILDDIR)/changes."
-
-.PHONY: linkcheck
-linkcheck:
-	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
-	@echo
-	@echo "Link check complete; look for any errors in the above output " \
-	      "or in $(BUILDDIR)/linkcheck/output.txt."
-
-.PHONY: doctest
-doctest:
-	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
-	@echo "Testing of doctests in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/doctest/output.txt."
-
-.PHONY: coverage
-coverage:
-	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
-	@echo "Testing of coverage in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/coverage/python.txt."
-
-.PHONY: xml
-xml:
-	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
-	@echo
-	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-.PHONY: pseudoxml
-pseudoxml:
-	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
-	@echo
-	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
-
-.PHONY: dummy
-dummy:
-	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
-	@echo
-	@echo "Build finished. Dummy builder generates no files."
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 1576fc85db..0000000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,281 +0,0 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
-	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
-	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
-	:help
-	echo.Please use `make ^<target^>` where ^<target^> is one of
-	echo.  html       to make standalone HTML files
-	echo.  dirhtml    to make HTML files named index.html in directories
-	echo.  singlehtml to make a single large HTML file
-	echo.  pickle     to make pickle files
-	echo.  json       to make JSON files
-	echo.  htmlhelp   to make HTML files and a HTML help project
-	echo.  qthelp     to make HTML files and a qthelp project
-	echo.  devhelp    to make HTML files and a Devhelp project
-	echo.  epub       to make an epub
-	echo.  epub3      to make an epub3
-	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
-	echo.  text       to make text files
-	echo.  man        to make manual pages
-	echo.  texinfo    to make Texinfo files
-	echo.  gettext    to make PO message catalogs
-	echo.  changes    to make an overview over all changed/added/deprecated items
-	echo.  xml        to make Docutils-native XML files
-	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
-	echo.  linkcheck  to check all external links for integrity
-	echo.  doctest    to run all doctests embedded in the documentation if enabled
-	echo.  coverage   to run coverage check of the documentation if enabled
-	echo.  dummy      to check syntax errors of document sources
-	goto end
-)
-
-if "%1" == "clean" (
-	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
-	del /q /s %BUILDDIR%\*
-	goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-:sphinx_ok
-
-
-if "%1" == "html" (
-	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
-	goto end
-)
-
-if "%1" == "dirhtml" (
-	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
-	goto end
-)
-
-if "%1" == "singlehtml" (
-	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
-	goto end
-)
-
-if "%1" == "pickle" (
-	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the pickle files.
-	goto end
-)
-
-if "%1" == "json" (
-	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the JSON files.
-	goto end
-)
-
-if "%1" == "htmlhelp" (
-	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
-	goto end
-)
-
-if "%1" == "qthelp" (
-	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
-	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Devito.qhcp
-	echo.To view the help file:
-	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Devito.ghc
-	goto end
-)
-
-if "%1" == "devhelp" (
-	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished.
-	goto end
-)
-
-if "%1" == "epub" (
-	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub file is in %BUILDDIR%/epub.
-	goto end
-)
-
-if "%1" == "epub3" (
-	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
-	goto end
-)
-
-if "%1" == "latex" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdf" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdfja" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf-ja
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "text" (
-	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The text files are in %BUILDDIR%/text.
-	goto end
-)
-
-if "%1" == "man" (
-	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The manual pages are in %BUILDDIR%/man.
-	goto end
-)
-
-if "%1" == "texinfo" (
-	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
-	goto end
-)
-
-if "%1" == "gettext" (
-	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
-	goto end
-)
-
-if "%1" == "changes" (
-	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.The overview file is in %BUILDDIR%/changes.
-	goto end
-)
-
-if "%1" == "linkcheck" (
-	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
-	goto end
-)
-
-if "%1" == "doctest" (
-	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
-	goto end
-)
-
-if "%1" == "coverage" (
-	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
-	goto end
-)
-
-if "%1" == "xml" (
-	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The XML files are in %BUILDDIR%/xml.
-	goto end
-)
-
-if "%1" == "pseudoxml" (
-	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
-	goto end
-)
-
-if "%1" == "dummy" (
-	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. Dummy builder generates no files.
-	goto end
-)
-
-:end
diff --git a/docs/source/_static/devito_logo.png b/docs/source/_static/devito_logo.png
deleted file mode 100644
index 1e1192841e96401bae6974ee014d4873f6e0109b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6475
zcmV-R8MNk!P)<h;3K|Lk000e1NJLTq004jh004jp1^@s6!#-il00006VoOIv00000
z008+zyMF)x010qNS#tmY8`b~-8`c4g$`7gl000McNliru<OCA}831mn(DMKQ7}!Zf
zK~#9!?Ol0%RMqwWp0~`BNix}Y2n50=n}D(iD4=Mi(u$%`#idnLv{q~DQh!?O)~dg{
zZ&-g;3W5uQKv7X#Kp<?9Jt06y$VL{DOeV{$@7>=YlY~Ve_q~}35#HAiF`2n@&zpO`
z_uO;N-3YC?XWP=*adO1mvX%z;nF@gj1cE>WfdGO40w53p00Qp<Ab{HbZhN8q=TC8}
z*N=Ud=-5}<ULz0-ycdu~N9DFY*YVFx5J`Xlg4p`INCb%l2qZuf1jr;nA`vE0#Olfe
ze=cpQuS>K<qIYCG21F-gXiOS<M<u+6vls*b|0>*c`InUiUkXGxSJC#(RRAhuP?-vk
zsz4+{mMsn=;<}-CWWttJRRzDD*=;ELMJMe(Yrgl&!};v}qOCB{F&F@!svy>6#jMO>
zn3SANYpM&Si<5f!f3^g8D(`Ey(@|+?S2GzH1Q5)kq$HSZhhH3e)c_)D{JL^KU6zu~
zel`Snbk~<`Ut^7-(ICm7FoqrxacK0rKY4NJ)V<NN`SG6x0UqDIisd`24K0RnnD8h*
zT$0oa$(G1RZ_FJ2Y?mg$6T81=dz{7)5DxfMh4r+>)0DJ+Z%<0<{W1|1bU`0rHQVVp
zY-lUYFbN`_9(cuso{@3ica>)n06+v8+UW#%RRs_3T>0IUweLChI;(qj9RiSGXr~hq
zA<6c~cCR}4`0iCl0O0eA{4>@ACE#dwPm7b&<o7U(Ak)!TPC3Ga+1=7L>a|0K3ZJS#
zB%w7+*qTWMA>46Xp0g_M_AQIqhPsjm0K9m{e1LxztYI5#OE^D3h>+wJ&kerPmTZZd
z^t1Qa85>pOX@0oe<v?L`9g3UlQP@-qhu0n3!xl5X-s?i0uLT}OVX`0)g@M#QRlyZ0
z{cvaRQFKNK@M6JQmQ#C(>jM^1E}1`aM&`L@w(}Y)8$K-E;oR=15Thh>s$20R)T#4k
zzXy->pEze+V%A%o>I3k85kFILTh;~Vq{A@4o??G;;N{(xkG(Z*a7>!~!jLI@vm)c+
zQA3vjwn=8ZvVZ-X_ibDLOs86aSNE@DKhzX)d9fdiocVpEY@T4aasljhRuB26`atip
zmWIW9oK-LjV$gM}jG=FIBAy-e3+hr9Kp=uel-3$*KZ<_QNjvWDJ?7KD5B>$+o^|2%
z;}UxwP{D$7AO#{6IBRhK_T>x!Zrkz+g_Quj-a2CpUR51tsQu))Ds8~W_hygH{$$jw
zOM2L1zcdSyZXyUoC~IlJO`ASs)4B~x2x}O~8}Kjy3RAKTRd=8~fz|+60|2wPEZ*hx
zdj={@)m4{E@!^$%b=Aeq^>cgJV($!#1<+z&6vk|Ziuv)*1=kMxaP$q7WRBEbN3?9l
z?OPY$QRQk}8J;0PLpZ}2GqkD$FAToYett~aold_;R|Ce#7To#ck}I8l_lA%U0h&Bi
z7zEIR2-)leFt@;2UGr|ymQN2h)oRWoNF?CaO&^bDjP)U+eIXnIG*|#bmqDR#bkdR+
zhD@QW(+B$W_yqzHrsvEr82ZJ_(jz-Rrw|dqumGKn8+!~h4YsHL-YjS~!XOaFCTIF)
zW)3&6400k#b5_sr0YZl)OXM5B&mJ>fPn%Y`oA7nzUe}bg{wxFp&@u!V1PB%8^d7@T
z#%C^m@%*dCDmqAzQ+o(Aau&WbCwHlCD<Eke0)nsrH*Q=s_K2tX?>xQ_k+9?QalgIi
zwDoOTM1LJ{*|>SdTc2_GT!@fNh?2~(i84eI0(lECsA?cL#~2_9A}peaNXd+NQv}ki
zb|ji?NU}!OnFZ-rXMzabBVyMx#*%)sX<>z5RYhKv5C{rWZXcJ}lf4Pt(J4_tFe5;O
zEXPNw=CPs5Yc3c$lX*AHU!yPvqNC5oPP;yVwL7znvDS?sDip@>t99?QbZazvMaHAb
z-LyKvZ2NI`)<{!~$$EddNkBw30Oad7%r9j?x|V*qqGHc%=p;a=JAq8YzmBlEw)A6{
z;%6t@5_a-`N8gi3fGmhG3nHwdj7Z4@lOUqT(~OO^B`9vLyL4mip$D(eS@4H@a+k6<
z4s7^nL2>T1a1-I9(Kq%=u|#R=BR*Bd-MLHIu7)FmE&{MlB0z)J9n$3#yPby{MH0d?
zQNj>dDi<wVsLIoXHPwaJeOk7A@wD~x*e=ImmQ!1j77n>3Ee_u!118?A^>fvpCLHlN
zmWDkFP=hc4E;Y2fry9I&bk=r820#)75=p?5d8_ea!J6{_6m1Q;N1SPkWoee^|6G~c
zZ=z3CG=27^f*+=?J5qF2SS>(c)YRLGYDgQ)qx#S2dgf&nd6>27L-yG2ReM5R_pLoI
z_%16l{vs`(F^dwGmGAjLhXvrLKrl`qA+(E$Jc@!d34sW1#fLo&l>_GFE@f+~4=xO4
z{r?>D%T1Rh_kqIr?e1P@P2_9)*Z*UAc^+vXK<9*kp*Ghost?o!_B4njpvKdL7Ye?g
zaovXbbNx&iJhyOT&<{pL%IsbvDMB*U^M1x6N_4Qf?xCsa16WuH5X@|VO*DnH;jN<F
zr4mzCQ6_+9-?we~j(;E88Xc;7-y3n=GA$kNP;(u7ZU6f1Ve$d=!zP<Ts;=96UZ{9g
z1ziaY3{~zXB%2}~6)tB}DC#tc;%}48w&!@sE|Ngo9Th{u;sXSo17Pfwy_U5$VB3Ek
zAw9xA`|^}N9n{C4R|q9cf{5S$_?hjSs{M7LsPC;2*F4-ODv7(i!|TG_!c86S_?5I<
z0R2rFBUv|*iGEE4wD`U79}^XbL@)_5WTAajs_m%04M&P;bbMyY!KS)VWv;;4MzSEn
zL?VRN^MTrSy)iZL<SJCCj6q>46aW>j2G<h<E?c#?u^I)9HK=ws!=-qcS(JlzSp*`y
zx_^D_{6o3BXJrl_7))KS>{~|}5z)We>8SWDFw}G``|GNGiOb9PL{3dVuj$l|Ie@nc
zH?VK33OjrVud3krAy=05jY{f<vjkPHMoP9u9mndgD)+s<uw<LFn>FT<{KhIUhM+cq
zv?x9{v-{AEH)akS6YTTOE85EbdpMul;$D+6@Tr;I&wu>%J^*K@rXa(>u80$r4*-)>
z`rI=mwg00}4Z55bkGY95BJAs9WGnnkUm7ezlxacnkE6b?DGcghsgmY;y7`YIB$adi
zm;fv|^y6cUF>45YfOBIyczU<N?O*ho^g-kPb-}fCd(R6=?@sC{*-S4NtUE85XX<W?
zWmpPjY+}zk-l5nkn()^>UtQ8AqJW<(#wTSl0Jt)(Ki!cvDtdTq2K3C|%YuMen-&G#
zg!lIum%tlR6M$Hg_45uafN?+sFD4}SYJ9l=1R7vZ8X6(Vnp@7L_%SDUNzf!nFh}m<
zwE|yN?nk++;kwgU0A^SKjWCn6^MUM(M-7Y10MpPc1fa&#j3s5eJ`P6y{|=e*EN>Rn
zCYf;m_T`D+RUM#HSOBfjyn)BgyObqZY?~kMH-T)TX`9wodAo4q^p^^Lu<vzN>oUJY
z^kJW<glb;W4UfrqeL`|?7E}~qsQ-ARS)&;M%o}<A@L1WR>6kUUY(cEa>gX4pq`OH6
z*;D56W<uB17K1S+25$ij2AmZCx?p;$C{VeU&nzz6g}jE!cl64f(f#}fQ>{^4WhD~u
z_Q6e`8c{$Pv1sHCebcS{b3j&6!lIHLHwPn=G5n2}0Hw|KR~kM*81ZfOftEq`)DeP)
zT*3B+3R8`z@qhHnyga2(wMX@HDaGM)$w3pqQ2(hhDLI?{F6+YF5wYnn@-E`A2omn!
z_TM>rWlqZ&^k$@F=5iz}Tuv~?rk;cVMi>}M{JGyH52Rb|#NVbYH5hqUr1as99Pnjj
z!5fAT&?#VJc=Ic6#gFwzigYiSTvEQLKTkQ41S~1v72FVD<P7=n+OVs0?6Q@stU!bp
z3f8EMF|!_7voeQCJgI2{;8v9M(v}A4I0BHN*0A7?tWmRg%bG|M-#oA(NspXq=>re)
zZZ#JG+H}mBkotC#7~n#^uVpa-UQRy%gJM$e)gy<9TF#H*+o$4D6|6r}j1w$?F$jbM
zm#6e~4~t7zc^7p4SYQ4py|OM#$>v(I?Pm&Z&l;Ja+X*z(uMH3Pn`q@-P4rcz{!?Er
zO6W<%A%`)55f1CN0Aw&gGoob+S2<A-P|{qlYm53rJ6B3FM*@lnC~B$=W=J;}pcP)#
z&$SdxCIKIp?bIcsPh<ilu48HmhngKm7}%MJk*yA%oPbCIzOOFSC1Y?*8bl8BOaKnM
zohJ~$&}vw5er!7YoQ~k7$=PG|$mkuJ!2GJ};VE0=Y1SVF7+leg$;sJHzpC;oQdcAp
zYqBa0J~!9pk50ck2p_=k0yQHeB4!HjG4gIj{}~F=GG8LmrT9*?0Gy|V8x}w}h_mAa
za{)zQvuwxk9b=6&%?29E4}t*PCG}6YNu28tj?GJG@&Oo(c>}%R%?6ruoE+LNgOfbR
z4o8|#3pdoS9gML!#;qLv1aK*QN9IIIX8los!2s=8f24SspL4d(D$2U$H2S!rK*XCO
zjwgV&xd4L!TJcroKFB0;^{wajemhmK96wV!WElgHY>5oQ0vOPY;+A@qCkjv)!?3sv
zT{G@`8mi<3v#mQ<SZa;Y7X}7CV9-S?%z`wKxO%XPYtskolCi^4&UhS6#uzefu_uOs
zngUuwqZ-koc;mQo0E!<M#AVdzk&*AL0k5CY!xodHn*fFd(1wy`hqu|!wI~R^qvAnC
zy6yy8QJK$pP8Jo2aNqV7<Mk83umGCyWZvp#nKKQ1Q|55pa^4+&O#{ymU=k#ZjO(u7
z2Oxt0+<5CiPE3>E9m8Ax58Ia~>6P_$)qWd~Ze&5gnD|WnK7cU<XbH>IXL(N&QNi%(
z=vfxMvU2Ooxir)0^E3>LO;>g20R%(;d|r8dMf94Q!n=6NG6217@kooXm+I{OLsSk2
zP-P5*V^SL}qO6`^0hnO{LX6iAte;F8?qq)QyvzTfSJwQZTrbbzRSHw_>-2#q=+kfl
z!vcrb{RwZ`RBIG0lJu%xS(m0{=kb)21OY>0QoH?>02;;sBNf~+p2_>h&D&4>Ol6}J
zGE0Mzx03VV$Zip_KP5%0Rt*A#0FUhaoN7HSH|5rsnR(9>OfpB(4c&)z*DG^T$+kN_
zEZM<Kf=H*=8_?wQ?E2}D4sEA|8C%0njd2M*r!*_x_j#4CGREw(<)dI^?rEqV-;weG
zz@sYc7pZ+KPfh?WLxAA}oD}7*hM^ynY+Iq__<E}-)AK{7%m_wqg{$E=T;o3pG%aJ$
z<e)8pNK|jA`*_rPnupHGUAj%{-35()&n>~oom-gme!b7dRd!T-_fuPRUiA|QCP7+n
zs62pI_pPISjWxf#ciXaUCeCzk8`2}}ST^>SdBMm()a<yKXB}YcB^FPgngE;(5C|{{
z;s!(Y0qk!&a?`W<->%Tw^L?s<sj2<@1|z$y#qrPlMy_)r;qv?Ow}F!voW=q$ZVLcl
zm!W##lhU2;(w2tzd1v~L?Ty3ZGG3aPoV_m?`OoBkH;Z=_@TBD4ut}zZQxZVqO<<;T
zS@BQjt)4L>=L7ap=??QTx0~>K{pc5!ga`Xx{O4evp`^tzy3X62$XkAK>CVAxYYv{&
zdzZ8jz|T~axg5&@q?}7iZ}NF;k+Qk@*3BQY9gf3HW9p<GDV8WaJ?IL0I#?3!S-qB5
zXDx~k-&}Hs%__<4G(!O2N)lNR@Y23@zH=a?YuE4V2R0m<o%?CS^bPY_li#CP_YjEi
z#_+3yj-jtu`)+)_&&9QNoB{Y__BdE2dE;sGIturdF@!_S^@)vs&spmT{=0bVmb-J8
ze*4hQm2CQk`Ru!@1KoY9A_h~ZpQ-SwzQ`Fl9|TQ+R7=zm?y*`BvAM1^;`BNN@Q?iO
zSZ;k8_d~c8A7W)I(yY<#iJ`!CaIM$AvD5d2Ho<i4@1XX-g%graZRuy?NJOcv*U~W-
zuxoAC)G2K@g)s(|sqMeFD8A!1OZu70q!8Xnt}+If;*XRCp=H_FTY@@fabEG(Z&p;~
zPaqB}#K17fo{GPociCyn27CaG`_ZhT47cjX!RETN&H{u=`3!->py!XANms9*M@uid
zS-0|cIu295vnq8_>Gp{{`wM`;4x^5g-`?5$Oo4&Wfq@~zYRCMMGYJ5KQ9b|+j!9*Q
zS{&uP$_D_is;r$~oXbZ5MKzQbO86OjX-=;(QmE?td;YiIZmccQ<a2y7X4b{CdJON7
zc&%ZeVF5ycX1^B?^d0}uq~z@9KMP^KI~<26h#Ds0;!F|TDdyH8z&HY0F9PT9`fyFg
zpgo_D{~b+A&VD`=^{zTxKmc&d=8qWfF|Ys-Em`pHh-;tIL;x)U3><||Rgh$fEO}+v
zub-IFedxeY);&3;kNu^BwX8`)s##@>4YViEAfkNU&q%s68|SQp7x?LCtni8RF1veZ
zZ2GF#!yxB38y8;fXR0RMWZGis{=VZiEijP)AR{8?tPJVGq5pyQ)Z!=3yEF>`J`0nO
zM|Xblm;A<>XGBd0cMt~KQ#4;ZApt<Or*SfXuMGVj504r+apU2B6EXUe2krhl7Rizz
z)P_smdqrDFBzj{-MgCn{l<!kjJUwVK2HI0AbrE2EVz2daCL7k(6v59FMy*?b*$J_2
zn+;?qjeL2{C&#<&)Sn>=<OL-XKt$kM5OJEPX#GA64B3$hxFo3;9^doTqybUMQEv~Q
z=8ZSozE}p}u&a?IK?tk2oM4K0V^Qh$yR?{n2C(ind-FhhYLp(CKasVfJnyACUyD~J
z5hg)|EQpW=0WwLD1OX;NghT=)5+ITQfd~Q-#MZytmM69?j1-6<0AQf@*F@pyeiW%S
z@pkM&N&>a-KoUR@NN7s~lSG130M!H71fZUX3PY`kN_QirSfV;LH2P0b?q60M&U;3y
z^5V@lyfu89Za2>G|GEI4+OvB0min@}W{u0r1R~tkYxF(wX4{*C>?yhkD;mvzW-%vs
z$^8eKYG0EDZF3L|3<SEa!RMYgv-?nHSb(1)UfBD+Z*yI#u9Wdzy+&hVa_^w^XYBJh
zL%4J6r!1$oM6>c$FicMA6B6Yc1n5MxT@(Lo-na9v->`tydRriB++?FL#`;Gk;r6VN
zA+y}Z5YS1uX8pX>$tis+R#g@-QOBACl`+;WBF6pl`BNi8`CNtt2s6rE4Qca>f84Dw
zRIIMr4?W6fVCZR!qfjf~umEAkp{6?a;I7ZLF1!B;M#N>{p?(v>qAP;{A;i+M-E?)v
zK=yIzj^Z^n2Qv@5oDfOBk!1j;cN>iByA27Oo{S+NNW6D&OS^JDF5UU|ZCgHJ3rn_l
zuXZ;DrF?~{n3U9OUO1I+Sb!k$P1S)>uN_$5bFe*iVV<)pB#lW3EV{Ky@;z@4zvhi_
ze+PpA8nD|@Ippd5Z@l;Q8Jjn^a3dT(H}@?ayv|;u&r`N=)D6#mRJ#4`Ilad)g8-e3
zBc3L4O?44{Sy_PG`ZBg;%uNk%7UtN>Tn%t5zM#x{M;IC39e?V3ai{8p0HDg{bh=fg
zfup=N(-xc1QK8Znhf`&&Aq)?@vZ%TKu8WhhI_wz*&YD{nl;l3?Qv9%rGGZhPB4iUR
zBtwOY7R84;Zwo5jjqs`pe5wMk>W4*?6B&cqt@>Iwm$0y&i$n;_lsbK`v@gfsR!Kyx
z(+TkCt}oc0hAJ+vblKQjI&6)bHhsvPK6l6;wog<do*y#hwDNzbK1lBrZN9h3?|E4S
zwY6~jDgbtZ={t6JQfH&d?`4BxQZ_#^;IfAzB-5HRVeupYkSH+k&qDwn$O2z|TESy}
z$0cR47xsSt^5(kI<7nQM#yUS!F(Nht7bWy~`I4kwe|-K7K8u925Jg>deWS`4qGU5B
zC1vN#>M?9g&XGfBoR?=M0bD`cz8Ma%KwR&>ATDF^U;0m)^?vK?qZ2ZjL4YuzEA4Y=
z@q2MaYTqdWgw=OvkMVZ3wmK^ba3;GIA#f_kxP+egqH^zr-%q$}%gQd?R%ayv)K0bl
zf%6Ac47I0Yo1^^U4@S*gYnQD#<t+`Aj=Q>aTb-Q*=mckufx#lmNV7y$wkW=>Zw$No
zZXzlH06T600HjCQyZolhvyuSatoD#Z0<8TfK>#62Hly0zSTZa&L-?NolkS%VadBJO
zH-DyWbygDKP>bVH0MB%+*rSJJ1~!MNU@Q=)+awdlCicYSq&^S`Sx@GzF1@Yi1vEOL
zhi?Udznvv*6*$47Y&WZLJGpwqJ-x?t7`y&o(PrijLnxMHj>PnCgSleohj)HHB+pr`
zw4DzG#*k>SA=wgz1d|O3<_IL3Bavu|M2a=={|HI0C8EPPm$15Ozoulw`^8&NTYgE|
luCOLI4m8#p6Ida{{{vZyytz!Sq>%ss002ovPDHLkV1hD5O~3#E

diff --git a/docs/source/_static/devito_style.css b/docs/source/_static/devito_style.css
deleted file mode 100644
index 07b588ed2c..0000000000
--- a/docs/source/_static/devito_style.css
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Width of the main tab
-.wy-nav-content {
-    max-width: none;
-}
-*/
-
-/* Sidebar header (and topbar for mobile) */
-.wy-side-nav-search, .wy-nav-top {
-  background: #e5e5e5;
-}
-
-/* Sidebar
-.wy-nav-side {
-  background: #ff0000;
-}
-*/
-
-/* Link colors */
-a:link {
-  color: #54c6a4;
-}
-a:hover, a:visited, a:active {
-  color: #1ea47d;
-}
-
-a.icon {
-  color: #343131;
-}
-
-.wy-side-nav-search > div.version {
-  color: #343131;
-}
-
-.rst-content dl:not(.docutils) dt {
-  color: #404040;
-}
-
-.rst-content dl:not(.docutils) dt {
-  border-top: solid 3px #54c6a4;
-  background: #dff1ec;
-}
-
-/* To get rid of colored boxes around methods
-.rst-content dl:not(.docutils) dl dt {
-  border-left: none;
-  background: none;
-}
-*/
-
-/* Make sure the colored box around classes takes up the whole page width */
-.rst-content dl:not(.docutils) dt {
-  display: block;
-}
-
-/* A thin line right below Examples, Notes, ... */
-p.rubric {
-  border-bottom: 1px solid rgb(204, 204, 204);
-}
-
-/* A vertical line on the left of Parameters */
-.wy-table, .rst-content table.docutils, .rst-content table.field-list {
-  border-left: 5px solid rgb(238, 238, 238) !important;
-}
-
-/* A grey box in background for Parameters */
-.rst-content table.field-list .field-name {
-  display: inline-block;
-  background-color: rgb(238, 238, 238);
-  padding: 1px 8px 1px 5px;
-}
-
-/* Put class/method parameters closer to the "Parameters" box */
-.wy-table td, .rst-content table.docutils td, .rst-content table.field-list td, .wy-table th, .rst-content table.docutils th, .rst-content table.field-list th {
-  padding: 2px 0px 2px 0px;
-}
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
deleted file mode 100644
index f76b65b8ad..0000000000
--- a/docs/source/_templates/layout.html
+++ /dev/null
@@ -1,4 +0,0 @@
-{% extends "!layout.html" %}
-{% block extrahead %}
-    <link href="{{ pathto("_static/devito_style.css", True) }}" rel="stylesheet" type="text/css">
-{% endblock %}
diff --git a/docs/source/builtins.rst b/docs/source/builtins.rst
deleted file mode 100644
index dac4db2f96..0000000000
--- a/docs/source/builtins.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-==================
-Built-in Operators
-==================
-
-Devito provides a number of convenience Operators that can be imported and run
-in user code to facilitate development. These are exposed as free functions.
-
-All built-in Operators support MPI.
-
-.. automodule:: devito.builtins
-    :members:
-    :show-inheritance:
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index 3aa28eb90f..0000000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,430 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Devito documentation build configuration file, created by
-# sphinx-quickstart on Wed Jul 20 13:02:08 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
-
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.todo',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.githubpages',
-    'sphinx.ext.mathjax',
-    'sphinx.ext.napoleon'  # support for numpydoc
-]
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The encoding of source files.
-#
-# source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'Devito v4.6.2'
-copyright = u'2016-2019, Devito'
-author = u'The Devito Community'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-# version = u'4.6.2'
-# The full version, including alpha/beta/rc tags.
-# release = u'4.6.2'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = 'en'
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#
-# today = ''
-#
-# Else, today_fmt is used as the format for a strftime call.
-#
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-# keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = True
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# "<project> v<release> documentation" by default.
-#
-html_title = u'Devito v4.6.2'
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#
-html_logo = '_static/devito_logo.png'
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#
-html_favicon = '_static/devito_logo.png'
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#
-# html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#
-# html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-#
-# html_domain_indices = True
-
-# If false, no index is generated.
-#
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#
-# html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#
-html_show_sphinx = False
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
-#
-# html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#
-# html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#
-# html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Devitodoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'Devito.tex', u'Devito Documentation',
-     u'Devito', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-#
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#
-# latex_appendices = []
-
-# It false, will not define \strong, \code, 	itleref, \crossref ... but only
-# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added
-# packages.
-#
-# latex_keep_old_macro_names = True
-
-# If false, no module index is generated.
-#
-# latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'devito', u'Devito Documentation',
-     [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#
-# man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'Devito', u'Devito Documentation',
-     author, 'Devito', u'Devito Documentation.',
-     'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-#
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False
-
-
-# -- Options for Epub output ----------------------------------------------
-
-# Bibliographic Dublin Core info.
-epub_title = project
-epub_author = author
-epub_publisher = author
-epub_copyright = copyright
-
-# The basename for the epub file. It defaults to the project name.
-# epub_basename = project
-
-# The HTML theme for the epub output. Since the default themes are not
-# optimized for small screen space, using the same theme for HTML and epub
-# output is usually not wise. This defaults to 'epub', a theme designed to save
-# visual space.
-#
-# epub_theme = 'epub'
-
-# The language of the text. It defaults to the language option
-# or 'en' if the language is not set.
-#
-# epub_language = ''
-
-# The scheme of the identifier. Typical schemes are ISBN or URL.
-# epub_scheme = ''
-
-# The unique identifier of the text. This can be a ISBN number
-# or the project homepage.
-#
-# epub_identifier = ''
-
-# A unique identification for the text.
-#
-# epub_uid = ''
-
-# A tuple containing the cover image and cover page html template filenames.
-#
-# epub_cover = ()
-
-# A sequence of (type, uri, title) tuples for the guide element of content.opf.
-#
-# epub_guide = ()
-
-# HTML files that should be inserted before the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#
-# epub_pre_files = []
-
-# HTML files that should be inserted after the pages created by sphinx.
-# The format is a list of tuples containing the path and title.
-#
-# epub_post_files = []
-
-# A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
-
-# The depth of the table of contents in toc.ncx.
-#
-# epub_tocdepth = 3
-
-# Allow duplicate toc entries.
-#
-# epub_tocdup = True
-
-# Choose between 'default' and 'includehidden'.
-#
-# epub_tocscope = 'default'
-
-# Fix unsupported image types using the Pillow.
-#
-# epub_fix_images = False
-
-# Scale large images.
-#
-# epub_max_image_width = 0
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# epub_show_urls = 'inline'
-
-# If false, no index is generated.
-#
-# epub_use_index = True
diff --git a/docs/source/constant.rst b/docs/source/constant.rst
deleted file mode 100644
index cd6e9510ce..0000000000
--- a/docs/source/constant.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-========
-Constant
-========
-
-.. autoclass:: devito.types.Constant
-    :members:
-    :show-inheritance:
diff --git a/docs/source/dimension.rst b/docs/source/dimension.rst
deleted file mode 100644
index 459c738853..0000000000
--- a/docs/source/dimension.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=========
-Dimension
-=========
-
-.. automodule:: devito.types.dimension
-    :members:
-    :show-inheritance:
diff --git a/docs/source/download.rst b/docs/source/download.rst
deleted file mode 100644
index 2ee073f69b..0000000000
--- a/docs/source/download.rst
+++ /dev/null
@@ -1,132 +0,0 @@
-===========================
-Download and Install Devito
-===========================
-
-There are two main approaches to installing Devito.
-
-- `Docker`_, for those looking for the least-friction way to try Devito
-- `Virtual environment`_, for those looking to use Devito as part of a project alongside other packages
-
-Docker
-------
-
-For detailed installation instructions and information on the Devito Docker image library please follow 
-the docker/README.md_
-
-.. _README.md: https://github.com/devitocodes/devito/tree/master/docker#readme
-
-
-Virtual environment
--------------------
-
-venv route
-``````````
-
-Devito is available as a `pip package`_ in PyPI.
-
-Create a `Python virtual environment`_
-
-.. _Python virtual environment: https://docs.python.org/3/library/venv.html
-
-.. code-block:: shell
-
-  python3 -m venv <your_venv_name>
-
-Source the newly created `venv`. This needs to be repeated each time a new terminal is open.
-
-.. code-block:: shell
-
-  source <your_venv_name>/bin/activate
-
-
-To install the `latest Devito release`_ along with any additional dependencies, follow:
-
-.. code-block:: shell
-
-   pip install devito
-   # ...or to install additional dependencies:
-   # pip install devito[extras,mpi,nvidia,tests]
-
-.. _latest Devito release: https://pypi.org/project/devito/#history
-
-To install the latest Devito development version, without the tutorials, follow:
-
-.. code-block:: shell
-
-   pip install git+https://github.com/devitocodes/devito.git
-   # ...or to install additional dependencies:
-   # pip install git+https://github.com/devitocodes/devito.git#egg=project[extras,mpi,nvidia,tests]
-
-Additional dependencies:
-
-- extras : optional dependencies for Jupyter notebooks, plotting, benchmarking
-- tests : optional dependencies required for testing infrastructure
-- mpi : optional dependencies for MPI (mpi4py)
-- nvidia : optional dependencies for targetting GPU deployment
-
-.. _pip package: https://pypi.org/project/devito/
-
-Note that here, you do not need to get the code via `git clone`.
-Depending on your needs, this might also be the recommended setup for using Devito
-in a production-like environment. However, since some components need to be
-compiled before use, this approach may be sensitive to the C/C++ compilers present
-on your system and the related environment, including what other packages you might
-have installed.
-
-
-conda route
-```````````
-Please install either Anaconda_ or Miniconda_.
-
-.. _Anaconda: https://www.continuum.io/downloads
-
-.. _Miniconda: https://conda.io/miniconda.html
-
-.. _Python virtual environment: https://docs.python.org/3/library/venv.html
-
-.. _Conda environment: https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html
-
-.. code-block:: shell
-
-   # Create new env with the name devito
-   conda create --name devito
-   # Activate the environment
-   conda activate devito
-
-and finally, install Devito along with any extra dependencies:
-
-.. code-block:: shell
-
-   pip install devito
-   # ... or to install additional dependencies
-   # pip install devito[extras,mpi,nvidia,tests]
-
-
-For developers
-``````````````
-First clone Devito:
-
-.. code-block:: shell
-
-   git clone https://github.com/devitocodes/devito.git
-   cd devito
-
-and then install the requirements in your virtual environment (venv or conda):
-
-.. code-block:: shell
-
-   # Install requirements
-   pip install -e .
-   # ...or to install additional dependencies
-   # pip install -e .[extras,mpi,nvidia,tests]
-
-
-Facing issues?
---------------
-
-If you are facing any issues, we are happy to help on Slack_. Also, have a look at our
-list of known installation issues_.
-
-.. _issues: https://github.com/devitocodes/devito/wiki/Installation-Issues
-
-.. _Slack: https://join.slack.com/t/devitocodes/shared_invite/zt-gtd2yxj9-Y31YKk_7lr9AwfXeL2iMFg
diff --git a/docs/source/equation.rst b/docs/source/equation.rst
deleted file mode 100644
index 4668048a4f..0000000000
--- a/docs/source/equation.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-========
-Equation
-========
-
-.. automodule:: devito.types.equation
-    :members:
-    :show-inheritance:
diff --git a/docs/source/finite-difference.rst b/docs/source/finite-difference.rst
deleted file mode 100644
index 2be32e1756..0000000000
--- a/docs/source/finite-difference.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-================================
-Finite-difference approximations
-================================
-
-.. automodule:: devito.finite_differences.finite_difference
-    :members:
diff --git a/docs/source/function.rst b/docs/source/function.rst
deleted file mode 100644
index 98ba2de5a9..0000000000
--- a/docs/source/function.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-========
-Function
-========
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.Function
-    :members: name, dtype, grid, dimensions, space_dimensions, shape, shape_with_halo, shape_allocated, shape_global, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, sum, avg
-    :show-inheritance:
diff --git a/docs/source/grid.rst b/docs/source/grid.rst
deleted file mode 100644
index e8c6b67fd4..0000000000
--- a/docs/source/grid.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-====
-Grid
-====
-
-.. autoclass:: devito.types.Grid
-    :members:
-    :show-inheritance:
diff --git a/docs/source/grids.rst b/docs/source/grids.rst
deleted file mode 100644
index 5d653e1dd4..0000000000
--- a/docs/source/grids.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-=====
-Grids
-=====
-
-.. toctree::
-
-    grid
-    subdomain
diff --git a/docs/source/index.rst b/docs/source/index.rst
deleted file mode 100644
index b52eefde85..0000000000
--- a/docs/source/index.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-.. Devito documentation master file, created by
-   sphinx-quickstart on Wed Jul 20 13:02:08 2016.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Devito
-======
-
-Welcome to the Devito documentation!
-
-`Devito <http://www.devitoproject.org>`_ is a software to
-implement optimised finite difference (FD) computation from
-high-level symbolic problem definitions. Starting from symbolic
-equations defined in `SymPy <http://www.sympy.org/en/index.html>`_,
-Devito employs automated code generation and just-in-time (JIT)
-compilation to execute FD kernels on multiple computer platforms.
-
-Getting started
----------------
-
-You can get instructions on how to download and install Devito
-:doc:`here </download>`.
-
-To learn how to use Devito, check our :doc:`tutorials and examples </tutorials>`.
-Here you will also find documentation about the inner workings of the compiler.
-
-You can find the API Reference, which includes detailed explanations and
-inline examples, :doc:`here </userapi>`.
-
-.. title:: The Devito project
-
-.. toctree::
-   :hidden:
-
-   Download <download>
-   Tutorials <https://github.com/devitocodes/devito/tree/master/examples>
-   FAQ <https://github.com/devitocodes/devito/wiki/FAQ>
-   API Reference <userapi>
diff --git a/docs/source/operator.rst b/docs/source/operator.rst
deleted file mode 100644
index 909be6b8c6..0000000000
--- a/docs/source/operator.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-========
-Operator
-========
-
-.. automodule:: devito.operator.operator
-    :members:
-    :undoc-members:
diff --git a/docs/source/precsparsefunction.rst b/docs/source/precsparsefunction.rst
deleted file mode 100644
index 0058ca2f64..0000000000
--- a/docs/source/precsparsefunction.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=========================
-PrecomputedSparseFunction 
-=========================
-
-.. autoclass:: devito.types.PrecomputedSparseFunction
-    :members: name, dtype, grid, dimensions, shape, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, gridpoints, inject, interpolate
-    :show-inheritance:
diff --git a/docs/source/precsparsetimefunction.rst b/docs/source/precsparsetimefunction.rst
deleted file mode 100644
index fb26782dcd..0000000000
--- a/docs/source/precsparsetimefunction.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=============================
-PrecomputedSparseTimeFunction 
-=============================
-
-.. autoclass:: devito.types.PrecomputedSparseTimeFunction
-    :members: name, dtype, grid, dimensions, shape, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, gridpoints, inject, interpolate, time_order
-    :show-inheritance:
diff --git a/docs/source/sparsefunction.rst b/docs/source/sparsefunction.rst
deleted file mode 100644
index cce3876ffd..0000000000
--- a/docs/source/sparsefunction.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-==============
-SparseFunction 
-==============
-
-.. autoclass:: devito.types.SparseFunction
-    :members: name, dtype, grid, dimensions, shape, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, gridpoints, guard, inject, interpolate
-    :show-inheritance:
diff --git a/docs/source/sparsetimefunction.rst b/docs/source/sparsetimefunction.rst
deleted file mode 100644
index 66aa2c49eb..0000000000
--- a/docs/source/sparsetimefunction.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-==================
-SparseTimeFunction 
-==================
-
-.. autoclass:: devito.types.SparseTimeFunction
-    :members: name, dtype, grid, dimensions, shape, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, gridpoints, guard, inject, interpolate, time_order
-    :show-inheritance:
diff --git a/docs/source/subdomain.rst b/docs/source/subdomain.rst
deleted file mode 100644
index dee2c03b0a..0000000000
--- a/docs/source/subdomain.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-=========
-SubDomain
-=========
-
-.. autoclass:: devito.types.SubDomain
-    :members:
-    :show-inheritance:
diff --git a/docs/source/symbolic.rst b/docs/source/symbolic.rst
deleted file mode 100644
index ffee006acd..0000000000
--- a/docs/source/symbolic.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-================
-Symbolic Objects
-================
-
-.. toctree::
-
-    dimension
-    constant
-    function
-    timefunction
-    sparsefunction
-    sparsetimefunction
-    precsparsefunction
-    precsparsetimefunction
-    vectorfunction
-    vectortimefunction
-    tensorfunction
-    tensortimefunction
\ No newline at end of file
diff --git a/docs/source/tensorfunction.rst b/docs/source/tensorfunction.rst
deleted file mode 100644
index d473d278bf..0000000000
--- a/docs/source/tensorfunction.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-========
-TensorFunction
-========
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.TensorFunction
-    :members: grid, space_dimensions, space_order, is_diagonal, is_symmetric, mat
-    :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/tensortimefunction.rst b/docs/source/tensortimefunction.rst
deleted file mode 100644
index 60f847a42b..0000000000
--- a/docs/source/tensortimefunction.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-========
-TensorTimeFunction
-========
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.TensorTimeFunction
-    :members: grid, space_dimensions, space_order, is_diagonal, is_symmetric, mat
-    :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/timefunction.rst b/docs/source/timefunction.rst
deleted file mode 100644
index 24655ea5d5..0000000000
--- a/docs/source/timefunction.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-============
-TimeFunction
-============
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.TimeFunction
-    :members: name, dtype, grid, dimensions, space_dimensions, shape, shape_with_halo, shape_allocated, shape_global, data, data_domain, data_with_halo, data_ro_domain, data_ro_with_halo, space_order, sum, avg, time_order, backward, forward
-    :show-inheritance:
diff --git a/docs/source/userapi.rst b/docs/source/userapi.rst
deleted file mode 100644
index 5a2f20d5f6..0000000000
--- a/docs/source/userapi.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-=============
-API Reference
-=============
-
-.. toctree::
-
-    grids
-    symbolic
-    finite-difference
-    equation
-    operator
-    builtins
diff --git a/docs/source/vectorfunction.rst b/docs/source/vectorfunction.rst
deleted file mode 100644
index a9f732ccbc..0000000000
--- a/docs/source/vectorfunction.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-========
-VectorFunction
-========
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.VectorFunction
-    :members: grid, space_dimensions, space_order, mat
-    :show-inheritance:
\ No newline at end of file
diff --git a/docs/source/vectortimefunction.rst b/docs/source/vectortimefunction.rst
deleted file mode 100644
index 8c84929947..0000000000
--- a/docs/source/vectortimefunction.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-========
-VectorTimeFunction
-========
-
-.. Need to explicitly state each member to avoid showing sympy members
-
-.. autoclass:: devito.types.VectorTimeFunction
-    :members: grid, space_dimensions, space_order, mat
-    :show-inheritance:
\ No newline at end of file

From 478e343e8049e4af919e20000b53f795456dc2a4 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 4 Oct 2023 09:19:27 -0400
Subject: [PATCH 73/90] FAQ: fix block rendering

---
 FAQ.md | 96 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 63 insertions(+), 33 deletions(-)

diff --git a/FAQ.md b/FAQ.md
index ace1b72b40..8cb99ace32 100644
--- a/FAQ.md
+++ b/FAQ.md
@@ -39,7 +39,7 @@
 ## How can I see the code generated by Devito?
 After you build an ```op=Operator(...)``` implementing one or more equations, you can use ```print(op)``` to see the generated low level code. The example below builds an operator that takes a 1/2 cell forward shifted derivative of the ```Function``` **f** and puts the result in the ```Function``` **g**. 
 
-```
+```python
 import numpy as np
 import devito
 from devito import Grid, Function, Eq, Operator
@@ -54,7 +54,7 @@ print(op)
 
 And the output:
 
-```
+```C
 #define _POSIX_C_SOURCE 200809L
 #include "stdlib.h"
 #include "math.h"
@@ -107,7 +107,8 @@ int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict g_vec, const
 Set the environment variable `DEVITO_LOGGING=DEBUG`. When an Operator gets compiled, the used compilation command will be emitted to stdout. 
 
 If nothing seems to change, it is possible that no compilation is happening under-the-hood as all kernels have already been compiled in a previous run. You will then have to clear up the Devito kernel cache. From the Devito root directory, run:
-```
+
+```bash
 python scripts/clear_devito_cache.py
 ```
 
@@ -151,7 +152,8 @@ Take a look [here](https://github.com/devitocodes/devito/tree/master/examples/pe
 Devito applies several performance optimizations to improve the number of operations ("operation count") in complex expressions. These optimizations are designed to do a really good job but also be reasonably fast. One such pass attempts to factorize as many common terms as possible in expressions in order to reduce the operation count. We will construct a demonstrative example below that has a common term that is _not_ factored out by the Devito optimization. The difference in floating-point operations per output point for the factoring of that term is about 10 percent, and the generated C is different, but numerical outputs of running the two different operators are indistinguishable to machine precision. In terms of actual performance, the (few) missed factorization opportunities may not necessarily be a relevant issue: as long as the code is not heavily compute-bound, the runtimes may only be slightly higher than in the optimally-factorized version.
 
 #### Operator 1:
-```
+
+```python
 ux_update = t.spacing**2 * b * \
     ((c33 * u_x.dx(x0=x+x.spacing/2)).dx(x0=x-x.spacing/2) +
      (c55 * u_x.dz(x0=z+z.spacing/2)).dz(x0=z-z.spacing/2) +
@@ -162,8 +164,10 @@ stencil_x = Eq(u_x.forward, ux_update)
 print("\n", stencil_x)
 op = Operator([stencil_x])
 ```
+
 #### Operator 2:
-```
+
+```python
 ux_update = \
     t.spacing**2 * b * (c33 * u_x.dx(x0=x+x.spacing/2)).dx(x0=x-x.spacing/2) + \
     t.spacing**2 * b * (c55 * u_x.dz(x0=z+z.spacing/2)).dz(x0=z-z.spacing/2) + \
@@ -176,7 +180,8 @@ op = Operator([stencil_x])
 ```
 
 #### Output 1:
-```
+
+```bash
 Eq(u_x(t + dt, x, z), dt**2*(Derivative(c13(x, z)*Derivative(u_z(t, x, z), z), x) + Derivative(c33(x, z)*Derivative(u_x(t, x, z), x), x) + Derivative(c55(x, z)*Derivative(u_x(t, x, z), z), z) + Derivative(c55(x, z)*Derivative(u_z(t, x, z), x), z))*b(x, z) + (-dt*wOverQ(x, z) + 2)*u_x(t, x, z) + (dt*wOverQ(x, z) - 1)*u_x(t - dt, x, z))
 Operator `Kernel` generated in 1.26 s
   * lowering.Expressions: 0.61 s (48.7 %)
@@ -186,7 +191,8 @@ Flops reduction after symbolic optimization: [1160 --> 136]
 ```
 
 #### Output 2:
-```
+
+```bash
 Eq(u_x(t + dt, x, z), dt**2*b(x, z)*Derivative(c13(x, z)*Derivative(u_z(t, x, z), z), x) + dt**2*b(x, z)*Derivative(c33(x, z)*Derivative(u_x(t, x, z), x), x) + dt**2*b(x, z)*Derivative(c55(x, z)*Derivative(u_x(t, x, z), z), z) + dt**2*b(x, z)*Derivative(c55(x, z)*Derivative(u_z(t, x, z), x), z) + (-dt*wOverQ(x, z) + 2)*u_x(t, x, z) + (dt*wOverQ(x, z) - 1)*u_x(t - dt, x, z))
 Operator `Kernel` generated in 1.12 s
   * lowering.Expressions: 0.59 s (53.0 %)
@@ -221,7 +227,8 @@ You will note that this method uses placeholders for the material parameter arra
 
 ### How to get the list of Devito environment variables
 You can get the list of environment variables with the following python code:
-```
+
+```python
 from devito import print_defaults
 print_defaults()
 ```
@@ -304,7 +311,8 @@ Set `DEVITO_IGNORE_UNKNOWN_PARAMS=1` to avoid Devito raising an exception if one
 
 ## How do you run the unit tests from the command line
 In addition to the [tutorials]( https://www.devitoproject.org/devito/tutorials.html), the unit tests provide an excellent way to see how the Devito API works with small self-contained examples. You can exercise individual unit tests with the following python code:
-```
+
+```bash
 pytest <test.py>
 pytest -vs <test.py>  [more detailed log]
 ```
@@ -315,7 +323,7 @@ pytest -vs <test.py>  [more detailed log]
 ## What is the difference between f() and f[] notation
 Devito offers a functional language to express finite difference operators. This is introduced [here](https://github.com/devitocodes/devito/blob/master/examples/userapi/01_dsl.ipynb) and systematically used throughout our examples and tutorials. The language relies on what in jargon we call the "f() notation".
 
-```
+```python
 >>> from devito import Grid, Function
 >>> grid = Grid(shape=(5, 6))
 >>> f = Function(name='f', grid=grid, space_order=2)
@@ -327,7 +335,7 @@ Derivative(f(x, y), x)
 
 Sometimes, one wishes to escape the constraints of the language. Instead of taking derivatives, other special operations are required. Or perhaps, a specific grid point needs to be accessed. In such a case, one could use the "f[] notation" or "indexed notation". Following on from the example above:
 
-```
+```python
 >>> x, y = grid.dimensions
 >>> f[x + 1000, y]
 f[x + 1000, y]
@@ -335,7 +343,7 @@ f[x + 1000, y]
 
 The indexed object can be used at will to construct `Eq`s, and they can be mixed up with objects stemming from the "f() notation".
 
-```
+```python
 >>> f.dx + f[x + 1000, y]
 Derivative(f(x, y), x) + f[x + 1000, y]
 ```
@@ -378,7 +386,8 @@ The indexed notation, or "f[] notation", is discussed [here](#What-is-the-differ
 
 ## What's up with object\.data
 The `.data` property which is associated with objects such as `Constant`, `Function` and `SparseFunction` (along with their derivatives) represents the 'numerical' value of the 'data' associated with that particular object. For example, a `Constant` will have a single numerical value associated with it as shown in the following snippet
-```
+
+```python
 from devito import Constant
 
 c = Constant(name='c')
@@ -386,11 +395,14 @@ c.data = 2.7
 
 print(c.data)
 ```
-```
+
+```default
 2.7
 ```
+
 Then, a `Function` defined on a `Grid` will have a data value associated with each of the grid points (as shown in the snippet below) and so forth.
-```
+
+```python
 import numpy as np
 from devito import Grid, Function
 
@@ -400,7 +412,8 @@ f.data[:] = np.arange(16).reshape(grid.shape)
 
 print(f.data)
 ```
-```
+
+```default
 [[ 0.  1.  2.  3.]
  [ 4.  5.  6.  7.]
  [ 8.  9. 10. 11.]
@@ -412,27 +425,36 @@ print(f.data)
 
 ## How do I create and N-dimensional grid
 Grids are often created via, e.g.,
-```
+
+```python
 grid = Grid(shape=(5, 5))
 ```
+
 where printing the `grid` object then returns:
-```
+
+```default
 Grid[extent=(1.0, 1.0), shape=(5, 5), dimensions=(x, y)]
-``` 
-Here we see the `grid` has been created with the 'default' dimensions `x` and `y`. If a grid is created and passed a shape of `(5, 5, 5)` we'll see that in addition it has a `z` dimension. However, what if we want to create a grid with, say, a shape of `(5, 5, 5, 5)`? For this case, we've now run out of the dimensions defined by default and hence need to create our own dimensions to achieve this. This can be done via, e.g.,
 ```
+
+Here we see the `grid` has been created with the 'default' dimensions `x` and `y`. If a grid is created and passed a shape of `(5, 5, 5)` we'll see that in addition it has a `z` dimension. However, what if we want to create a grid with, say, a shape of `(5, 5, 5, 5)`? For this case, we've now run out of the dimensions defined by default and hence need to create our own dimensions to achieve this. This can be done via, e.g.,
+
+```python
 a = SpaceDimension('a')
 b = SpaceDimension('b')
 c = SpaceDimension('c')
 d = SpaceDimension('d')
 grid = Grid(shape=(5, 5, 5, 5), dimensions=(a, b, c, d))
 ```
+
 where now, printng `grid` we get
-```
+
+```default
 Grid[extent=(1.0, 1.0, 1.0, 1.0), shape=(5, 5, 5, 5), dimensions=(a, b, c, d)]
 ```
+
 and `grid.shape` returns
-```
+
+```default
 (5, 5, 5, 5)
 ```
 
@@ -463,7 +485,8 @@ Loop fission (to maximize parallelism)
 ## As time increases in the finite difference evolution, are wavefield arrays "swapped" as you might see in c/c++ code
 
 In c/c++ code using two wavefield arrays for second order acoustics, you might see code like the following to “swap” the wavefield arrays at each time step:
-```
+
+```C
     float *p_tmp = p_old;
     p_old = p_cur;
     p_cur = p_tmp;
@@ -491,7 +514,7 @@ First, classes such as `Function` or `SparseTimeFunction` are inherently complex
 
 Second, you must know that these objects are subjected to so-called reconstruction during compilation. Objects are immutable inside Devito; therefore, even a straightforward symbolic transformation such as `f[x] -> f[y]` boils down to performing a reconstruction, that is, creating a whole new object. Since `f` carries around several attributes (e.g., shape, grid, dimensions), each time Devito performs a reconstruction, we only want to specify which attributes are changing -- not all of them, as it would make the code ugly and incredibly complicated. The solution to this problem is that all the base symbolic types inherit from a common base class called `Reconstructable`; a `Reconstructable` object has two special class attributes, called `__rargs__` and `__rkwargs__`. If a subclass adds a new positional or keyword argument to its `__init_finalize__`, it must also be added to `__rargs__` or `__rkwargs__`, respectively. This will provide Devito with enough information to perform a reconstruction when it's needed during compilation. The following example should clarify:
 
-```
+```python
 class Foo(Reconstructable):
     __rargs__ = ('a', 'b')
     __rkwargs__ = ('c',)
@@ -515,7 +538,7 @@ class Bar(Foo):
 
 You are unlikely to care about how reconstruction works in practice, but here are a few examples for `a = Foo(3, 5)` to give you more context.
 
-```
+```python
 a._rebuild() -> "x(3, 5, 4)" (i.e., copy of `a`).
 a._rebuild(4) -> "x(4, 5, 4)"
 a._rebuild(4, 7) -> "x(4, 7, 4)"
@@ -534,7 +557,7 @@ There is currently no API to achieve this straightforwardly. However, there are
 * via env vars: use a [CustomCompiler](https://github.com/opesci/devito/blob/v4.0/devito/compiler.py#L446) -- just leave the `DEVITO_ARCH` environment variable unset or set it to `'custom'`. Then, `export CFLAGS="..."` to tell Devito to use the exported flags in place of the default ones.
 * programmatically: subclass one of the compiler classes and set `self.cflags` to whatever you need. Do not forget to add the subclass to the [compiler registry](https://github.com/opesci/devito/blob/v4.0/devito/compiler.py#L472). For example, you could do
 
-```
+```python
 from devito import configuration, compiler_registry
 from devito.compiler import GNUCompiler
 
@@ -576,7 +599,8 @@ Until Devito v3.5 included, domain decomposition occurs along the fastest axis.
 ## How should I use MPI on multi-socket machines
 
 In general you should use one MPI rank per NUMA node on a multi-socket machine. You can find the number of numa nodes with the `lscpu` command. For example, here is the relevant part of the output from the `lscpu` command on an AMD 7502 2 socket machine with 2 NUMA nodes:
-```
+
+```default
 Architecture:          x86_64
 CPU(s):                64
 On-line CPU(s) list:   0-63
@@ -597,7 +621,7 @@ NUMA node1 CPU(s):     32-63
 There are a few things you may want to check
 
 * To refer to the actual ("global") shape of the domain, you should always use `grid.shape` (or analogously through a `Function`, `f.grid.shape`). And unless you know well what you're doing, you should never use the function shape, namely `f.shape` or `f.data.shape`, as that will return the "local" domain shape, that is the data shape after domain decomposition, which might differ across the various MPI ranks. 
-* <... to be completed ...>
+
 
 [top](#Frequently-Asked-Questions)
 
@@ -613,17 +637,23 @@ This is likely due to an out-of-bounds (OOB) array access while running the gene
 ## Can I manually modify the C code generated by Devito and test these modifications
 
 Yes, as of Devito v3.5 it is possible to modify the generated C code and run it inside Devito. First you need to get the C file generated for a given `Operator`. Run your code in `DEBUG` mode:
-```
+
+```bash
 DEVITO_LOGGING=DEBUG python your_code.py
 ```
+
 The generated code path will be shown as in the excerpt below:
-```
+
+```default
 CustomCompiler: compiled `/tmp/devito-jitcache-uid1000/ed41e9373af1bc129471b7ae45e1c3740b60a856.c` [0.29 s]
 ```
+
 You can now open the C file, do the modifications you like, and save them. Finally, rerun the same program but this time with the _Devito JIT backdoor_ enabled:
-```
+
+```bash
 DEVITO_JIT_BACKDOOR=1 python your_code.py
 ```
+
 This will force Devito to recompile and link the modified C code.
 
 If you have a large codebase with many `Operator`s, here's a [trick](https://github.com/devitocodes/devito/wiki/Efficient-use-of-DEVITO_JIT_BACKDOOR-in-large-codes-with-many-Operators) to speed up your hacking with the JIT backdoor.
@@ -675,7 +705,7 @@ About the GPts/s metric, that is number of gigapoints per seconds. The "points"
 
 An excerpt of the performance profile emitted by Devito upon running an Operator is provided below. In this case, the Operator has two sections, ``section0`` and ``section1``, and ``section1`` consists of two consecutive 6D iteration spaces whose size is given between angle brackets. 
 
-```
+```default
 Global performance: [OI=0.16, 8.00 GFlops/s, 0.04 GPts/s]
 Local performance:
   * section0<136,136,136> run in 0.10 s [OI=0.16, 0.14 GFlops/s]
@@ -695,7 +725,7 @@ The floating-point operations are counted once all of the symbolic flop-reducing
 
 To calculate the GFlops/s performance, Devito multiplies the floating-point operations calculated at compile time by the size of the iteration space, and it does that at the granularity of individual expressions. For example, consider the following snippet:
 
-```
+```default
 <section0 start>
 for x = x_m to x_M
   for y = y_m to y_M

From 2d355c253ecbec406a211e8849e1799ace0829c0 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Wed, 4 Oct 2023 11:10:44 -0400
Subject: [PATCH 74/90] ci: fix doc deploy pat

---
 .github/workflows/documentation.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index b6503299c8..f14d96eaf6 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -15,6 +15,6 @@ jobs:
     - name: Repository Dispatch
       uses: peter-evans/repository-dispatch@v2
       with:
-        token: ${{ secrets.PERSONAL_TOKEN }}
+        token: ${{ secrets.DEPLOY_DOC_PAT }}
         repository: devitocodes/devitoproject.org
         event-type: deploy-docs
\ No newline at end of file

From 0feae100f977f79672910216f1e6f4802af7d0d9 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Fri, 29 Sep 2023 07:40:49 +0000
Subject: [PATCH 75/90] compiler: Fix CustomDimension.is_NonlinearDerived

---
 devito/passes/clusters/buffering.py | 4 ++--
 devito/types/dimension.py           | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py
index 09eda1be89..f479c3500e 100644
--- a/devito/passes/clusters/buffering.py
+++ b/devito/passes/clusters/buffering.py
@@ -572,8 +572,8 @@ def readfrom(self):
     @cached_property
     def lastidx(self):
         """
-        A 2-tuple of indices representing, respectively, the "last" write to the
-        buffer and the "last" read from the buffered Function. For example,
+        A 2-tuple of indices representing, respectively, the *last* write to the
+        buffer and the *last* read from the buffered Function. For example,
         `(sb1, time+1)` in the case of a forward-propagating `time` Dimension.
         """
         try:
diff --git a/devito/types/dimension.py b/devito/types/dimension.py
index d7b25e382d..3166c7503b 100644
--- a/devito/types/dimension.py
+++ b/devito/types/dimension.py
@@ -1251,6 +1251,10 @@ def __init_finalize__(self, name, symbolic_min=None, symbolic_max=None,
     def is_Derived(self):
         return self._parent is not None
 
+    @property
+    def is_NonlinearDerived(self):
+        return self.is_Derived and self.parent.is_NonlinearDerived
+
     @property
     def parent(self):
         return self._parent

From c2c48d038c67f8d5e26fff7cf45deb6ee800875e Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Fri, 29 Sep 2023 09:53:55 +0000
Subject: [PATCH 76/90] compiler: Prevent lifting in presence of syncops

---
 devito/passes/clusters/misc.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/devito/passes/clusters/misc.py b/devito/passes/clusters/misc.py
index 9d0e7463bd..453e8c79bc 100644
--- a/devito/passes/clusters/misc.py
+++ b/devito/passes/clusters/misc.py
@@ -32,7 +32,8 @@ def callback(self, clusters, prefix):
             # No iteration space to be lifted from
             return clusters
 
-        hope_invariant = prefix[-1].dim._defines
+        dim = prefix[-1].dim
+        hope_invariant = dim._defines
         outer = set().union(*[i.dim._defines for i in prefix[:-1]])
 
         lifted = []
@@ -43,6 +44,11 @@ def callback(self, clusters, prefix):
                 processed.append(c)
                 continue
 
+            # Synchronization operations prevent lifting
+            if c.syncs.get(dim):
+                processed.append(c)
+                continue
+
             # Is `c` a real candidate -- is there at least one invariant Dimension?
             if any(d._defines & hope_invariant for d in c.used_dimensions):
                 processed.append(c)

From ae68e5ef891df1c90d106e56b9026e59a307e1f1 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Wed, 4 Oct 2023 10:13:44 +0000
Subject: [PATCH 77/90] compiler: Patch imask construction for irregular
 AbstractFunctions

---
 devito/ir/support/syncs.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/devito/ir/support/syncs.py b/devito/ir/support/syncs.py
index 96ab27c637..eaad2a79f8 100644
--- a/devito/ir/support/syncs.py
+++ b/devito/ir/support/syncs.py
@@ -66,9 +66,16 @@ def __repr__(self):
 
     @property
     def imask(self):
-        ret = [self.handle.indices[d] if d.root in self.lock.locked_dimensions else FULL
-               for d in self.target.dimensions]
-        return IMask(*ret, getters=self.target.dimensions, function=self.function,
+        ret = []
+        for d in self.target.dimensions:
+            if d.root in self.lock.locked_dimensions:
+                ret.append(self.handle.indices[d])
+            else:
+                ret.append(FULL)
+
+        return IMask(*ret,
+                     getters=self.target.dimensions,
+                     function=self.function,
                      findex=self.findex)
 
 
@@ -81,9 +88,19 @@ def __repr__(self):
 
     @property
     def imask(self):
-        ret = [(self.tindex, self.size) if d.root is self.dim.root else FULL
-               for d in self.target.dimensions]
-        return IMask(*ret, getters=self.target.dimensions, function=self.function,
+        ret = []
+        for d in self.target.dimensions:
+            if d.root is self.dim.root:
+                if self.target.is_regular:
+                    ret.append((self.tindex, self.size))
+                else:
+                    ret.append((self.tindex, 1))
+            else:
+                ret.append(FULL)
+
+        return IMask(*ret,
+                     getters=self.target.dimensions,
+                     function=self.function,
                      findex=self.findex)
 
 

From 77a283e34fc3f20d5aa3accc32f1244dba2a7ebe Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Mon, 2 Oct 2023 12:23:06 +0000
Subject: [PATCH 78/90] compiler: Fix multi-level buffering

---
 devito/ir/support/syncs.py           |   2 +-
 devito/passes/clusters/asynchrony.py |  23 ++--
 devito/passes/clusters/buffering.py  | 163 +++++++++++++++++----------
 devito/passes/clusters/utils.py      |  14 ++-
 4 files changed, 127 insertions(+), 75 deletions(-)

diff --git a/devito/ir/support/syncs.py b/devito/ir/support/syncs.py
index eaad2a79f8..b39353ad12 100644
--- a/devito/ir/support/syncs.py
+++ b/devito/ir/support/syncs.py
@@ -94,7 +94,7 @@ def imask(self):
                 if self.target.is_regular:
                     ret.append((self.tindex, self.size))
                 else:
-                    ret.append((self.tindex, 1))
+                    ret.append((0, 1))
             else:
                 ret.append(FULL)
 
diff --git a/devito/passes/clusters/asynchrony.py b/devito/passes/clusters/asynchrony.py
index cb874d5295..f839363f4d 100644
--- a/devito/passes/clusters/asynchrony.py
+++ b/devito/passes/clusters/asynchrony.py
@@ -4,7 +4,8 @@
 
 from devito.ir import (Forward, GuardBoundNext, Queue, Vector, WaitLock, WithLock,
                        FetchUpdate, PrefetchUpdate, ReleaseLock, normalize_syncs)
-from devito.symbolics import uxreplace
+from devito.passes.clusters.utils import is_memcpy
+from devito.symbolics import IntDiv, uxreplace
 from devito.tools import OrderedSet, is_integer, timed_pass
 from devito.types import CustomDimension, Lock
 
@@ -125,7 +126,7 @@ def callback(self, clusters, prefix):
                     assert lock.size == 1
                     indices = [0]
 
-                if is_memcpy(c0):
+                if wraps_memcpy(c0):
                     e = c0.exprs[0]
                     function = e.lhs.function
                     findex = e.lhs.indices[d]
@@ -177,7 +178,7 @@ def callback(self, clusters, prefix):
             for c in clusters:
                 dims = self.key(c)
                 if d._defines & dims:
-                    if is_memcpy(c):
+                    if wraps_memcpy(c):
                         # Case 1A (special case, leading to more efficient streaming)
                         self._actions_from_init(c, prefix, actions)
                     else:
@@ -186,7 +187,7 @@ def callback(self, clusters, prefix):
 
         # Case 2
         else:
-            mapper = OrderedDict([(c, is_memcpy(c)) for c in clusters
+            mapper = OrderedDict([(c, wraps_memcpy(c)) for c in clusters
                                   if d in self.key(c)])
 
             # Case 2A (special case, leading to more efficient streaming)
@@ -257,7 +258,7 @@ def _actions_from_update_memcpy(self, cluster, clusters, prefix, actions):
 
         # If fetching into e.g. `ub[sb1]` we'll need to prefetch into e.g. `ub[sb0]`
         tindex0 = e.lhs.indices[d]
-        if is_integer(tindex0):
+        if is_integer(tindex0) or isinstance(tindex0, IntDiv):
             tindex = tindex0
         else:
             assert tindex0.is_Modulo
@@ -321,16 +322,8 @@ def __init__(self, drop=False, syncs=None, insert=None):
         self.insert = insert or []
 
 
-def is_memcpy(cluster):
-    """
-    True if `cluster` emulates a memcpy involving an Array, False otherwise.
-    """
+def wraps_memcpy(cluster):
     if len(cluster.exprs) != 1:
         return False
 
-    a, b = cluster.exprs[0].args
-
-    if not (a.is_Indexed and b.is_Indexed):
-        return False
-
-    return a.function.is_Array or b.function.is_Array
+    return is_memcpy(cluster.exprs[0])
diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py
index f479c3500e..35b019b4e1 100644
--- a/devito/passes/clusters/buffering.py
+++ b/devito/passes/clusters/buffering.py
@@ -9,6 +9,7 @@
                        lower_exprs, vmax, vmin)
 from devito.exceptions import InvalidOperator
 from devito.logger import warning
+from devito.passes.clusters.utils import is_memcpy
 from devito.symbolics import IntDiv, retrieve_function_carriers, uxreplace
 from devito.tools import (Bunch, DefaultOrderedDict, Stamp, as_tuple,
                           filter_ordered, flatten, is_integer, timed_pass)
@@ -175,6 +176,12 @@ def callback(self, clusters, prefix, cache=None):
                 if b.size == 1 and not init_onread(b.function):
                     continue
 
+                # Special case: avoid initialization in the case of double
+                # (or multiple levels of) buffering because it will have been
+                # already performed
+                if b.size > 1 and b.multi_buffering:
+                    continue
+
                 dims = b.function.dimensions
                 lhs = b.indexed[dims]._subs(dim, b.firstidx.b)
                 rhs = b.function[dims]._subs(dim, b.firstidx.f)
@@ -347,64 +354,14 @@ def __init__(self, function, dim, d, accessv, cache, options, sregistry):
         self.sub_iterators = defaultdict(list)
         self.subdims_mapper = DefaultOrderedDict(set)
 
-        # Create the necessary ModuloDimensions for indexing into the buffer
-        # E.g., `u[time,x] + u[time+1,x] -> `ub[sb0,x] + ub[sb1,x]`, where `sb0`
-        # and `sb1` are ModuloDimensions starting at `time` and `time+1` respectively
-        dims = list(function.dimensions)
-        assert dim in function.dimensions
-
-        # Determine the buffer size, and therefore the span of the ModuloDimension,
-        # along the contracting Dimension `d`
-        indices = filter_ordered(i.indices[dim] for i in accessv.accesses)
-        slots = [i.subs({dim: 0, dim.spacing: 1}) for i in indices]
-        try:
-            size = max(slots) - min(slots) + 1
-        except TypeError:
-            # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
-            # Resort to the fast vector-based comparison machinery (rather than
-            # the slower sympy.simplify)
-            slots = [Vector(i) for i in slots]
-            size = int((vmax(*slots) - vmin(*slots) + 1)[0])
-
-        if async_degree is not None:
-            if async_degree < size:
-                warning("Ignoring provided asynchronous degree as it'd be "
-                        "too small for the required buffer (provided %d, "
-                        "but need at least %d for `%s`)"
-                        % (async_degree, size, function.name))
-            else:
-                size = async_degree
-
-        # Create `xd` -- a contraction Dimension for `dim`
-        try:
-            xd = sregistry.get('xds', (dim, size))
-        except KeyError:
-            name = sregistry.make_name(prefix='db')
-            v = CustomDimension(name, 0, size-1, size, dim)
-            xd = sregistry.setdefault('xds', (dim, size), v)
-        self.xd = dims[dims.index(dim)] = xd
-
-        # Finally create the ModuloDimensions as children of `xd`
-        if size > 1:
-            # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
-            # follows SymPy's index ordering (time, time-1, time+1) after modulo
-            # replacement, so that associativity errors are consistent. This very
-            # same strategy is also applied in clusters/algorithms/Stepper
-            p, _ = offset_from_centre(d, indices)
-            indices = sorted(indices,
-                             key=lambda i: -np.inf if i - p == 0 else (i - p))
-            for i in indices:
-                try:
-                    md = sregistry.get('mds', (xd, i))
-                except KeyError:
-                    name = sregistry.make_name(prefix='sb')
-                    v = ModuloDimension(name, xd, i, size)
-                    md = sregistry.setdefault('mds', (xd, i), v)
-                self.index_mapper[i] = md
-                self.sub_iterators[d.root].append(md)
+        # Initialize the buffer metadata. Depending on whether it's multi-level
+        # buffering (e.g., double buffering) or first-level, we need to perform
+        # different actions. Multi-level is trivial, because it essentially
+        # inherits metadata from the previous buffering level
+        if self.multi_buffering:
+            self.__init_multi_buffering__()
         else:
-            assert len(indices) == 1
-            self.index_mapper[indices[0]] = 0
+            self.__init_firstlevel_buffering__(async_degree, sregistry)
 
         # Track the SubDimensions used to index into `function`
         for e in accessv.mapper:
@@ -444,6 +401,11 @@ def __init__(self, function, dim, d, accessv, cache, options, sregistry):
             for i in d0._defines:
                 self.itintervals_mapper.setdefault(i, (interval.relaxed, (), Forward))
 
+        # The buffer dimensions
+        dims = list(function.dimensions)
+        assert dim in function.dimensions
+        dims[dims.index(dim)] = self.xd
+
         # Finally create the actual buffer
         if function in cache:
             self.buffer = cache[function]
@@ -462,6 +424,82 @@ def __init__(self, function, dim, d, accessv, cache, options, sregistry):
             except TypeError:
                 self.buffer = cache[function] = Array(**kwargs)
 
+    def __init_multi_buffering__(self):
+        #TODO
+        if self.is_read:
+            self.xd = xd = self.accessv.firstread.lhs.function.indices[self.dim]
+
+            index0 = self.accessv.firstread.rhs.indices[self.dim]
+            index1 = self.accessv.firstread.lhs.indices[self.dim]
+            if is_integer(index1) or isinstance(index1, ModuloDimension):
+                #TODO;Optimization
+                self.index_mapper[index0] = 0
+            else:
+                self.index_mapper[index0] = index0
+        else:
+            self.xd = xd = self.accessv.lastwrite.lhs.function.indices[self.dim]
+
+            index0 = self.accessv.lastwrite.lhs.indices[self.dim]
+            index1 = self.accessv.lastwrite.rhs.indices[self.dim]
+            self.index_mapper[index0] = index1
+
+    def __init_firstlevel_buffering__(self, async_degree, sregistry):
+        d = self.d
+        dim = self.dim
+        function = self.function
+
+        indices = filter_ordered(i.indices[dim] for i in self.accessv.accesses)
+        slots = [i.subs({dim: 0, dim.spacing: 1}) for i in indices]
+
+        try:
+            size = max(slots) - min(slots) + 1
+        except TypeError:
+            # E.g., special case `slots=[-1 + time/factor, 2 + time/factor]`
+            # Resort to the fast vector-based comparison machinery (rather than
+            # the slower sympy.simplify)
+            slots = [Vector(i) for i in slots]
+            size = int((vmax(*slots) - vmin(*slots) + 1)[0])
+
+        if async_degree is not None:
+            if async_degree < size:
+                warning("Ignoring provided asynchronous degree as it'd be "
+                        "too small for the required buffer (provided %d, "
+                        "but need at least %d for `%s`)"
+                        % (async_degree, size, function.name))
+            else:
+                size = async_degree
+
+        # Create `xd` -- a contraction Dimension for `dim`
+        try:
+            xd = sregistry.get('xds', (dim, size))
+        except KeyError:
+            name = sregistry.make_name(prefix='db')
+            v = CustomDimension(name, 0, size-1, size, dim)
+            xd = sregistry.setdefault('xds', (dim, size), v)
+        self.xd = xd
+
+        # Create the ModuloDimensions to step through the buffer
+        if size > 1:
+            # Note: indices are sorted so that the semantic order (sb0, sb1, sb2)
+            # follows SymPy's index ordering (time, time-1, time+1) after modulo
+            # replacement, so that associativity errors are consistent. This very
+            # same strategy is also applied in clusters/algorithms/Stepper
+            p, _ = offset_from_centre(d, indices)
+            indices = sorted(indices,
+                             key=lambda i: -np.inf if i - p == 0 else (i - p))
+            for i in indices:
+                try:
+                    md = sregistry.get('mds', (xd, i))
+                except KeyError:
+                    name = sregistry.make_name(prefix='sb')
+                    v = ModuloDimension(name, xd, i, size)
+                    md = sregistry.setdefault('mds', (xd, i), v)
+                self.index_mapper[i] = md
+                self.sub_iterators[d.root].append(md)
+        else:
+            assert len(indices) == 1
+            self.index_mapper[indices[0]] = 0
+
     def __repr__(self):
         return "Buffer[%s,<%s>]" % (self.buffer.name, self.xd)
 
@@ -497,6 +535,13 @@ def is_writeonly(self):
     def has_uniform_subdims(self):
         return self.subdims_mapper is not None
 
+    @property
+    def multi_buffering(self):
+        """
+        True if double-buffering or more, False otherwise.
+        """
+        return all(is_memcpy(e) for e in self.accessv.exprs)
+
     @cached_property
     def indexed(self):
         return self.buffer.indexed
@@ -517,7 +562,7 @@ def writeto(self):
                 # in principle this could be accessed through a stencil
                 interval = Interval(i.dim, -h.left, h.right, i.stamp)
             except KeyError:
-                assert d is self.xd
+                assert d in self.xd._defines
                 interval, si, direction = Interval(d), (), Forward
             intervals.append(interval)
             sub_iterators[d] = si
@@ -550,6 +595,8 @@ def written(self):
             sub_iterators[d] = si + as_tuple(self.sub_iterators[d])
             directions[d] = direction
 
+            directions[d.root] = direction
+
         relations = (tuple(i.dim for i in intervals),)
         intervals = IntervalGroup(intervals, relations=relations)
 
diff --git a/devito/passes/clusters/utils.py b/devito/passes/clusters/utils.py
index 853dba808a..bb27859471 100644
--- a/devito/passes/clusters/utils.py
+++ b/devito/passes/clusters/utils.py
@@ -1,7 +1,7 @@
 from devito.symbolics import uxreplace
 from devito.types import Symbol, Wildcard
 
-__all__ = ['makeit_ssa']
+__all__ = ['makeit_ssa', 'is_memcpy']
 
 
 def makeit_ssa(exprs):
@@ -36,3 +36,15 @@ def makeit_ssa(exprs):
         else:
             processed.append(e.func(e.lhs, rhs))
     return processed
+
+
+def is_memcpy(expr):
+    """
+    True if `expr` implements a memcpy involving an Array, False otherwise.
+    """
+    a, b = expr.args
+
+    if not (a.is_Indexed and b.is_Indexed):
+        return False
+
+    return a.function.is_Array or b.function.is_Array

From 59b57c852c8654d3738dae3532c243891dc84704 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Fri, 6 Oct 2023 07:32:11 +0000
Subject: [PATCH 79/90] tests: Fix to work on any CPU

---
 tests/test_dle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_dle.py b/tests/test_dle.py
index 9dcd6d3d6e..ec24128983 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -995,7 +995,7 @@ def test_parallel_prec_inject(self):
         eqns = sf.inject(field=u.forward, expr=sf * dt**2)
 
         op0 = Operator(eqns, opt=('advanced', {'openmp': True,
-                                               'par-collapse-ncores': 20}))
+                                               'par-collapse-ncores': 2000}))
         iterations = FindNodes(Iteration).visit(op0)
 
         assert not iterations[0].pragmas

From 82e47f99ca1ebcc675e976cf950098920d987d29 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Fri, 6 Oct 2023 08:59:00 +0000
Subject: [PATCH 80/90] compiler: Refactor __init_multi_buffering__

---
 devito/passes/clusters/buffering.py | 31 ++++++++++++++++-------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/devito/passes/clusters/buffering.py b/devito/passes/clusters/buffering.py
index 35b019b4e1..72b45f8e3b 100644
--- a/devito/passes/clusters/buffering.py
+++ b/devito/passes/clusters/buffering.py
@@ -425,23 +425,26 @@ def __init__(self, function, dim, d, accessv, cache, options, sregistry):
                 self.buffer = cache[function] = Array(**kwargs)
 
     def __init_multi_buffering__(self):
-        #TODO
-        if self.is_read:
-            self.xd = xd = self.accessv.firstread.lhs.function.indices[self.dim]
+        try:
+            expr, = self.accessv.exprs
+        except ValueError:
+            raise AssertionError("Multi-buffering expects a single expression")
+
+        lhs, rhs = expr.args
+
+        self.xd = lhs.function.indices[self.dim]
 
-            index0 = self.accessv.firstread.rhs.indices[self.dim]
-            index1 = self.accessv.firstread.lhs.indices[self.dim]
-            if is_integer(index1) or isinstance(index1, ModuloDimension):
-                #TODO;Optimization
-                self.index_mapper[index0] = 0
+        idx0 = lhs.indices[self.dim]
+        idx1 = rhs.indices[self.dim]
+
+        if self.is_read:
+            if is_integer(idx0) or isinstance(idx0, ModuloDimension):
+                # This is just for aesthetics of the generated code
+                self.index_mapper[idx1] = 0
             else:
-                self.index_mapper[index0] = index0
+                self.index_mapper[idx1] = idx1
         else:
-            self.xd = xd = self.accessv.lastwrite.lhs.function.indices[self.dim]
-
-            index0 = self.accessv.lastwrite.lhs.indices[self.dim]
-            index1 = self.accessv.lastwrite.rhs.indices[self.dim]
-            self.index_mapper[index0] = index1
+            self.index_mapper[idx0] = idx1
 
     def __init_firstlevel_buffering__(self, async_degree, sregistry):
         d = self.d

From 9e314257527d2e307544324084e81cdcbf8824e8 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Fri, 6 Oct 2023 14:12:49 -0400
Subject: [PATCH 81/90] ci: add intel missing gpu drivers

---
 docker/Dockerfile.cpu | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index 44619f7a17..2ddf532bd0 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -65,8 +65,17 @@ FROM base as oneapi
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg
 RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list
 
-# Intel advisor
-RUN apt-get update -y && apt-get install -y intel-oneapi-advisor
+# Drivers mandatory for intel gpus
+RUN curl -fsSL https://repositories.intel.com/graphics/intel-graphics.key | apt-key add -
+RUN echo "deb [trusted=yes arch=amd64] https://repositories.intel.com/graphics/ubuntu focal main" > /etc/apt/sources.list.d/intel-graphics.list
+
+# Intel advisor and drivers
+RUN apt-get update -y && \
+    # advisor
+    apt-get install -y intel-oneapi-advisor \
+    # drivers
+    apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev
+
 ##############################################################
 # ICC image
 # This is a legacy setup that is not built anymore but kept for reference

From 24a1f2301d592dbc3f99c557dab58f083fd0b61a Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Sat, 7 Oct 2023 07:37:12 -0400
Subject: [PATCH 82/90] deps: update mpi4py req version

---
 requirements-mpi.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-mpi.txt b/requirements-mpi.txt
index 7fba39ca9d..32f60f7177 100644
--- a/requirements-mpi.txt
+++ b/requirements-mpi.txt
@@ -1,2 +1,2 @@
-mpi4py<4.0
+mpi4py<3.1.6
 ipyparallel<8.7
\ No newline at end of file

From 9b6e35fd4ca3c47906265a261a7aed19386e8dae Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Sat, 7 Oct 2023 07:37:12 -0400
Subject: [PATCH 83/90] docker: chain advisor and driver apt-get installs with &&

---
 docker/Dockerfile.cpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index 2ddf532bd0..0230da05ce 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -72,7 +72,7 @@ RUN echo "deb [trusted=yes arch=amd64] https://repositories.intel.com/graphics/u
 # Intel advisor and drivers
 RUN apt-get update -y && \
     # advisor
-    apt-get install -y intel-oneapi-advisor \
+    apt-get install -y intel-oneapi-advisor && \
     # drivers
     apt-get install -y intel-opencl-icd intel-level-zero-gpu level-zero level-zero-dev
 

From 4e220c33eb954ca2e0fe185e2c7e3cf1009cc7fd Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 10 Oct 2023 09:57:53 -0400
Subject: [PATCH 84/90] CI: add new trigger

---
 .../workflows/{documentation.yml => triggers.yml}   | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
 rename .github/workflows/{documentation.yml => triggers.yml} (51%)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/triggers.yml
similarity index 51%
rename from .github/workflows/documentation.yml
rename to .github/workflows/triggers.yml
index f14d96eaf6..965338d99a 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/triggers.yml
@@ -1,4 +1,4 @@
-name: Documentation
+name: Master triggers
 
 on:
   push:
@@ -12,9 +12,16 @@ jobs:
     steps:
     - uses: actions/checkout@v3
 
-    - name: Repository Dispatch
+    - name: Trigger doc build
       uses: peter-evans/repository-dispatch@v2
       with:
         token: ${{ secrets.DEPLOY_DOC_PAT }}
         repository: devitocodes/devitoproject.org
-        event-type: deploy-docs
\ No newline at end of file
+        event-type: deploy-docs
+
+    - name: Trigger pro submodule update
+      uses: peter-evans/repository-dispatch@v2
+      with:
+        token: ${{ secrets.PRO_SUBMODULE }}
+        repository: devitocodespro/devitopro
+        event-type: update-submodule
\ No newline at end of file

From a9132a659ed5dab2312867dabe6812cc9655b8da Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Fri, 6 Oct 2023 10:25:48 -0400
Subject: [PATCH 85/90] compiler: prevent reduction clause for perfect-enough
 outer loops

---
 devito/passes/iet/parpragma.py | 10 ++++++++++
 tests/test_dle.py              | 23 ++++++++++++++++++++++-
 tests/test_gpu_openacc.py      | 24 +++++++++++++++++++++++-
 tests/test_gpu_openmp.py       | 22 ++++++++++++++++++++++
 4 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index b6476192b2..5d75e002db 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -318,6 +318,16 @@ def _make_reductions(self, partree):
         if not any(i.is_ParallelAtomic for i in partree.collapsed):
             return partree
 
+        # We bypass the corner case where a reduction might not be optimal, mainly:
+        # - Only the most inner loop is atomic
+        # In which case we can parallelize the perfect nest
+        # The opposite corner case (most outer loop atomic)
+        # should be detected before this pass
+        nc = len(partree.collapsed)
+        if all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
+            mapper = {partree.root: partree.root._rebuild(ncollapsed=nc-1)}
+            return Transformer(mapper).visit(partree)
+
         exprs = [i for i in FindNodes(Expression).visit(partree) if i.is_reduction]
 
         reductions = []
diff --git a/tests/test_dle.py b/tests/test_dle.py
index ec24128983..ecd5644e41 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -863,7 +863,6 @@ def test_incs_no_atomic(self):
         op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True,
                                                      'par-collapse-ncores': 1,
                                                      'par-collapse-work': 0}))
-
         assert 'collapse(3)' in str(op0)
         assert 'atomic' in str(op0)
 
@@ -875,6 +874,28 @@ def test_incs_no_atomic(self):
         assert 'collapse' not in str(op1)
         assert 'atomic' not in str(op1)
 
+    def test_incr_perfect_outer(self):
+        grid = Grid((5, 5))
+        d = Dimension(name="d")
+        u = Function(name="u", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5), )
+        v = Function(name="v", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5))
+        u.data.fill(1)
+        v.data.fill(2)
+
+        w = Function(name="w", grid=grid)
+
+        summation = Inc(w, u*v)
+
+        op0 = Operator([summation])
+        assert 'reduction' not in str(op0)
+        assert 'collapse(2)' in str(op0)
+        assert 'omp for' in str(op0)
+
+        op0()
+        assert np.all(w.data == 10)
+
     @pytest.mark.parametrize('exprs,simd_level,expected', [
         (['Eq(y.symbolic_max, g[0, x], implicit_dims=(t, x))',
          'Inc(h1[0, 0], 1, implicit_dims=(t, x, y))'],
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index 3085ad85c9..526ddca59d 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator,
-                    norm, solve)
+                    norm, solve, Dimension, Inc)
 from conftest import skipif, assert_blocking, opts_device_tiling
 from devito.data import LEFT
 from devito.exceptions import InvalidOperator
@@ -168,6 +168,28 @@ def test_multi_tile_blocking_structure(self):
             assert len(iters) == len(v)
             assert all(i.step == j for i, j in zip(iters, v))
 
+    def test_incr_perfect_outer(self):
+        grid = Grid((5, 5))
+        d = Dimension(name="d")
+        u = Function(name="u", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5), )
+        v = Function(name="v", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5))
+        u.data.fill(1)
+        v.data.fill(2)
+
+        w = Function(name="w", grid=grid)
+
+        summation = Inc(w, u*v)
+
+        op0 = Operator([summation])
+        assert 'reduction' not in str(op0)
+        assert 'collapse(2)' in str(op0)
+        assert 'acc parallel loop' in str(op0)
+
+        op0()
+        assert np.all(w.data == 10)
+
 
 class TestOperator(object):
 
diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py
index 29866508d8..38157f0962 100644
--- a/tests/test_gpu_openmp.py
+++ b/tests/test_gpu_openmp.py
@@ -268,6 +268,28 @@ def test_timeparallel_reduction(self):
             ('omp target teams distribute parallel for collapse(3)'
              ' reduction(+:f[0])')
 
+    def test_incr_perfect_outer(self):
+        grid = Grid((5, 5))
+        d = Dimension(name="d")
+        u = Function(name="u", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5), )
+        v = Function(name="v", dimensions=(*grid.dimensions, d),
+                     grid=grid, shape=(*grid.shape, 5))
+        u.data.fill(1)
+        v.data.fill(2)
+
+        w = Function(name="w", grid=grid)
+
+        summation = Inc(w, u*v)
+
+        op0 = Operator([summation])
+        assert 'reduction' not in str(op0)
+        assert 'collapse(2)' in str(op0)
+        assert 'omp target teams distribute parallel' in str(op0)
+
+        op0()
+        assert np.all(w.data == 10)
+
 
 class TestOperator(object):
 

From 5f18b8d7275dc97b90fbd19583baf05dc9ba9e2d Mon Sep 17 00:00:00 2001
From: Mathias Louboutin <mathias.louboutin@gmail.com>
Date: Fri, 6 Oct 2023 11:16:02 -0400
Subject: [PATCH 86/90] ci: switch innerproduct tests to inner

---
 devito/passes/iet/parpragma.py | 2 +-
 tests/test_adjoint.py          | 8 ++++----
 tests/test_dle.py              | 3 +--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 5d75e002db..a4513ef31f 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -324,7 +324,7 @@ def _make_reductions(self, partree):
         # The opposite corner case (most outer loop atomic)
         # should be detected before this pass
         nc = len(partree.collapsed)
-        if all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
+        if nc > 1 and all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
             mapper = {partree.root: partree.root._rebuild(ncollapsed=nc-1)}
             return Transformer(mapper).visit(partree)
 
diff --git a/tests/test_adjoint.py b/tests/test_adjoint.py
index 473e484c0e..759b86a3ae 100644
--- a/tests/test_adjoint.py
+++ b/tests/test_adjoint.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from devito import Operator, norm, Function, Grid, SparseFunction
+from devito import Operator, norm, Function, Grid, SparseFunction, inner
 from devito.logger import info
 from examples.seismic import demo_model, Receiver
 from examples.seismic.acoustic import acoustic_setup
@@ -114,7 +114,7 @@ def test_adjoint_F(self, mkey, shape, kernel, space_order, time_order, setup_fun
         solver.adjoint(rec=rec, srca=srca)
 
         # Adjoint test: Verify <Ax,y> matches  <x, A^Ty> closely
-        term1 = np.dot(srca.data.reshape(-1), solver.geometry.src.data)
+        term1 = inner(srca, solver.geometry.src)
         term2 = norm(rec) ** 2
         info('<x, A^Ty>: %f, <Ax,y>: %f, difference: %4.4e, ratio: %f'
              % (term1, term2, (term1 - term2)/term1, term1 / term2))
@@ -231,6 +231,6 @@ def test_adjoint_inject_interpolate(self, shape, coords, npoints=19):
         # y => p
         # x => c
         # P^T y => a
-        term1 = np.dot(p2.data.reshape(-1), p.data.reshape(-1))
-        term2 = np.dot(c.data.reshape(-1), a.data.reshape(-1))
+        term1 = inner(p2, p)
+        term2 = inner(c, a)
         assert np.isclose((term1-term2) / term1, 0., atol=1.e-6)
diff --git a/tests/test_dle.py b/tests/test_dle.py
index ecd5644e41..e7935dd45d 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -888,9 +888,8 @@ def test_incr_perfect_outer(self):
 
         summation = Inc(w, u*v)
 
-        op0 = Operator([summation])
+        op0 = Operator([summation], opt=('advanced', {'openmp': True}))
         assert 'reduction' not in str(op0)
-        assert 'collapse(2)' in str(op0)
         assert 'omp for' in str(op0)
 
         op0()

From c807e2b4dfcf1960183d4b8016f885098ab164f4 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 10 Oct 2023 07:33:58 +0000
Subject: [PATCH 87/90] tests: Factorize test_incr_perfect_outer

---
 tests/test_gpu_common.py  | 30 +++++++++++++++++++++++++++---
 tests/test_gpu_openacc.py | 24 +-----------------------
 tests/test_gpu_openmp.py  | 22 ----------------------
 3 files changed, 28 insertions(+), 48 deletions(-)

diff --git a/tests/test_gpu_common.py b/tests/test_gpu_common.py
index 031bd9181b..071005464d 100644
--- a/tests/test_gpu_common.py
+++ b/tests/test_gpu_common.py
@@ -6,9 +6,9 @@
 
 from conftest import assert_structure
 from devito import (Constant, Eq, Inc, Grid, Function, ConditionalDimension,
-                    MatrixSparseTimeFunction, SparseTimeFunction, SubDimension,
-                    SubDomain, SubDomainSet, TimeFunction, Operator, configuration,
-                    switchconfig)
+                    Dimension, MatrixSparseTimeFunction, SparseTimeFunction,
+                    SubDimension, SubDomain, SubDomainSet, TimeFunction,
+                    Operator, configuration, switchconfig)
 from devito.arch import get_gpu_info
 from devito.exceptions import InvalidArgument
 from devito.ir import (Conditional, Expression, Section, FindNodes, FindSymbols,
@@ -110,6 +110,30 @@ def test_fission(self):
         assert np.all(usave.data[5:] == expected[5:])
         assert np.all(vsave.data[:5] == expected[:5])
 
+    def test_incr_perfect_outer(self):
+        """
+        An `Inc` summing over the trailing Dimension `d` is expected to yield a
+        collapsed parallel nest over the Grid, with no reduction clause.
+        """
+        grid = Grid((5, 5))
+        d = Dimension(name="d")
+        dims, shape = (*grid.dimensions, d), (*grid.shape, 5)
+
+        u = Function(name="u", dimensions=dims, grid=grid, shape=shape)
+        v = Function(name="v", dimensions=dims, grid=grid, shape=shape)
+        w = Function(name="w", grid=grid)
+        u.data.fill(1)
+        v.data.fill(2)
+
+        op = Operator([Inc(w, u*v)])
+        code = str(op)
+        assert 'reduction' not in code
+        assert 'collapse(2)' in code
+        assert 'parallel' in code
+
+        op()
+        assert np.all(w.data == 10)
+
 
 class Bundle(SubDomain):
     """
diff --git a/tests/test_gpu_openacc.py b/tests/test_gpu_openacc.py
index 526ddca59d..3085ad85c9 100644
--- a/tests/test_gpu_openacc.py
+++ b/tests/test_gpu_openacc.py
@@ -2,7 +2,7 @@
 import numpy as np
 
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Eq, Operator,
-                    norm, solve, Dimension, Inc)
+                    norm, solve)
 from conftest import skipif, assert_blocking, opts_device_tiling
 from devito.data import LEFT
 from devito.exceptions import InvalidOperator
@@ -168,28 +168,6 @@ def test_multi_tile_blocking_structure(self):
             assert len(iters) == len(v)
             assert all(i.step == j for i, j in zip(iters, v))
 
-    def test_incr_perfect_outer(self):
-        grid = Grid((5, 5))
-        d = Dimension(name="d")
-        u = Function(name="u", dimensions=(*grid.dimensions, d),
-                     grid=grid, shape=(*grid.shape, 5), )
-        v = Function(name="v", dimensions=(*grid.dimensions, d),
-                     grid=grid, shape=(*grid.shape, 5))
-        u.data.fill(1)
-        v.data.fill(2)
-
-        w = Function(name="w", grid=grid)
-
-        summation = Inc(w, u*v)
-
-        op0 = Operator([summation])
-        assert 'reduction' not in str(op0)
-        assert 'collapse(2)' in str(op0)
-        assert 'acc parallel loop' in str(op0)
-
-        op0()
-        assert np.all(w.data == 10)
-
 
 class TestOperator(object):
 
diff --git a/tests/test_gpu_openmp.py b/tests/test_gpu_openmp.py
index 38157f0962..29866508d8 100644
--- a/tests/test_gpu_openmp.py
+++ b/tests/test_gpu_openmp.py
@@ -268,28 +268,6 @@ def test_timeparallel_reduction(self):
             ('omp target teams distribute parallel for collapse(3)'
              ' reduction(+:f[0])')
 
-    def test_incr_perfect_outer(self):
-        grid = Grid((5, 5))
-        d = Dimension(name="d")
-        u = Function(name="u", dimensions=(*grid.dimensions, d),
-                     grid=grid, shape=(*grid.shape, 5), )
-        v = Function(name="v", dimensions=(*grid.dimensions, d),
-                     grid=grid, shape=(*grid.shape, 5))
-        u.data.fill(1)
-        v.data.fill(2)
-
-        w = Function(name="w", grid=grid)
-
-        summation = Inc(w, u*v)
-
-        op0 = Operator([summation])
-        assert 'reduction' not in str(op0)
-        assert 'collapse(2)' in str(op0)
-        assert 'omp target teams distribute parallel' in str(op0)
-
-        op0()
-        assert np.all(w.data == 10)
-
 
 class TestOperator(object):
 

From a2faf422ffa1956ec56dd60b934b665edc2e67d4 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 10 Oct 2023 08:07:33 +0000
Subject: [PATCH 88/90] compiler: Revamp Parizer scoring function

---
 devito/passes/iet/langbase.py  |  3 --
 devito/passes/iet/parpragma.py | 74 ++++++++++++++++++++++------------
 tests/test_dle.py              | 18 +++++----
 3 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py
index 2acccba648..91e68fc02b 100644
--- a/devito/passes/iet/langbase.py
+++ b/devito/passes/iet/langbase.py
@@ -214,9 +214,6 @@ def DeviceIteration(self):
     def Prodder(self):
         return self.lang.Prodder
 
-    def _device_pointers(self, *args, **kwargs):
-        return {}
-
 
 class DeviceAwareMixin(object):
 
diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index a4513ef31f..1deb4f3f8b 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -1,3 +1,5 @@
+from itertools import takewhile
+
 import numpy as np
 import cgen as c
 from cached_property import cached_property
@@ -254,6 +256,36 @@ def nthreads_nonaffine(self):
     def threadid(self):
         return self.sregistry.threadid
 
+    def _score_candidate(self, n0, root, collapsable=()):
+        """
+        The score of a collapsable nest depends on the number of fully-parallel
+        Iterations and their position in the nest (the outer, the better).
+        """
+        nest = [root] + list(collapsable)
+        n = len(nest)
+
+        # Number of fully-parallel collapsable Iterations
+        key = lambda i: i.is_ParallelNoAtomic
+        fpiters = list(takewhile(key, nest))
+        nfpiters = len(fpiters)
+
+        # Prioritize the Dimensions that are more likely to define larger
+        # iteration spaces
+        fpdims = [i.dim for i in fpiters]
+        key = lambda d: (not d.is_Derived or
+                         d.is_Custom or  # NOTE: might use a refinement
+                         (d.is_Block and d._depth == 1))
+        nfpiters_large = len([d for d in fpdims if key(d)])
+
+        return (
+            int(nfpiters == n),  # Fully-parallel nest
+            int(nfpiters == 0 and n),  # Fully-atomic nest
+            nfpiters_large,
+            -(n0 + 1),  # The outer, the better
+            nfpiters,
+            n,
+        )
+
     def _select_candidates(self, candidates):
         assert candidates
 
@@ -263,15 +295,18 @@ def _select_candidates(self, candidates):
         mapper = {}
         for n0, root in enumerate(candidates):
 
+            # Score `root` in isolation
+            mapper[(root, ())] = self._score_candidate(n0, root)
+
             collapsable = []
             for n, i in enumerate(candidates[n0+1:], n0+1):
                 # The Iteration nest [root, ..., i] must be perfect
                 if not IsPerfectIteration(depth=i).visit(root):
                     break
 
-                # Loops are collapsable only if none of the iteration variables appear
-                # in initializer expressions. For example, the following two loops
-                # cannot be collapsed
+                # Loops are collapsable only if none of the iteration variables
+                # appear in initializer expressions. For example, the following
+                # two loops cannot be collapsed
                 #
                 # for (i = ... )
                 #   for (j = i ...)
@@ -281,7 +316,7 @@ def _select_candidates(self, candidates):
                 if any(j.dim in i.symbolic_min.free_symbols for j in candidates[n0:n]):
                     break
 
-                # Also, we do not want to collapse SIMD-vectorized Iterations
+                # Can't collapse SIMD-vectorized Iterations
                 if i.is_Vectorized:
                     break
 
@@ -297,17 +332,9 @@ def _select_candidates(self, candidates):
 
                 collapsable.append(i)
 
-            # Give a score to this candidate, based on the number of fully-parallel
-            # Iterations and their position (i.e. outermost to innermost) in the nest
-            score = (
-                int(root.is_ParallelNoAtomic),
-                len(self._device_pointers(root)),  # Outermost offloadable
-                int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1),
-                int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1),
-                -(n0 + 1)  # The outermost, the better
-            )
-
-            mapper[(root, tuple(collapsable))] = score
+                # Score `root + collapsable`
+                v = tuple(collapsable)
+                mapper[(root, v)] = self._score_candidate(n0, root, v)
 
         # Retrieve the candidates with highest score
         root, collapsable = max(mapper, key=mapper.get)
@@ -318,16 +345,6 @@ def _make_reductions(self, partree):
         if not any(i.is_ParallelAtomic for i in partree.collapsed):
             return partree
 
-        # We bypass the corner case where a reduction might not be optimal, mainly:
-        # - Only the most inner loop is atomic
-        # In which case we can parallelize the perfect nest
-        # The opposite corner case (most outer loop atomic)
-        # should be detected before this pass
-        nc = len(partree.collapsed)
-        if nc > 1 and all(i.is_ParallelNoAtomic for i in partree.collapsed[:nc-1]):
-            mapper = {partree.root: partree.root._rebuild(ncollapsed=nc-1)}
-            return Transformer(mapper).visit(partree)
-
         exprs = [i for i in FindNodes(Expression).visit(partree) if i.is_reduction]
 
         reductions = []
@@ -586,6 +603,13 @@ def __init__(self, sregistry, options, platform, compiler):
         self.par_tile = UnboundTuple(options['par-tile'])
         self.par_disabled = options['par-disabled']
 
+    def _score_candidate(self, n0, root, collapsable=()):
+        # The number of device pointers leads the score, thus taking precedence
+        # over any other criterion; this is to ensure the outermost loop is offloaded
+        ndevptrs = len(self._device_pointers(root))
+        score = super()._score_candidate(n0, root, collapsable)
+        return (ndevptrs,) + score
+
     def _make_threaded_prodders(self, partree):
         if isinstance(partree.root, self.DeviceIteration):
             # no-op for now
diff --git a/tests/test_dle.py b/tests/test_dle.py
index e7935dd45d..3a94f46a9d 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -863,8 +863,9 @@ def test_incs_no_atomic(self):
         op0 = Operator(Inc(uf, 1), opt=('advanced', {'openmp': True,
                                                      'par-collapse-ncores': 1,
                                                      'par-collapse-work': 0}))
-        assert 'collapse(3)' in str(op0)
-        assert 'atomic' in str(op0)
+        assert 'omp for schedule' in str(op0)
+        assert 'collapse' not in str(op0)
+        assert 'atomic' not in str(op0)
 
         # Now only `x` is parallelized
         op1 = Operator([Eq(v[t, x, 0, 0], v[t, x, 0, 0] + 1), Inc(uf, 1)],
@@ -877,22 +878,23 @@ def test_incs_no_atomic(self):
     def test_incr_perfect_outer(self):
         grid = Grid((5, 5))
         d = Dimension(name="d")
+
         u = Function(name="u", dimensions=(*grid.dimensions, d),
                      grid=grid, shape=(*grid.shape, 5), )
         v = Function(name="v", dimensions=(*grid.dimensions, d),
                      grid=grid, shape=(*grid.shape, 5))
+        w = Function(name="w", grid=grid)
+
         u.data.fill(1)
         v.data.fill(2)
 
-        w = Function(name="w", grid=grid)
-
         summation = Inc(w, u*v)
 
-        op0 = Operator([summation], opt=('advanced', {'openmp': True}))
-        assert 'reduction' not in str(op0)
-        assert 'omp for' in str(op0)
+        op = Operator([summation], opt=('advanced', {'openmp': True}))
+        assert 'reduction' not in str(op)
+        assert 'omp for' in str(op)
 
-        op0()
+        op()
         assert np.all(w.data == 10)
 
     @pytest.mark.parametrize('exprs,simd_level,expected', [

From 1b6cd885eb6d13a13876a8b5236316605377f2d0 Mon Sep 17 00:00:00 2001
From: Fabio Luporini <fabio@devitocodes.com>
Date: Tue, 10 Oct 2023 14:29:57 +0000
Subject: [PATCH 89/90] compiler: Prioritize large sparse loops over tiny ones

---
 devito/passes/iet/parpragma.py | 28 +++++++++++++++++++---------
 tests/test_dle.py              | 19 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 1deb4f3f8b..6bfe2a4c42 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -266,23 +266,33 @@ def _score_candidate(self, n0, root, collapsable=()):
 
         # Number of fully-parallel collapsable Iterations
         key = lambda i: i.is_ParallelNoAtomic
-        fpiters = list(takewhile(key, nest))
-        nfpiters = len(fpiters)
+        fp_iters = list(takewhile(key, nest))
+        n_fp_iters = len(fp_iters)
+
+        # Number of parallel-if-atomic collapsable Iterations
+        key = lambda i: i.is_ParallelAtomic
+        pia_iters = list(takewhile(key, nest))
+        n_pia_iters = len(pia_iters)
 
         # Prioritize the Dimensions that are more likely to define larger
         # iteration spaces
-        fpdims = [i.dim for i in fpiters]
         key = lambda d: (not d.is_Derived or
-                         d.is_Custom or  # NOTE: might use a refinement
+                         (d.is_Custom and not is_integer(d.symbolic_size)) or
                          (d.is_Block and d._depth == 1))
-        nfpiters_large = len([d for d in fpdims if key(d)])
+
+        fpdims = [i.dim for i in fp_iters]
+        n_fp_iters_large = len([d for d in fpdims if key(d)])
+
+        piadims = [i.dim for i in pia_iters]
+        n_pia_iters_large = len([d for d in piadims if key(d)])
 
         return (
-            int(nfpiters == n),  # Fully-parallel nest
-            int(nfpiters == 0 and n),  # Fully-atomic nest
-            nfpiters_large,
+            int(n_fp_iters == n),  # Fully-parallel nest
+            n_fp_iters_large,
+            n_pia_iters_large,
+            n_pia_iters,
             -(n0 + 1),  # The outer, the better
-            nfpiters,
+            n_fp_iters,
             n,
         )
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 3a94f46a9d..520405f839 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -284,7 +284,7 @@ def test_cache_blocking_structure_optrelax_prec_inject():
                                           'openmp': True,
                                           'par-collapse-ncores': 1}))
 
-    assert_structure(op, ['t,p_s0_blk0,p_s', 't,p_s0_blk0,p_s,rsx,rsy'],
+    assert_structure(op, ['t', 't,p_s0_blk0,p_s,rsx,rsy'],
                      't,p_s0_blk0,p_s,rsx,rsy')
 
 
@@ -897,6 +897,23 @@ def test_incr_perfect_outer(self):
         op()
         assert np.all(w.data == 10)
 
+    def test_incr_perfect_sparse_outer(self):
+        grid = Grid(shape=(3, 3, 3))
+
+        u = TimeFunction(name='u', grid=grid)
+        s = SparseTimeFunction(name='s', grid=grid, npoint=1, nt=11)
+
+        eqns = s.inject(u, expr=s)
+
+        op = Operator(eqns, opt=('advanced', {'par-collapse-ncores': 0}))
+
+        iters = FindNodes(Iteration).visit(op)
+        assert len(iters) == 5
+        assert iters[0].is_Sequential
+        assert all(i.is_ParallelAtomic for i in iters[1:])
+        assert iters[1].pragmas[0].value == 'omp for schedule(dynamic,chunk_size)'
+        assert all(not i.pragmas for i in iters[2:])
+
     @pytest.mark.parametrize('exprs,simd_level,expected', [
         (['Eq(y.symbolic_max, g[0, x], implicit_dims=(t, x))',
          'Inc(h1[0, 0], 1, implicit_dims=(t, x, y))'],

From daf671c3d08a63d0a4b8e65fb1e12d2fa8c349c2 Mon Sep 17 00:00:00 2001
From: mloubout <mathias.louboutin@gmail.com>
Date: Tue, 10 Oct 2023 10:53:58 -0400
Subject: [PATCH 90/90] compiler: prioritize smaller perfect loop over atomics

---
 devito/passes/iet/parpragma.py | 2 +-
 tests/test_dle.py              | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py
index 6bfe2a4c42..29ba9a986b 100644
--- a/devito/passes/iet/parpragma.py
+++ b/devito/passes/iet/parpragma.py
@@ -289,10 +289,10 @@ def _score_candidate(self, n0, root, collapsable=()):
         return (
             int(n_fp_iters == n),  # Fully-parallel nest
             n_fp_iters_large,
+            n_fp_iters,
             n_pia_iters_large,
             n_pia_iters,
             -(n0 + 1),  # The outer, the better
-            n_fp_iters,
             n,
         )
 
diff --git a/tests/test_dle.py b/tests/test_dle.py
index 520405f839..03c0b533c9 100644
--- a/tests/test_dle.py
+++ b/tests/test_dle.py
@@ -905,7 +905,8 @@ def test_incr_perfect_sparse_outer(self):
 
         eqns = s.inject(u, expr=s)
 
-        op = Operator(eqns, opt=('advanced', {'par-collapse-ncores': 0}))
+        op = Operator(eqns, opt=('advanced', {'par-collapse-ncores': 0,
+                                              'openmp': True}))
 
         iters = FindNodes(Iteration).visit(op)
         assert len(iters) == 5