diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 609c69295f6..eb8b793f222 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -578,6 +578,7 @@ def _prepare_arguments(self, autotune=None, **kwargs): if len(discretizations) > 1: discretizations = {g for g in discretizations if not any(d.is_Derived for d in g.dimensions)} + for i in discretizations: args.update(i._arg_values(**kwargs)) @@ -590,9 +591,6 @@ def _prepare_arguments(self, autotune=None, **kwargs): if configuration['mpi']: raise ValueError("Multiple Grids found") try: - # Take biggest grid, i.e discard grids with subdimensions - grids = {g for g in grids if not any(d.is_Sub for d in g.dimensions)} - # First grid as there is no heuristic on how to choose from the leftovers grid = grids.pop() except KeyError: grid = None diff --git a/devito/passes/iet/langbase.py b/devito/passes/iet/langbase.py index 457d8476c3f..8c058170144 100644 --- a/devito/passes/iet/langbase.py +++ b/devito/passes/iet/langbase.py @@ -214,8 +214,8 @@ def DeviceIteration(self): def Prodder(self): return self.lang.Prodder - def _n_device_pointers(self, *args, **kwargs): - return 0 + def n_device_pointers(self, *args, **kwargs): + return {} class DeviceAwareMixin(object): @@ -328,11 +328,10 @@ def _(iet): return _initialize(iet) - def _n_device_pointers(self, iet): + def n_device_pointers(self, iet): functions = FindSymbols().visit(iet) devfuncs = [f for f in functions if f.is_Array and f._mem_local] - - return len(devfuncs) + return set(devfuncs) def _is_offloadable(self, iet): """ @@ -345,8 +344,7 @@ def _is_offloadable(self, iet): functions = FindSymbols().visit(iet) buffers = [f for f in functions if f.is_Array and f._mem_mapped] hostfuncs = [f for f in functions if not is_on_device(f, self.gpu_fit)] - - return not (hostfuncs and buffers) + return not (buffers and hostfuncs) class Sections(tuple): diff --git a/devito/passes/iet/parpragma.py b/devito/passes/iet/parpragma.py index 34ca370a60c..4ca4897a4dc 100644 --- a/devito/passes/iet/parpragma.py +++ b/devito/passes/iet/parpragma.py @@ -301,7 +301,7 @@ def _select_candidates(self, candidates): # Iterations and their position (i.e. outermost to innermost) in the nest score = ( int(root.is_ParallelNoAtomic), - self._n_device_pointers(root), # Outermost offloadable + len(self.n_device_pointers(root)), # Outermost offloadable int(len([i for i in collapsable if i.is_ParallelNoAtomic]) >= 1), int(len([i for i in collapsable if i.is_ParallelRelaxed]) >= 1), -(n0 + 1) # The outermost, the better @@ -375,7 +375,7 @@ def _make_partree(self, candidates, nthreads=None): ncollapsed=ncollapsed, nthreads=nthreads, **root.args) prefix = [] - elif all(i.is_ParallelRelaxed for i in candidates) and nthreads is not None: + elif nthreads is not None: body = self.HostIteration(schedule='static', parallel=nthreads is not self.nthreads_nested, ncollapsed=ncollapsed, nthreads=nthreads, @@ -383,7 +383,6 @@ def _make_partree(self, candidates, nthreads=None): prefix = [] else: # pragma ... for ... schedule(..., expr) - assert nthreads is None nthreads = self.nthreads_nonaffine chunk_size = Symbol(name='chunk_size') body = self.HostIteration(ncollapsed=ncollapsed, chunk_size=chunk_size,