diff --git a/FAQ.md b/FAQ.md
index 4b919483d5..ecd011faea 100644
--- a/FAQ.md
+++ b/FAQ.md
@@ -596,9 +596,9 @@ By default, Devito compiles the generated code using flags that maximize the run
 
 [top](#Frequently-Asked-Questions)
 
-## Can I control the MPI domain decomposition
+## Can I control the MPI domain decomposition?
 
-Until Devito v3.5 included, domain decomposition occurs along the fastest axis. As of later versions, domain decomposition occurs along the slowest axis, for performance reasons.  And yes, it is possible to control the domain decomposition in user code, but this is not neatly documented. Take a look at `test_custom_topology` in [this file](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks.
+Up to and including Devito v3.5, domain decomposition occurred along the fastest axis. In later versions, domain decomposition occurs along the slowest axis, for performance reasons. And yes, it is possible to control the domain decomposition in user code, but this is not neatly documented. Take a look at `class CustomTopology` in [distributed.py](https://github.com/devitocodes/devito/blob/master/devito/mpi/distributed.py) and `test_custom_topology` in [test_mpi.py](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks.
 
 
 [top](#Frequently-Asked-Questions)
diff --git a/devito/logger.py b/devito/logger.py
index 92ede2e8dc..b8fce1b68e 100644
--- a/devito/logger.py
+++ b/devito/logger.py
@@ -4,7 +4,7 @@
 import sys
 from contextlib import contextmanager
 
-__all__ = ('set_log_level', 'set_log_noperf', 'is_log_enabled_for',
+__all__ = ('set_log_level', 'set_log_noperf', 'is_log_enabled_for', 'switch_log_level',
            'log', 'warning', 'error', 'perf', 'hint',
            'RED', 'GREEN', 'BLUE')
 
@@ -71,21 +71,42 @@ def set_log_level(level, comm=None):
     comm : MPI communicator, optional
         An MPI communicator the logger should be collective over. If provided, only
         rank-0 on that communicator will write to the registered handlers, other
-        ranks will use a `logging.NullHandler`.  By default, ``comm`` is set
-        to ``None``, so all ranks will use the default handlers.  This could be
+        ranks will use a `logging.NullHandler`. By default, ``comm`` is set
+        to ``None``, so all ranks will use the default handlers. This could be
         used, for example, if one wants to log to one file per rank.
     """
     from devito import configuration
 
-    if comm is not None:
+    if comm is not None and configuration['mpi']:
         if comm.rank != 0:
             logger.removeHandler(stream_handler)
             logger.addHandler(logging.NullHandler())
+    else:
+        logger.addHandler(stream_handler)
 
     # Triggers a callback to `_set_log_level`
     configuration['log-level'] = level
 
 
+class switch_log_level(object):
+    """
+    Temporarily restrict logging to rank 0 of ``comm``; all ranks log again on exit.
+    """
+
+    def __init__(self, comm):
+        # Capture the log level at construction time; it is re-applied on enter/exit
+        from devito import configuration
+        self.level = configuration['log-level']
+        self.comm = comm
+
+    def __enter__(self):
+        # Limit logging to rank 0 (only if MPI is enabled and `comm` is given)
+        set_log_level(self.level, self.comm)
+
+    def __exit__(self, *args):
+        set_log_level(self.level)  # no `comm`: re-add the stream handler on every rank
+
+
 def set_log_noperf():
     """Do not print performance-related messages."""
     logger.setLevel(WARNING)
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
index 45860303e8..462e5b335b 100644
--- a/devito/operator/operator.py
+++ b/devito/operator/operator.py
@@ -9,7 +9,7 @@
 from devito.arch import compiler_registry, platform_registry
 from devito.data import default_allocator
 from devito.exceptions import InvalidOperator, ExecutionError
-from devito.logger import debug, info, perf, warning, is_log_enabled_for
+from devito.logger import debug, info, perf, warning, is_log_enabled_for, switch_log_level
 from devito.ir.equations import LoweredEq, lower_exprs
 from devito.ir.clusters import ClusterGroup, clusterize
 from devito.ir.iet import (Callable, CInterface, EntryFunction, FindSymbols, MetaCall,
@@ -871,8 +871,9 @@ def apply(self, **kwargs):
         # Post-process runtime arguments
         self._postprocess_arguments(args, **kwargs)
 
-        # Output summary of performance achieved
-        return self._emit_apply_profiling(args)
+        # When MPI is used, restrict the performance summary output to rank 0 only
+        with switch_log_level(comm=args.comm):
+            return self._emit_apply_profiling(args)
 
     # Performance profiling
 
diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py
index fd0defd089..62d842398a 100644
--- a/devito/operator/profiling.py
+++ b/devito/operator/profiling.py
@@ -473,7 +473,7 @@ def add_glb_vanilla(self, key, time):
         ops = sum(v.ops for v in self.input.values())
         traffic = sum(v.traffic for v in self.input.values())
 
-        if np.isnan(traffic):
+        if np.isnan(traffic) or traffic == 0:
             return
 
         gflops = float(ops)/10**9