From e859b2104206525bef4a3dec3d5288932d2f22cf Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Tue, 20 Jun 2023 11:52:14 +0100 Subject: [PATCH 1/3] misc: Restrict MPI perf logging to rank 0 --- devito/operator/operator.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 45860303e8..c312b910fa 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -8,8 +8,8 @@ from devito.arch import compiler_registry, platform_registry from devito.data import default_allocator -from devito.exceptions import InvalidOperator, ExecutionError -from devito.logger import debug, info, perf, warning, is_log_enabled_for +from devito.exceptions import InvalidOperator +from devito.logger import debug, info, perf, warning, is_log_enabled_for, set_log_level from devito.ir.equations import LoweredEq, lower_exprs from devito.ir.clusters import ClusterGroup, clusterize from devito.ir.iet import (Callable, CInterface, EntryFunction, FindSymbols, MetaCall, @@ -909,6 +909,11 @@ def _emit_timings(timings, indent=''): def _emit_apply_profiling(self, args): """Produce a performance summary of the profiled sections.""" + + # In case 'MPI0' is selected for logging, restrict result printing to one rank + if configuration['mpi']: + set_log_level(configuration['log-level'], comm=args.comm) + # Rounder to 2 decimal places fround = lambda i: ceil(i * 100) / 100 @@ -1002,7 +1007,12 @@ def lower_perfentry(v): if a in args: perf_args[a] = args[a] break - perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args)) + + if configuration['mpi']: + perf("Performance[mode=%s, mpi=%s] arguments: %s, " % + (self._mode, configuration['mpi'], perf_args)) + else: + perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args)) return summary From 6ddce4cdcfc994182977c9fa2b6819583f257d90 Mon Sep 17 00:00:00 2001 From: George BIsbas Date: Wed, 17 Jan 2024 15:57:13 +0200 Subject: [PATCH 2/3] docs: Update for Custom topology and restore logging --- FAQ.md | 4 ++-- devito/logger.py | 16 ++++++++-------- devito/operator/operator.py | 20 +++++++++----------- devito/operator/profiling.py | 2 +- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/FAQ.md b/FAQ.md index 4b919483d5..ecd011faea 100644 --- a/FAQ.md +++ b/FAQ.md @@ -596,9 +596,9 @@ By default, Devito compiles the generated code using flags that maximize the run [top](#Frequently-Asked-Questions) -## Can I control the MPI domain decomposition +## Can I control the MPI domain decomposition? -Until Devito v3.5 included, domain decomposition occurs along the fastest axis. As of later versions, domain decomposition occurs along the slowest axis, for performance reasons. And yes, it is possible to control the domain decomposition in user code, but this is not neatly documented. Take a look at `test_custom_topology` in [this file](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks. +Until Devito v3.5 included, domain decomposition occurs along the fastest axis. As of later versions, domain decomposition occurs along the slowest axis, for performance reasons. And yes, it is possible to control the domain decomposition in user code, but this is not neatly documented. Take a look at `class CustomTopology` in [distributed.py](https://github.com/devitocodes/devito/blob/master/devito/mpi/distributed.py) and `test_custom_topology` in [this file](https://github.com/devitocodes/devito/blob/master/tests/test_mpi.py). In essence, `Grid` accepts the optional argument `topology`, which allows the user to pass a custom topology as an n-tuple, where `n` is the number of distributed dimensions. For example, for a two-dimensional grid, the topology `(4, 1)` will decompose the slowest axis into four partitions, one partition per MPI rank, while the fastest axis will be replicated over all MPI ranks. [top](#Frequently-Asked-Questions) diff --git a/devito/logger.py b/devito/logger.py index 92ede2e8dc..fe75132162 100644 --- a/devito/logger.py +++ b/devito/logger.py @@ -13,13 +13,13 @@ stream_handler = logging.StreamHandler() logger.addHandler(stream_handler) -# Add extra logging levels (note: INFO has value=20, WARNING has value=30) -DEBUG = logging.DEBUG +# Add extra logging levels +DEBUG = logging.DEBUG # value=10 PERF = 19 -INFO = logging.INFO -WARNING = logging.WARNING -ERROR = logging.ERROR -CRITICAL = logging.CRITICAL +INFO = logging.INFO # value=20 +WARNING = logging.WARNING # value=30 +ERROR = logging.ERROR # value=40 +CRITICAL = logging.CRITICAL # value=50 logging.addLevelName(PERF, "PERF") @@ -71,8 +71,8 @@ def set_log_level(level, comm=None): comm : MPI communicator, optional An MPI communicator the logger should be collective over. If provided, only rank-0 on that communicator will write to the registered handlers, other - ranks will use a `logging.NullHandler`. By default, ``comm`` is set - to ``None``, so all ranks will use the default handlers. This could be + ranks will use a `logging.NullHandler`. By default, ``comm`` is set + to ``None``, so all ranks will use the default handlers. This could be used, for example, if one wants to log to one file per rank. """ from devito import configuration diff --git a/devito/operator/operator.py b/devito/operator/operator.py index c312b910fa..6c9a0c70bc 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -6,6 +6,7 @@ from cached_property import cached_property from sympy import sympify +from devito import switchconfig from devito.arch import compiler_registry, platform_registry from devito.data import default_allocator from devito.exceptions import InvalidOperator @@ -871,7 +872,13 @@ def apply(self, **kwargs): # Post-process runtime arguments self._postprocess_arguments(args, **kwargs) - # Output summary of performance achieved + # In case MPI is used restrict result logging to one rank only + if configuration['mpi']: + # Only temporarily change configuration + with switchconfig(mpi=True): + set_log_level('DEBUG', comm=args.comm) + return self._emit_apply_profiling(args) + return self._emit_apply_profiling(args) # Performance profiling @@ -910,10 +917,6 @@ def _emit_timings(timings, indent=''): def _emit_apply_profiling(self, args): """Produce a performance summary of the profiled sections.""" - # In case 'MPI0' is selected for logging, restrict result printing to one rank - if configuration['mpi']: - set_log_level(configuration['log-level'], comm=args.comm) - # Rounder to 2 decimal places fround = lambda i: ceil(i * 100) / 100 @@ -1007,12 +1010,7 @@ def lower_perfentry(v): if a in args: perf_args[a] = args[a] break - - if configuration['mpi']: - perf("Performance[mode=%s, mpi=%s] arguments: %s, " % - (self._mode, configuration['mpi'], perf_args)) - else: - perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args)) + perf("Performance[mode=%s] arguments: %s" % (self._mode, perf_args)) return summary diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py index fd0defd089..62d842398a 100644 --- a/devito/operator/profiling.py +++ b/devito/operator/profiling.py @@ -473,7 +473,7 @@ def add_glb_vanilla(self, key, time): ops = sum(v.ops for v in self.input.values()) traffic = sum(v.traffic for v in self.input.values()) - if np.isnan(traffic): + if np.isnan(traffic) or traffic == 0: return gflops = float(ops)/10**9 From e459d5241682094e3eb08ee359203dd25ce81acf Mon Sep 17 00:00:00 2001 From: George Bisbas Date: Thu, 18 Apr 2024 11:01:42 +0300 Subject: [PATCH 3/3] misc: Use mpi_switch_log for mpi rank 0 logging --- devito/logger.py | 37 +++++++++++++++++++++++++++++-------- devito/operator/operator.py | 15 ++++----------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/devito/logger.py b/devito/logger.py index fe75132162..b8fce1b68e 100644 --- a/devito/logger.py +++ b/devito/logger.py @@ -4,7 +4,7 @@ import sys from contextlib import contextmanager -__all__ = ('set_log_level', 'set_log_noperf', 'is_log_enabled_for', +__all__ = ('set_log_level', 'set_log_noperf', 'is_log_enabled_for', 'switch_log_level', 'log', 'warning', 'error', 'perf', 'hint', 'RED', 'GREEN', 'BLUE') @@ -13,13 +13,13 @@ stream_handler = logging.StreamHandler() logger.addHandler(stream_handler) -# Add extra logging levels -DEBUG = logging.DEBUG # value=10 +# Add extra logging levels (note: INFO has value=20, WARNING has value=30) +DEBUG = logging.DEBUG PERF = 19 -INFO = logging.INFO # value=20 -WARNING = logging.WARNING # value=30 -ERROR = logging.ERROR # value=40 -CRITICAL = logging.CRITICAL # value=50 +INFO = logging.INFO +WARNING = logging.WARNING +ERROR = logging.ERROR +CRITICAL = logging.CRITICAL logging.addLevelName(PERF, "PERF") @@ -77,15 +77,36 @@ def set_log_level(level, comm=None): """ from devito import configuration - if comm is not None: + if comm is not None and configuration['mpi']: if comm.rank != 0: logger.removeHandler(stream_handler) logger.addHandler(logging.NullHandler()) + else: + logger.addHandler(stream_handler) # Triggers a callback to `_set_log_level` configuration['log-level'] = level +class switch_log_level(object): + """ + A context manager to temporarily change MPI logging. + """ + + def __init__(self, comm): + + from devito import configuration + self.level = configuration['log-level'] + self.comm = comm + + def __enter__(self): + # Limit logging to rank 0 + set_log_level(self.level, self.comm) + + def __exit__(self, *args): + set_log_level(self.level) + + def set_log_noperf(): """Do not print performance-related messages.""" logger.setLevel(WARNING) diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 6c9a0c70bc..462e5b335b 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -6,11 +6,10 @@ from cached_property import cached_property from sympy import sympify -from devito import switchconfig from devito.arch import compiler_registry, platform_registry from devito.data import default_allocator -from devito.exceptions import InvalidOperator -from devito.logger import debug, info, perf, warning, is_log_enabled_for, set_log_level +from devito.exceptions import InvalidOperator, ExecutionError +from devito.logger import debug, info, perf, warning, is_log_enabled_for, switch_log_level from devito.ir.equations import LoweredEq, lower_exprs from devito.ir.clusters import ClusterGroup, clusterize from devito.ir.iet import (Callable, CInterface, EntryFunction, FindSymbols, MetaCall, @@ -873,13 +872,8 @@ def apply(self, **kwargs): self._postprocess_arguments(args, **kwargs) # In case MPI is used restrict result logging to one rank only - if configuration['mpi']: - # Only temporarily change configuration - with switchconfig(mpi=True): - set_log_level('DEBUG', comm=args.comm) - return self._emit_apply_profiling(args) - - return self._emit_apply_profiling(args) + with switch_log_level(comm=args.comm): + return self._emit_apply_profiling(args) # Performance profiling @@ -916,7 +910,6 @@ def _emit_timings(timings, indent=''): def _emit_apply_profiling(self, args): """Produce a performance summary of the profiled sections.""" - # Rounder to 2 decimal places fround = lambda i: ceil(i * 100) / 100