Merge pull request #2184 from devitocodes/intel_compiler

compiler: Enable AVX512 compiler support when available.
devitocodes · Aug 9, 2023 · ca2960d
2 parents 77d4e82 + c0d920a
commit ca2960d
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 10 deletions.
diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py
@@ -20,7 +20,7 @@
            'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice',
            # Intel
            'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
-           'SKX', 'KLX', 'CLX', 'CLK',
+           'SKX', 'KLX', 'CLX', 'CLK', 'SPR',
            # ARM
            'AMD', 'ARM', 'M1', 'GRAVITON',
            # Other loosely supported CPU architectures
@@ -616,7 +616,7 @@ class IntelSkylake(Intel64):
     pass
 
 
-class IntelGoldenCode(Intel64):
+class IntelGoldenCove(Intel64):
     pass
 
 
@@ -744,6 +744,7 @@ def march(cls):
 KLX = IntelSkylake('klx')
 CLX = IntelSkylake('clx')
 CLK = IntelSkylake('clk')
+SPR = IntelGoldenCove('spr')
 
 ARM = Arm('arm')
 GRAVITON = Arm('graviton')
@@ -771,6 +772,7 @@ def march(cls):
     'klx': KLX,  # Kaby Lake
     'clx': CLX,  # Coffee Lake
     'clk': CLK,  # Cascade Lake
+    'spr': SPR,  # Sapphire Rapids
     'knl': KNL,
     'knl7210': KNL7210,
     'arm': ARM,  # Generic ARM CPU

diff --git a/devito/arch/compiler.py b/devito/arch/compiler.py
@@ -13,7 +13,7 @@
 from codepy.toolchain import GCCToolchain, call_capture_output
 
 from devito.arch import (AMDGPUX, Cpu64, M1, NVIDIAX, POWER8, POWER9, GRAVITON,
-                         INTELGPUX, IntelSkylake, get_nvidia_cc, check_cuda_runtime,
+                         INTELGPUX, get_nvidia_cc, check_cuda_runtime,
                          get_m1_llvm_path)
 from devito.exceptions import CompilationError
 from devito.logger import debug, warning, error
@@ -394,11 +394,15 @@ def __init__(self, *args, **kwargs):
         else:
             self.cflags.append('-ffast-math')
 
-        if isinstance(platform, IntelSkylake):
-            # The default is `=256` because avx512 slows down the CPU frequency;
-            # however, we empirically found that stencils generally benefit
-            # from `=512`
-            self.cflags.append('-mprefer-vector-width=512')
+        if platform.isa == 'avx512':
+            if self.version >= Version("8.0.0"):
+                # The default is `=256` because avx512 slows down the CPU frequency;
+                # however, we empirically found that stencils generally benefit
+                # from `=512`
+                self.cflags.append('-mprefer-vector-width=512')
+            else:
+                # Unsupported on earlier versions
+                pass
 
         if platform in [POWER8, POWER9]:
             # -march isn't supported on power architectures, is -mtune needed?
@@ -708,8 +712,8 @@ def __init__(self, *args, **kwargs):
         else:
             self.cflags.append('-fp-model=fast')
 
-        if isinstance(platform, IntelSkylake):
-            # Systematically use 512-bit vectors on skylake
+        if platform.isa == 'avx512':
+            # Systematically use 512-bit vectors if avx512 is available.
             self.cflags.append("-qopt-zmm-usage=high")
 
         if language == 'openmp':