From 90341a2848571c14f928b731365314e5926a84ea Mon Sep 17 00:00:00 2001 From: Matevz Tadel Date: Sun, 9 May 2021 00:31:24 -0700 Subject: [PATCH] Remove USE_MATRIPLEX and remaining traces of MIC as C define and make variable. Add SSE3 option to Makefile.config. Set Matriplex width to 4 for SSE. --- Config.cc | 2 +- Config.h | 25 ++++++++------- Geoms/Makefile | 3 -- Makefile | 4 +-- Makefile.config | 37 ++++++++-------------- Matriplex/GenMul.pm | 2 +- Matriplex/Matriplex.h | 2 +- Matriplex/MatriplexCommon.h | 6 ++-- Matriplex/MatriplexSym.h | 2 +- Matriplex/test/GMtest.cxx | 8 ++--- Matrix.h | 61 ++++++++++++++++++------------------- TrackerInfo.cc | 4 --- mkFit/config-parse/Makefile | 2 +- 13 files changed, 67 insertions(+), 91 deletions(-) diff --git a/Config.cc b/Config.cc index 2efc38e4..cfaad295 100644 --- a/Config.cc +++ b/Config.cc @@ -23,7 +23,7 @@ namespace Config int numThreadsFinder = 1; int numThreadsEvents = 1; -#if defined(__MIC__) || defined(__AVX512F__) +#if defined(__AVX512F__) int numThreadsSimulation = 60; #else int numThreadsSimulation = 12; diff --git a/Config.h b/Config.h index bf9da703..53aa0123 100644 --- a/Config.h +++ b/Config.h @@ -413,22 +413,21 @@ namespace Config return (Config::mag_b0*z*z + Config::mag_b1*z + Config::mag_c1)*(Config::mag_a*r*r + 1.f); } -#ifdef USE_MATRIPLEX - - #ifndef MPT_SIZE - #if defined(__MIC__) || defined(__AVX512F__) - #define MPT_SIZE 16 - #elif defined(__AVX__) || defined(__AVX2__) - #define MPT_SIZE 8 - #else - #define MPT_SIZE 8 - #endif - #endif - #ifndef THREAD_BINDING - #define THREAD_BINDING spread +#ifndef MPT_SIZE + #if defined(__AVX512F__) + #define MPT_SIZE 16 + #elif defined(__AVX__) || defined(__AVX2__) + #define MPT_SIZE 8 + #elif defined(__SSE3__) + #define MPT_SIZE 4 + #else + #define MPT_SIZE 8 #endif +#endif +#ifndef THREAD_BINDING +#define THREAD_BINDING spread #endif }; diff --git a/Geoms/Makefile b/Geoms/Makefile index c66155d8..0b7517c5 100644 --- a/Geoms/Makefile +++ b/Geoms/Makefile @@ 
-26,9 +26,6 @@ all: ${TGTS} %.so: %.o ${CXX} -shared -L../lib -lMicCore -o $@ $< -%.om: %.cc %.d - ${CXX} ${CPPFLAGS_NO_ROOT} ${CXXFLAGS} ${VEC_MIC} -c -o $@ $< - ifeq ($(filter clean distclean, ${MAKECMDGOALS}),) include ${DEPS} endif diff --git a/Makefile b/Makefile index 47ce91c2..50dbd8ac 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ CORE_OBJS := $(filter-out main.o, ${OBJS}) AUTO_TGTS := -ifdef USE_MATRIPLEX +# Begin Matriplex auto-matriplex: ${MAKE} -C Matriplex auto && touch $@ @@ -27,7 +27,7 @@ AUTO_TGTS += auto-matriplex ${DEPS}: auto-matriplex -endif +# End Matriplex ifeq ($(filter clean-local clean distclean, ${MAKECMDGOALS}),) include ${DEPS} diff --git a/Makefile.config b/Makefile.config index 4055ab4e..ee2ffcf8 100644 --- a/Makefile.config +++ b/Makefile.config @@ -7,7 +7,6 @@ # This is also needed for icc as it uses gcc headers. # To build mkFit -- Matriplex Kalman Fit: -# - enable USE_MATRIPLEX in section 5, # - cd mkFit; make (or make -C mkFit). @@ -15,23 +14,25 @@ # Configuration section ################################################################ -# -1. What to build -# Define to build for AVX_512, the new mic (KNL) and latest generation Xeons. +# -1. What to build - default is AVX +# Define to build for AVX_512 #AVX_512 := 1 # Define to build for AVX2 #AVX2 := 1 +# Define to build for SSE3 +#SSE3 := 1 # 0. Use gcc-5 from MacPorts on OSX # OSXGCC5 := 1 # Use clang from MacPorts on OSX # OSXMPCLANG := 1 -# 1. Use ROOT or not (never used on MIC) +# 1. Use ROOT or not # Comment out to disable root ("1" is not relevant) #WITH_ROOT := 1 # 2. Use gcc (clang by default on mac) or icc -# Comment out to force using standard c++. For mic only icc can be used. +# Comment out to force using standard c++. 
ifdef INTEL_LICENSE_FILE # Define ICC to make it easier to switch to icpc ICC := icc @@ -55,25 +56,22 @@ VEC_ICC := -xHost -qopt-zmm-usage=high # -xcore-avx512 else ifdef AVX2 VEC_GCC := -mavx2 -mfma VEC_ICC := -mavx2 -mfma +else ifdef SSE3 +VEC_GCC := -msse3 +VEC_ICC := -msse3 else VEC_GCC := -mavx # -fopt-info-vec-all VEC_ICC := -mavx endif -VEC_MIC := -mmic -# 5. Matriplex, intrinsics, vtune -# Matriplex is required for build in mkFit/ -# Comment-out for matriplex, intrinsics (supported for MIC and AVX) -USE_MATRIPLEX := -DUSE_MATRIPLEX +# 5. Matriplex intrinsics, vtune +# Comment out to disable intrinsics (supported for AVX512, AVX2 and AVX) USE_INTRINSICS := -DMPLEX_USE_INTRINSICS # To enforce given vector size (does not work with intrinsics!) #USE_INTRINSICS := -DMPT_SIZE=1 - USE_VTUNE_NOTIFY := 1 -# 6. MIC stuff is built when icc is the chosen compiler. -# MIC build is always done without root. -# Need to build root on MIC and figure out how to deploy it there. +# 6. MIC stuff - obsolete # 7. OSX hack (is there a good way to detect clang?) # MT needs this on OSX-10.8, c++ -v @@ -81,13 +79,6 @@ USE_VTUNE_NOTIFY := 1 # OSX_CXXFLAGS := -stdlib=libc++ # And with gcc-4.8.1 from cms we need this # OSX_LDFLAGS := -lstdc++ -# -# Note: there is a problem building with USE_MATRIPLEX on as there -# was no __builtin_assume_aligned support in clang until recently: -# http://reviews.llvm.org/rL217349 -# It will probably take a while before this gets to osx / xcode. -# If somebody is willing to test this we can try without this -# but it was giving 50% speedup on icc. # 9. Check track state propagation for success, turns on simple # checks of filter convergence: used in SMatrix code mostly, still retain as toyMC propagation still uses this @@ -115,11 +106,10 @@ WITH_TBB := 1 # Derived settings ################################################################ -CPPFLAGS := -I. ${USE_MATRIPLEX} ${USE_INTRINSICS} +CPPFLAGS := -I. 
${USE_INTRINSICS} CXXFLAGS := -fPIC ${OPT} ${OSX_CXXFLAGS} LDFLAGS_HOST := -LDFLAGS_MIC := -static-intel CPPFLAGS += ${USE_STATE_VALIDITY_CHECKS} ${USE_SCATTERING} ${USE_LINEAR_INTERPOLATION} ${ENDTOEND} ${INWARD_FIT} @@ -127,7 +117,6 @@ ifdef USE_VTUNE_NOTIFY ifdef VTUNE_AMPLIFIER_XE_2017_DIR CPPFLAGS += -I$(VTUNE_AMPLIFIER_XE_2017_DIR)/include -DUSE_VTUNE_PAUSE LDFLAGS_HOST += -L$(VTUNE_AMPLIFIER_XE_2017_DIR)/lib64 -littnotify - LDFLAGS_MIC += -L$(VTUNE_AMPLIFIER_XE_2017_DIR)/bin64/k1om -littnotify endif endif diff --git a/Matriplex/GenMul.pm b/Matriplex/GenMul.pm index 3bd06e85..1968137b 100644 --- a/Matriplex/GenMul.pm +++ b/Matriplex/GenMul.pm @@ -385,7 +385,7 @@ sub handle_all_zeros_ones { my @zo; - push @zo, "#ifdef MIC_INTRINSICS"; + push @zo, "#ifdef AVX512_INTRINSICS"; push @zo, "$S->{vectype} all_zeros = { " . join(", ", (0) x 16) . " };" if $zeros; diff --git a/Matriplex/Matriplex.h b/Matriplex/Matriplex.h index e9ee7caa..c8a2a7bb 100644 --- a/Matriplex/Matriplex.h +++ b/Matriplex/Matriplex.h @@ -104,7 +104,7 @@ class Matriplex } } -#if defined(MIC_INTRINSICS) +#if defined(AVX512_INTRINSICS) template void SlurpIn(const T *arr, __m512i& vi, const U&, const int N_proc = N) diff --git a/Matriplex/MatriplexCommon.h b/Matriplex/MatriplexCommon.h index 6b55da39..158ee446 100644 --- a/Matriplex/MatriplexCommon.h +++ b/Matriplex/MatriplexCommon.h @@ -16,18 +16,18 @@ #if defined(MPLEX_USE_INTRINSICS) // This seems unnecessary: __AVX__ is usually defined for all higher ISA extensions - #if defined(__MIC__) || defined(__AVX__) || defined(__AVX512F__) + #if defined(__AVX__) || defined(__AVX512F__) #define MPLEX_INTRINSICS #endif - #if defined(__MIC__) || defined(__AVX512F__) + #if defined(__AVX512F__) typedef __m512 IntrVec_t; #define MPLEX_INTRINSICS_WIDTH_BYTES 64 #define MPLEX_INTRINSICS_WIDTH_BITS 512 - #define MIC_INTRINSICS + #define AVX512_INTRINSICS #define GATHER_INTRINSICS #define GATHER_IDX_LOAD(name, arr) __m512i name = _mm512_load_epi32(arr); diff 
--git a/Matriplex/MatriplexSym.h b/Matriplex/MatriplexSym.h index f999f8f5..3046c742 100644 --- a/Matriplex/MatriplexSym.h +++ b/Matriplex/MatriplexSym.h @@ -123,7 +123,7 @@ class MatriplexSym } } -#if defined(MIC_INTRINSICS) +#if defined(AVX512_INTRINSICS) template void SlurpIn(const T *arr, __m512i& vi, const U&, const int N_proc = N) diff --git a/Matriplex/test/GMtest.cxx b/Matriplex/test/GMtest.cxx index 85958527..0f389a2a 100644 --- a/Matriplex/test/GMtest.cxx +++ b/Matriplex/test/GMtest.cxx @@ -7,10 +7,8 @@ /* # Generate .ah files (make sure DIM, DOM and pattern match): ./GMtest.pl -# Compile host: +# Compile: icc -std=gnu++11 -openmp -mavx -O3 -I.. -I../.. GMtest.cxx -o GMtest -# Compile MIC: - icc -std=gnu++11 -openmp -mmic -O3 -I.. -I../.. GMtest.cxx -o GMtest-mic && scp GMtest-mic mic0: */ typedef long long long64; @@ -21,7 +19,7 @@ const int DIM = 3; const int DOM = 6; #ifdef MPLEX_INTRINSICS -# if defined(__MIC__) || defined(__AVX512F__) +# if defined(__AVX512F__) # warning "MPLEX_INTRINSICS CMP_EPS = 2e-7 --> 3e-7" const float CMP_EPS = 3e-7; # elif defined(__AVX__) @@ -32,7 +30,7 @@ const float CMP_EPS = 5e-7; const float CMP_EPS = 2e-7; # endif #else -# if defined(__MIC__) || defined(__AVX512F__) +# if defined(__AVX512F__) # warning "NO MPLEX_INTRINSICS CMP_EPS = 4e-7" const float CMP_EPS = 4e-7; # else diff --git a/Matrix.h b/Matrix.h index e38b62be..9c2b0a41 100644 --- a/Matrix.h +++ b/Matrix.h @@ -101,53 +101,50 @@ inline void sincos4(const float x, float& sin, float& cos) sin = x - 0.16666667f*x*x2; } } // end namespace mkfit -//============================================================================== -// This ifdef needs to be changed to something like "use matriplex" and/or -// "is icc" as we can only do vectorization with icc now. 
+//============================================================================== -#ifdef USE_MATRIPLEX +// Matriplex dimensions and typedefs - #ifdef __INTEL_COMPILER - #define ASSUME_ALIGNED(a, b) __assume_aligned(a, b) - #else - #define ASSUME_ALIGNED(a, b) a = static_cast(__builtin_assume_aligned(a, b)) - #endif +#ifdef __INTEL_COMPILER + #define ASSUME_ALIGNED(a, b) __assume_aligned(a, b) +#else + #define ASSUME_ALIGNED(a, b) a = static_cast(__builtin_assume_aligned(a, b)) +#endif - #include "Matriplex/MatriplexSym.h" +#include "Matriplex/MatriplexSym.h" - namespace mkfit { +namespace mkfit { - constexpr Matriplex::idx_t NN = MPT_SIZE; // "Length" of MPlex. +constexpr Matriplex::idx_t NN = MPT_SIZE; // "Length" of MPlex. - constexpr Matriplex::idx_t LL = 6; // Dimension of large/long MPlex entities - constexpr Matriplex::idx_t HH = 3; // Dimension of small/short MPlex entities +constexpr Matriplex::idx_t LL = 6; // Dimension of large/long MPlex entities +constexpr Matriplex::idx_t HH = 3; // Dimension of small/short MPlex entities - typedef Matriplex::Matriplex MPlexLL; - typedef Matriplex::Matriplex MPlexLV; - typedef Matriplex::MatriplexSym MPlexLS; +typedef Matriplex::Matriplex MPlexLL; +typedef Matriplex::Matriplex MPlexLV; +typedef Matriplex::MatriplexSym MPlexLS; - typedef Matriplex::Matriplex MPlexHH; - typedef Matriplex::Matriplex MPlexHV; - typedef Matriplex::MatriplexSym MPlexHS; +typedef Matriplex::Matriplex MPlexHH; +typedef Matriplex::Matriplex MPlexHV; +typedef Matriplex::MatriplexSym MPlexHS; - typedef Matriplex::Matriplex MPlex22; - typedef Matriplex::Matriplex MPlex2V; - typedef Matriplex::MatriplexSym MPlex2S; +typedef Matriplex::Matriplex MPlex22; +typedef Matriplex::Matriplex MPlex2V; +typedef Matriplex::MatriplexSym MPlex2S; - typedef Matriplex::Matriplex MPlexLH; - typedef Matriplex::Matriplex MPlexHL; +typedef Matriplex::Matriplex MPlexLH; +typedef Matriplex::Matriplex MPlexHL; - typedef Matriplex::Matriplex MPlexL2; +typedef 
Matriplex::Matriplex MPlexL2; - typedef Matriplex::Matriplex MPlexQF; - typedef Matriplex::Matriplex MPlexQI; - typedef Matriplex::Matriplex MPlexQUI; +typedef Matriplex::Matriplex MPlexQF; +typedef Matriplex::Matriplex MPlexQI; +typedef Matriplex::Matriplex MPlexQUI; - typedef Matriplex::Matriplex MPlexQB; +typedef Matriplex::Matriplex MPlexQB; - } // end namespace mkfit -#endif +} // end namespace mkfit //============================================================================== diff --git a/TrackerInfo.cc b/TrackerInfo.cc index 88f6ea45..dba9e48c 100644 --- a/TrackerInfo.cc +++ b/TrackerInfo.cc @@ -117,11 +117,7 @@ namespace void TrackerInfo::ExecTrackerInfoCreatorPlugin(const std::string& base, TrackerInfo &ti, IterationsInfo &ii, bool verbose) { -#ifdef __MIC__ - std::string soname = base + "-mic.so"; -#else std::string soname = base + ".so"; -#endif struct stat st; diff --git a/mkFit/config-parse/Makefile b/mkFit/config-parse/Makefile index cec663c7..7d2e2d8d 100644 --- a/mkFit/config-parse/Makefile +++ b/mkFit/config-parse/Makefile @@ -6,7 +6,7 @@ libConfigDict.so: ConfigDict.cc ${CXX} -I.. -I../.. -I$(shell root-config --incdir) ${CPPFLAGS} -fPIC -shared -o $@ $^ ConfigDict.cc: ../IterationConfig.h ../SteeringParams.h ConfigLinkDef.h - rootcling -f -D=USE_MATRIPLEX -I=.. -I=../.. $@ $^ + rootcling -f -I=.. -I=../.. $@ $^ clean: rm -f libConfigDict.so ConfigDict.cc ConfigDict_rdict.pcm