# config.cfg

###############################################################################
# FIRESTARTER - A Processor Stress Test Utility
# Copyright (C) 2019 TU Dresden, Center for Information Services and High
# Performance Computing
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Contact: daniel.hackenberg@tu-dresden.de
###############################################################################

###############################################################################
# this file specifies which functions will be generated
###############################################################################
# the source code (<isa>_functions.c) and templates (<isa>_functions.c.py) for
# each isa have to be placed in the source_files and templates directory, 
# respectively
###############################################################################

# Version information (major, minor, free-form info) and optional feature
# switches.
[VERSION]
major=1
minor=7
# additional information, e.g., "BETA"
# NOTE(review): whether the surrounding double quotes end up in the value
# depends on the consuming parser - confirm they are stripped or intended
info="3 (github)"

# optional features
# These keys still belong to [VERSION]: no new section header precedes them.
# The values used here are 1 and 0, presumably meaning enabled and disabled.
enable_cuda=1
enable_win64=1
enable_mac=0

# Instruction-set definition for AVX-512.
# NOTE(review): the per-key notes in this section are inferred from the values
# in this file, not from the code that reads it - confirm before relying on them.
[ISA_AVX512]
# same name as used by isa= in [Knights_Landing] and [Skylake-SP]
template= avx512
feature_req= avx512
# both names match arch/model/isa/threads of [Skylake-SP]
# (skl, xeonep, avx512, threads 1 and 2)
fallback= func_skl_xeonep_avx512_1t, func_skl_xeonep_avx512_2t
# looks like a compiler option for this instruction set
flags = -mavx512f
# presumably related to enable_win64 in [VERSION] - confirm
win64_incl = 1

# Instruction-set definition for FMA4.
# NOTE(review): the per-key notes in this section are inferred from the values
# in this file, not from the code that reads it - confirm before relying on them.
[ISA_FMA4]
# same name as used by isa= in [Bulldozer]
template= fma4
feature_req= fma4
# the name matches arch/model/isa/threads of [Bulldozer]
# (bld, opteron, fma4, threads 1)
fallback= func_bld_opteron_fma4_1t
# comma-separated list; looks like compiler options for this instruction set
flags= -mavx, -mfma4
# presumably related to enable_win64 in [VERSION] - confirm
win64_incl = 1

# Instruction-set definition for FMA.
# NOTE(review): the per-key notes in this section are inferred from the values
# in this file, not from the code that reads it - confirm before relying on them.
[ISA_FMA]
# same name as used by isa= in [Skylake], [Haswell] and [Haswell-EP]
template= fma
feature_req= fma
# both names match arch/model/isa/threads of [Haswell]
# (hsw, corei, fma, threads 1 and 2)
# keep this list free of trailing whitespace so the last name stays clean on
# parsers that do not trim values
fallback= func_hsw_corei_fma_1t, func_hsw_corei_fma_2t
# comma-separated list; looks like compiler options for this instruction set
flags= -mavx, -mfma
# presumably related to enable_win64 in [VERSION] - confirm
win64_incl = 1

# Instruction-set definition for AVX.
# NOTE(review): the per-key notes in this section are inferred from the values
# in this file, not from the code that reads it - confirm before relying on them.
[ISA_AVX]
# same name as used by isa= in [Sandy Bridge] and [Sandy Bridge-EP]
template= avx
feature_req= avx
# both names match arch/model/isa/threads of [Sandy Bridge]
# (snb, corei, avx, threads 1 and 2)
fallback= func_snb_corei_avx_1t, func_snb_corei_avx_2t
# looks like a compiler option for this instruction set
flags = -mavx
# presumably related to enable_win64 in [VERSION] - confirm
win64_incl = 1

# Instruction-set definition for SSE2.
# NOTE(review): the per-key notes in this section are inferred from the values
# in this file, not from the code that reads it - confirm before relying on them.
[ISA_SSE2]
# same name as used by isa= in [Nehalem] and [Nehalem-EP]
template= sse2
feature_req= sse2
# both names match arch/model/isa/threads of [Nehalem]
# (nhm, corei, sse2, threads 1 and 2)
fallback= func_nhm_corei_sse2_1t, func_nhm_corei_sse2_2t
# looks like a compiler option for this instruction set
flags = -msse2
# presumably related to enable_win64 in [VERSION] - confirm
win64_incl = 1

###############################################################################
# supported processors
###############################################################################
# arch/model:   used as prefixes for function names in generated code
# threads:      generate code sequences for listed numbers of threads
# isa:          specifies the template that contains the required code snippets
# cpu_family:   cpu family of the processors that may use this code path
# cpu_model:    cpu models (within the cpu_family) that use this code path
# buffer_sizes: L1 cache size / L2 cache size / L3 cache size / memory per core
#               - sizes are defined per core
#               - SMT code paths use size/num_threads_per_core per thread
# lines:        minimal number of instruction groups for assembler loops
#               - SMT code will use lines/num_threads_per_core per thread
# instr_groups: defines which code snippets are used for each level
# proportion:   specifies proportion of accesses to each level
#               - one value per instr_groups entry, in the same order
#               - sequence is repeated to reach the minimal number of lines
###############################################################################

# Knights Landing
# TODO: MCDRAM support:
# - increase ram_size to 945116501 (64 GiB/72)
#   NOTE(review): 64 GiB/72 is about 954437177 bytes, whereas 945116501*72 is
#   about 63.4 GiB - confirm which figure is intended
# - cache mode: implicitly use l3_size as in other architectures
# - flat/hybrid: explicitly use l3_size/2 allocated in extra MCDRAM buffer
[Knights_Landing]
arch=           knl
model=          xeonphi
threads=        4
isa=            avx512
cpu_family=     6
cpu_model=      87
# NOTE(review): the third (L3) entry is larger than the fourth (memory) entry,
# unlike every other processor section in this file; presumably tied to the
# MCDRAM TODO above - confirm
buffer_sizes=   32768,524288,236279125,26214400
lines=          1536
# no L3_* group is listed here, so instr_groups and proportion have four entries
instr_groups=   RAM_P,L2_S,L1_L,REG
proportion=     3,8,40,10

# Skylake / Kaby Lake desktop
# Uses the fma template, whereas [Skylake-SP] uses avx512.
[Skylake]
arch=           skl
model=          corei
threads=        1,2
# must equal a template= name from an [ISA_*] section; keep it free of
# trailing whitespace so the match does not depend on parser trimming
isa=            fma
cpu_family=     6
cpu_model=      78,94
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 1.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,1572864,104857600
lines=          1536
# the *_256 group names appear only in this section of the file
instr_groups=   RAM_L,L3_LS_256,L2_LS_256,L1_2LS_256,REG
# one value per instr_groups entry, in the same order
proportion=     3,5,18,78,40

# Skylake server
# [ISA_AVX512] lists func_skl_xeonep_avx512_1t/_2t as fallback, which matches
# this section's arch, model, isa and threads values.
[Skylake-SP]
arch=           skl
model=          xeonep
threads=        1,2
isa=            avx512
cpu_family=     6
cpu_model=      85
# order per the key legend: L1, L2, L3, memory per core
# NOTE(review): the memory entry (1048576000) is ten times the 104857600 used
# by most other processor sections - confirm this is intended
buffer_sizes=   32768,1048576,1441792,1048576000
lines=          1536
instr_groups=   RAM_S,RAM_P,L3_S,L3_P,L2_S,L2_L,L1_S,L1_L,L1_BROADCAST,REG
# one value per instr_groups entry, in the same order; L1_S is set to 0
proportion=     3,1,1,1,4,70,0,40,120,160

# Haswell/Broadwell desktop
# [ISA_FMA] lists func_hsw_corei_fma_1t/_2t as fallback, which matches this
# section's arch, model, isa and threads values.
[Haswell]
arch=           hsw
model=          corei
threads=        1,2
isa=            fma
cpu_family=     6
cpu_model=      60,61,69,70,71
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 1.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,1572864,104857600
lines=          1536
instr_groups=   RAM_L,L3_LS,L2_LS,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     2,3,9,90,40

# Haswell/Broadwell server
# Differs from [Haswell] in model, cpu_model, the L3 buffer size and proportion.
[Haswell-EP]
arch=           hsw
model=          xeonep
threads=        1,2
isa=            fma
cpu_family=     6
cpu_model=      63,79
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 2.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,2621440,104857600
lines=          1536
instr_groups=   RAM_L,L3_LS,L2_LS,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     2,1,9,79,35

# Sandy/Ivy Bridge desktop
# [ISA_AVX] lists func_snb_corei_avx_1t/_2t as fallback, which matches this
# section's arch, model, isa and threads values.
[Sandy Bridge]
arch=           snb
model=          corei
threads=        1,2
isa=            avx
cpu_family=     6
cpu_model=      42,58
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 1.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,1572864,104857600
lines=          1536
instr_groups=   RAM_L,L3_LS,L2_LS,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     2,4,10,90,45

# Sandy/Ivy Bridge server
# Differs from [Sandy Bridge] in model, cpu_model, the L3 buffer size and
# proportion.
[Sandy Bridge-EP]
arch=           snb
model=          xeonep
threads=        1,2
isa=            avx
cpu_family=     6
cpu_model=      45,62
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 2.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,2621440,104857600
lines=          1536
instr_groups=   RAM_L,L3_LS,L2_LS,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     3,2,10,90,30

# Nehalem/Westmere desktop
# [ISA_SSE2] lists func_nhm_corei_sse2_1t/_2t as fallback, which matches this
# section's arch, model, isa and threads values.
[Nehalem]
arch=           nhm
model=          corei
threads=        1,2
isa=            sse2
cpu_family=     6
cpu_model=      30,37,23
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 1.5 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,1572864,104857600
# NOTE(review): coverage is not described in the key legend and is not set in
# any other processor section of this file - confirm whether it is still read
coverage=       0.5,0.8,0.8,1.0
lines=          1536
# no L2_* or L3_* group is listed here
instr_groups=   RAM_P,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     1,70,2

# Nehalem/Westmere server
# Differs from [Nehalem] in model, cpu_model, the L3 buffer size and
# proportion, and it has no coverage key.
[Nehalem-EP]
arch=           nhm
model=          xeonep
threads=        1,2
isa=            sse2
cpu_family=     6
cpu_model=      26,44
# order per the key legend: L1, L2, L3, memory per core
# (32 KiB, 256 KiB, 2 MiB, 100 MiB if the unit is bytes - confirm)
buffer_sizes=   32768,262144,2097152,104857600
lines=          1536
# no L2_* or L3_* group is listed here
instr_groups=   RAM_P,L1_LS,REG
# one value per instr_groups entry, in the same order
proportion=     1,60,2

# Bulldozer/Piledriver
# [ISA_FMA4] lists func_bld_opteron_fma4_1t as fallback, which matches this
# section's arch, model, isa and threads values.
# This is the only processor section in this file with a cpu_family other than 6.
[Bulldozer]
arch=           bld
model=          opteron
threads=        1
isa=            fma4
cpu_family=     21
cpu_model=      1,2,3
# order per the key legend: L1, L2, L3, memory per core
# (16 KiB, 1 MiB, 768 KiB, 100 MiB if the unit is bytes - confirm)
# NOTE(review): the L2 entry is larger than the L3 entry; the legend says sizes
# are per core, which presumably explains it - confirm
buffer_sizes=   16384,1048576,786432,104857600
lines=          1536
instr_groups=   RAM_L,L3_L,L2_LS,L1_L,REG
# one value per instr_groups entry, in the same order
proportion=     1,1,5,90,45