Skip to content

Commit

Permalink
Merge pull request #100 from intel/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Apr 11, 2018
2 parents 6066373 + 20a9ed8 commit a2ffe23
Show file tree
Hide file tree
Showing 26 changed files with 741 additions and 353 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ YASK--Yet Another Stencil Kernel: A framework to facilitate exploration of the H

YASK contains a domain-specific compiler to convert scalar C++ stencil code to SIMD-optimized code for Intel(R) Xeon Phi(TM) and Intel(R) Xeon(R) processors.

Supported Platforms
Supported Platforms and Processors:
* 64-bit Linux
* Intel(R) Xeon Phi(TM) processor supporting the MIC_AVX512 instruction set.
* Intel(R) Xeon(R) processor supporting the AVX, AVX2, or CORE_AVX512 instruction sets.
Expand All @@ -24,9 +24,11 @@ Pre-requisites:
(optional: for functional testing if you don't have native ISA support).
* Intel(R) MPI Library, https://software.intel.com/en-us/intel-mpi-library,
or equivalent (optional: for multi-core and multi-node operation).
* Linux libraries 'librt' and 'libnuma'.
* Perl (5.010 or later).
* Awk.
* GNU make.
* Bash shell.
* The 'indent' or 'gindent' utility (optional: to make the generated code easier for humans to read).
* SWIG (3.0.12 or later),
http://www.swig.org (optional: for creating the Python interface).
Expand Down
2 changes: 1 addition & 1 deletion bin/gen_layouts.pl
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ END

# Creation.
print " else if (ndims == $n && do_wrap == $wrap)\n",
" gp = make_shared<YkElemGrid<$layout, $wrap>>(_dims, name, dims, &_ostr);\n";
" gp = make_shared<YkElemGrid<$layout, $wrap>>(_dims, name, dims, _opts, &_ostr);\n";
}
}

Expand Down
2 changes: 1 addition & 1 deletion bin/gen_loops.pl
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ ($$$$)
push @$code,
" // This value of $divar covers ".dimStr($dim)." from $stvar to (but not including) $spvar.",
" idx_t $stvar = std::max($abvar + ($divar * $svar), $bvar);",
" idx_t $spvar = std::min($stvar + $svar, $evar);";
" idx_t $spvar = std::min($abvar + (($divar+1) * $svar), $evar);";
}
}

Expand Down
12 changes: 4 additions & 8 deletions bin/yask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ while true; do
echo " -host <hostname>|-mic <N>"
echo " Specify host to run executable on."
echo " 'ssh <hostname>' will be pre-pended to the sh_prefix command."
echo " If -arch 'knl' is given, it implies the following (which can be overridden):"
echo " -exe_prefix 'numactl --preferred=1'"
echo " If -mic <N> is given, it implies the following (which can be overridden):"
echo " -arch 'knc'"
echo " -host "`hostname`"-mic<N>"
Expand Down Expand Up @@ -188,12 +186,6 @@ if [[ -z ${arch:+ok} ]]; then
exit 1
fi

# Set defaults for KNL.
# TODO: run numactl [on host] to determine if in flat mode.
if [[ "$arch" == "knl" ]]; then
true ${exe_prefix='numactl --preferred=1'}
fi

# Simplified MPI in x-dim only.
if [[ -n "$nranks" ]]; then
true ${mpi_cmd="mpirun -np $nranks"}
Expand Down Expand Up @@ -276,3 +268,7 @@ else
fi

echo "Log saved in '$logfile'."

# Propagate a failure status to the caller if the log records any FAILED test.
# Test grep's exit status directly instead of comparing its count with '>',
# which performs a lexicographic string comparison inside [[ ]]; quote the
# log-file name so paths containing whitespace are handled.
if grep -q FAILED -- "$logfile"; then
exit 1;
fi
Empty file modified docs/YASK-intro.pdf
100755 → 100644
Empty file.
94 changes: 76 additions & 18 deletions include/yask_kernel_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ namespace yask {
typedef std::int64_t idx_t;
#endif

/// Allocate grids on local NUMA node.
const int yask_numa_local = -1;

/// Allocate grids across all available NUMA nodes.
const int yask_numa_interleave = -2;

/// Do not specify any NUMA binding.
const int yask_numa_none = -9;

// Forward declarations of classes and pointers.

class yk_env;
Expand Down Expand Up @@ -96,7 +105,7 @@ namespace yask {
virtual yk_solution_ptr
new_solution(yk_env_ptr env /**< [in] Pointer to env info. */) const;

/// Create a stencil solution by copying the settings from another.
/// **[Advanced]** Create a stencil solution by copying the settings from another.
/**
All the settings that were specified via the `yk_solution::set_*()`
functions in the source solution will be copied to the new solution.
Expand Down Expand Up @@ -284,12 +293,17 @@ namespace yask {
also be smaller than the specified size when the block is at the
edge of the domain. The block size cannot be set in the
solution-step dimension (because temporal blocking is not yet enabled).
Unless auto-tuning is disabled, the block size will be used as
a starting point for an automated search for a higher-performing
block size.
*/
virtual void
set_block_size(const std::string& dim
/**< [in] Name of dimension to set. Must be one of
the names from get_domain_dim_names(). */,
idx_t size /**< [in] Elements in a block in this `dim`. */ ) =0;
idx_t size
/**< [in] Elements in a block in this `dim`. */ ) =0;

/// Get the block size.
/**
Expand All @@ -302,21 +316,6 @@ namespace yask {
/**< [in] Name of dimension to get. Must be one of
the names from get_domain_dim_names(). */) const =0;

/// Set performance parameters from an option string.
/**
Parses the string for options as if from a command-line.
Example: "-bx 64 -block_threads 4" sets the block-size in the *x*
dimension to 64 and the number of threads used to process each
block to 4.
See the help message from the YASK kernel binary for documentation
on the command-line options.
@returns Any strings that were not recognized by the parser as options.
*/
virtual std::string
apply_command_line_options(const std::string& args
/**< [in] String of arguments to parse. */ ) =0;

/// Set the number of MPI ranks in the given dimension.
/**
The *product* of the number of ranks across all dimensions must
Expand Down Expand Up @@ -703,6 +702,49 @@ namespace yask {
Must be exactly one size for each dimension. */ ) =0;
#endif

/// **[Advanced]** Set the default preferred NUMA node on which to allocate data.
/**
This value is used when allocating grids and MPI buffers.
The NUMA "preferred node allocation" policy is used, meaning that
memory will be allocated in an alternative node if the preferred one
doesn't have enough space available or is otherwise restricted.
Instead of specifying a NUMA node, a special value may be used
to specify another policy as listed.
This setting may be overridden for any specific grid.
*/
virtual void
set_default_numa_preferred(int numa_node
/**< [in] Preferred NUMA node for data
allocation. Alternatively, use
`yask_numa_local` for explicit
local-node allocation,
`yask_numa_interleave` for
interleaving pages across all nodes,
or `yask_numa_none` for no NUMA
policy. */) =0;

/// **[Advanced]** Get the default preferred NUMA node on which to allocate data.
/**
@returns Current setting of preferred NUMA node.
*/
virtual int
get_default_numa_preferred() const =0;

/// **[Advanced]** Set performance parameters from an option string.
/**
Parses the string for options as if from a command-line.
Example: "-bx 64 -block_threads 4" sets the block-size in the *x*
dimension to 64 and the number of threads used to process each
block to 4.
See the help message from the YASK kernel binary for documentation
on the command-line options.
@returns Any strings that were not recognized by the parser as options.
*/
virtual std::string
apply_command_line_options(const std::string& args
/**< [in] String of arguments to parse. */ ) =0;

/// **[Advanced]** Use data-storage from existing grids in specified solution.
/**
Calls yk_grid::share_storage() for each pair of grids that have the same name
Expand Down Expand Up @@ -1310,7 +1352,23 @@ namespace yask {
get_num_storage_elements() const =0;

/* Advanced APIs for yk_grid found below are not needed for most applications. */


/// **[Advanced]** Set the preferred NUMA node on which to allocate data for this grid.
/**
This value is used when allocating data for this grid.
*/
virtual void
set_numa_preferred(int numa_node
/**< [in] Preferred NUMA node.
See set_default_numa_preferred() for other options. */) =0;

/// **[Advanced]** Get the preferred NUMA node on which to allocate data for this grid.
/**
@returns Current setting of preferred NUMA node for this grid.
*/
virtual int
get_numa_preferred() const =0;

/// **[Advanced]** Set the left halo size in the specified dimension.
/**
This value is typically set by the stencil compiler, but
Expand Down
2 changes: 1 addition & 1 deletion src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace yask {
// for numbers above 9 (at least up to 99).

// Format: "major.minor.patch".
const string version = "2.04.00";
const string version = "2.05.04";

string yask_get_version_string() {
return version;
Expand Down
6 changes: 3 additions & 3 deletions src/compiler/lib/YaskKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ namespace yask {
ctorCode += " " + grid + "_dim_names = {" +
gdims.makeDimStr(", ", "\"", "\"") + "};\n";
string initCode = " " + grid + "_ptr = std::make_shared<" + typeDef +
">(_dims, \"" + grid + "\", " + grid + "_dim_names, &_ostr);\n"
">(_dims, \"" + grid + "\", " + grid + "_dim_names, _opts, &_ostr);\n"
" assert(" + grid + "_ptr);\n";

// Grid vars.
Expand Down Expand Up @@ -391,7 +391,7 @@ namespace yask {
if (!firstGrid)
newGridCode += " else";
newGridCode += " if (dims == " + grid + "_dim_names) gp = std::make_shared<" +
typeDef + ">(_dims, name, dims, &_ostr);\n";
typeDef + ">(_dims, name, dims, _opts, &_ostr);\n";
}

} // grids.
Expand All @@ -416,7 +416,7 @@ namespace yask {
os << "\n // Make a new grid iff its dims match any in the stencil.\n"
" // Returns pointer to the new grid or nullptr if no match.\n"
" virtual YkGridPtr newStencilGrid(const std::string& name,"
" const GridDimNames& dims) {\n"
" const GridDimNames& dims, KernelSettingsPtr settings) {\n"
" YkGridPtr gp;\n" <<
newGridCode <<
" return gp;\n"
Expand Down
45 changes: 27 additions & 18 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ else ifeq ($(arch),knl)

ISA ?= -xMIC-AVX512
GCXX_ISA ?= -march=knl
MACROS += USE_INTRIN512 USE_RCP28
MACROS += USE_INTRIN512 USE_RCP28 NUMA_PREF=1
YC_TARGET ?= avx512
def_block_args ?= -b 96
def_block_threads ?= 8
Expand Down Expand Up @@ -427,7 +427,25 @@ YK_CXXOPT ?= -O3

# Linker.
YK_LD := $(YK_CXX)
YK_LFLAGS := -lrt -Wl,-rpath=$(LIB_DIR) -L$(LIB_DIR) -l$(YK_BASE2)
YK_LIBS := -lrt
YK_LFLAGS := -Wl,-rpath=$(LIB_DIR) -L$(LIB_DIR) -l$(YK_BASE2)

# Work-around missing numaif.h:
# IF numaif.h is found in /usr/include,
# THEN enable the macro to use it.
ifneq ($(shell find /usr/include -name 'numaif.h' | wc -l),0)
MACROS += USE_NUMAIF_H
endif

# Work-around missing libnuma.so:
# IF libnuma.so.1 exists AND "normal" libnuma can't be found,
# THEN use hard-coded libnuma.so.1,
# ELSE use "normal" libnuma.
ifeq ($(and $(wildcard /usr/lib64/libnuma.so.1),$(shell whereis libnuma |wc -w)),1)
YK_LIBS += /usr/lib64/libnuma.so.1
else
YK_LIBS += -lnuma
endif

# Tools.
SWIG := swig
Expand Down Expand Up @@ -462,15 +480,6 @@ ifeq ($(mpi),1)
MACROS += USE_MPI
endif

# HBW settings.
# TODO: change this to use OS-default location.
ifeq ($(hbw),1)
MACROS += USE_HBW
HBW_DIR = $(HOME)/memkind_build
YK_CXXFLAGS += -I$(HBW_DIR)/include
YK_LFLAGS += -lnuma $(HBW_DIR)/lib/libmemkind.a
endif

# VTUNE settings.
ifeq ($(vtune),1)
MACROS += USE_VTUNE
Expand All @@ -482,7 +491,7 @@ else
VTUNE_DIR = $(VTUNE_AMPLIFIER_XE_2016_DIR)
endif
YK_CXXFLAGS += -I$(VTUNE_DIR)/include
YK_LFLAGS += $(VTUNE_DIR)/lib64/libittnotify.a
YK_LIBS += $(VTUNE_DIR)/lib64/libittnotify.a
endif

# compiler-specific settings
Expand Down Expand Up @@ -541,11 +550,11 @@ kernel: $(YK_EXEC) $(MAKE_REPORT_FILE)
@ls -l $@

$(YK_LIB): $(YK_OBJS)
$(CXX_PREFIX) $(YK_CXX) $(YK_CXXFLAGS) -shared -o $@ $^
$(CXX_PREFIX) $(YK_CXX) $(YK_CXXFLAGS) $(YK_LIBS) -shared -o $@ $^
@ls -l $@

$(YK_EXEC): yask_main.cpp $(YK_LIB)
$(CXX_PREFIX) $(YK_LD) $(YK_CXXFLAGS) $< $(YK_LFLAGS) -o $@
$(CXX_PREFIX) $(YK_LD) $(YK_CXXFLAGS) $< $(YK_LIBS) $(YK_LFLAGS) -o $@
@ls -l $@

$(MAKE_REPORT_FILE): $(YK_LIB)
Expand Down Expand Up @@ -633,7 +642,7 @@ $(YK_SWIG_DIR)/yask_kernel_api_wrap.o: $(YK_SWIG_DIR)/yask_kernel_api_wrap.cpp
$(YK_CXX) $(YK_CXXFLAGS) $(PYINC) -fPIC -c -o $@ $<

$(YK_PY_LIB): $(YK_OBJS) $(YK_SWIG_DIR)/yask_kernel_api_wrap.o
$(YK_CXX) $(YK_CXXFLAGS) -shared -o $@ $^
$(YK_CXX) $(YK_CXXFLAGS) $(YK_LIBS) -shared -o $@ $^

# Simple tests

Expand Down Expand Up @@ -736,8 +745,8 @@ all-tests:
$(MAKE) clean; $(MAKE) stencil=3plane fold=y=2,z=4 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=cube fold=x=2,y=2,z=2 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=test_4d fold=w=2,x=2,y=2,z=2 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=test_scratch1 fold=x=4 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=test_scratch2 fold=x=2,y=2,z=2 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=test_scratch1 real_bytes=8 fold=x=4 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=test_scratch2 real_bytes=8 fold=x=2,y=1,z=2 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=iso3dfd fold=x=4,y=2 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=awp_elastic real_bytes=8 yc-and-yk-test
$(MAKE) clean; $(MAKE) stencil=ssg real_bytes=8 yc-and-yk-test
Expand Down Expand Up @@ -862,4 +871,4 @@ help:
@echo " $(MAKE) -j all"
@echo " $(MAKE) -j all ranks=2"
@echo " $(MAKE) -j all YK_CXX=g++ YK_CXXOPT=-O2 mpi=0"
@echo " $(MAKE) -j all YK_CXX=mpigxx YK_CXXOPT=-O2 ranks=3"
@echo " $(MAKE) -j all YK_CXX=mpigxx YK_CXXOPT=-O2 ranks=3 EXTRA_MACROS='DEBUG'"
Loading

0 comments on commit a2ffe23

Please sign in to comment.