diff --git a/bin/gen_layouts.pl b/bin/gen_layouts.pl index 88689a64..e32c0a94 100755 --- a/bin/gen_layouts.pl +++ b/bin/gen_layouts.pl @@ -277,7 +277,7 @@ END # Creation. print " else if (ndims == $n && do_wrap == $wrap)\n", - " gp = make_shared>(_dims, name, dims, _opts, &_ostr);\n"; + " gp = make_shared>(_dims, name, dims, &_opts, &_ostr);\n"; } } diff --git a/bin/yask_kernel_api_test.py b/bin/yask_kernel_api_test.py index 40f8cda9..0f6d5fdb 100755 --- a/bin/yask_kernel_api_test.py +++ b/bin/yask_kernel_api_test.py @@ -28,7 +28,7 @@ import numpy as np import ctypes as ct import argparse -import yask_kernel +import yask_kernel as yk # Read data from grid using NumPy ndarray. def read_grid(grid, timestep) : @@ -156,8 +156,8 @@ def init_grid(grid, timestep) : if __name__ == "__main__": # The factories from which all other kernel objects are made. - kfac = yask_kernel.yk_factory() - ofac = yask_kernel.yask_output_factory() + kfac = yk.yk_factory() + ofac = yk.yask_output_factory() # Initalize MPI, etc. env = kfac.new_env() @@ -196,11 +196,13 @@ def init_grid(grid, timestep) : else : soln.set_block_size(dim_name, 32) - # Make a test fixed-size grid. + # Make a test fixed-size grid and set its NUMA preference. fgrid_sizes = () for dim_name in soln_dims : fgrid_sizes += (5,) fgrid = soln.new_fixed_size_grid("fgrid", soln_dims, fgrid_sizes) + fgrid.set_numa_preferred(yk.cvar.yask_numa_local) + fgrid.alloc_storage() # Simple rank configuration in 1st dim only. # In production runs, the ranks would be distributed along diff --git a/include/yask_kernel_api.hpp b/include/yask_kernel_api.hpp index e2fd7958..1ff7828c 100644 --- a/include/yask_kernel_api.hpp +++ b/include/yask_kernel_api.hpp @@ -47,12 +47,27 @@ namespace yask { #endif /// Allocate grids on local NUMA node. + /** + This is used in yk_solution::set_default_numa_preferred + and yk_grid::set_numa_preferred. + In Python, specify as `yask_kernel.cvar.yask_numa_local`. 
+ */ const int yask_numa_local = -1; /// Allocate grids across all available NUMA nodes. + /** + This is used in yk_solution::set_default_numa_preferred + and yk_grid::set_numa_preferred. + In Python, specify as `yask_kernel.cvar.yask_numa_interleave`. + */ const int yask_numa_interleave = -2; /// Do not specify any NUMA binding. + /** + This is used in yk_solution::set_default_numa_preferred + and yk_grid::set_numa_preferred. + In Python, specify as `yask_kernel.cvar.yask_numa_none`. + */ const int yask_numa_none = -9; // Forward declarations of classes and pointers. @@ -711,8 +726,10 @@ namespace yask { Instead of specifying a NUMA node, a special value may be used to specify another policy as listed. This setting may be overridden for any specific grid. + @returns `true` if NUMA preference was set; + `false` if NUMA preferences are not enabled. */ - virtual void + virtual bool set_default_numa_preferred(int numa_node /**< [in] Preferred NUMA node for data allocation. Alternatively, use @@ -720,8 +737,10 @@ namespace yask { local-node allocation, `yask_numa_interleave` for interleaving pages across all nodes, - or `yask_numa_none` for no NUMA - policy. */) =0; + or `yask_numa_none` for no explicit NUMA + policy. These constants are defined in + the _Variable Documentation_ section of + \ref yask_kernel_api.hpp. */) =0; /// **[Advanced]** Get the default preferred NUMA node on which to allocate data. /** @@ -1441,11 +1460,15 @@ namespace yask { /// **[Advanced]** Set the default preferred NUMA node on which to allocate data. /** This value is used when allocating data for this grid. + Thus, the desired NUMA policy must be set before calling alloc_storage() + or yk_solution::prepare_solution(). + @returns `true` if NUMA preference was set; + `false` if NUMA preferences are not enabled. */ - virtual void + virtual bool set_numa_preferred(int numa_node /**< [in] Preferred NUMA node. - See set_default_numa_preferred() for other options. 
*/) =0; + See yk_solution::set_default_numa_preferred() for other options. */) =0; /// **[Advanced]** Get the default preferred NUMA node on which to allocate data. /** diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index e89c2530..ef7d44bd 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -41,7 +41,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". - const string version = "2.05.06"; + const string version = "2.05.08"; string yask_get_version_string() { return version; diff --git a/src/compiler/lib/YaskKernel.cpp b/src/compiler/lib/YaskKernel.cpp index ba1a204b..1c8e42f1 100644 --- a/src/compiler/lib/YaskKernel.cpp +++ b/src/compiler/lib/YaskKernel.cpp @@ -286,7 +286,7 @@ namespace yask { ctorCode += " " + grid + "_dim_names = {" + gdims.makeDimStr(", ", "\"", "\"") + "};\n"; string initCode = " " + grid + "_ptr = std::make_shared<" + typeDef + - ">(_dims, \"" + grid + "\", " + grid + "_dim_names, _opts, &_ostr);\n" + ">(_dims, \"" + grid + "\", " + grid + "_dim_names, &_opts, &_ostr);\n" " assert(" + grid + "_ptr);\n"; // Grid vars. @@ -391,7 +391,7 @@ namespace yask { if (!firstGrid) newGridCode += " else"; newGridCode += " if (dims == " + grid + "_dim_names) gp = std::make_shared<" + - typeDef + ">(_dims, name, dims, _opts, &_ostr);\n"; + typeDef + ">(_dims, name, dims, &_opts, &_ostr);\n"; } } // grids. 
@@ -416,7 +416,7 @@ namespace yask { os << "\n // Make a new grid iff its dims match any in the stencil.\n" " // Returns pointer to the new grid or nullptr if no match.\n" " virtual YkGridPtr newStencilGrid(const std::string& name," - " const GridDimNames& dims, KernelSettingsPtr settings) {\n" + " const GridDimNames& dims) {\n" " YkGridPtr gp;\n" << newGridCode << " return gp;\n" diff --git a/src/kernel/Makefile b/src/kernel/Makefile index 4e37b3ef..d615d222 100644 --- a/src/kernel/Makefile +++ b/src/kernel/Makefile @@ -31,6 +31,7 @@ stencil = iso3dfd arch = snb mpi = 1 +numa = 1 real_bytes = 4 radius = 2 ranks = 1 @@ -429,6 +430,19 @@ YK_LD := $(YK_CXX) YK_LIBS := -lrt YK_LFLAGS := -Wl,-rpath=$(LIB_DIR) -L$(LIB_DIR) -l$(YK_BASE2) +# Add options for NUMA. +ifeq ($(numa),1) + +# Look for libnuma. +# TODO: make this more portable. +ifneq ($(wildcard /usr/lib64/libnuma.so),) + YK_LIBS += -lnuma + MACROS += USE_NUMA +else ifneq ($(wildcard /usr/lib64/libnuma.so.1),) + YK_LIBS += /usr/lib64/libnuma.so.1 + MACROS += USE_NUMA +endif + # Work-around missing numaif.h: # IF numaif.h is found in /usr/include, # THEN enable the macro to use it. @@ -436,14 +450,6 @@ ifneq ($(shell find /usr/include -name 'numaif.h' | wc -l),0) MACROS += USE_NUMAIF_H endif -# Work-around missing libnuma.so: -# IF libnuma.so.1 exists AND "normal" libnuma can't be found, -# THEN use hard-coded libnuma.so.1, -# ELSE use "normal" libnuma. -ifeq ($(and $(wildcard /usr/lib64/libnuma.so.1),$(shell whereis libnuma |wc -w)),1) - YK_LIBS += /usr/lib64/libnuma.so.1 -else - YK_LIBS += -lnuma endif # Tools. @@ -738,9 +744,9 @@ yk-test-no-yc: kernel-only # run the tests from the top-level Makefile. 
all-tests: $(MAKE) clean; $(MAKE) stencil=test_3d fold=x=4,y=2 cxx-yk-grid-test - $(MAKE) clean; $(MAKE) stencil=test_1d yc-and-yk-test $(MAKE) clean; $(MAKE) stencil=iso3dfd real_bytes=8 cxx-yk-api-test $(MAKE) clean; $(MAKE) stencil=iso3dfd py-yk-api-test + $(MAKE) clean; $(MAKE) stencil=test_1d yc-and-yk-test $(MAKE) clean; $(MAKE) stencil=3axis fold=x=4,y=2 yc-and-yk-test $(MAKE) clean; $(MAKE) stencil=9axis fold=z=2 yc-and-yk-test $(MAKE) clean; $(MAKE) stencil=3plane fold=y=2,z=4 yc-and-yk-test diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index 66c2c957..3d724692 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -1161,34 +1161,30 @@ namespace yask { const std::string& type) { ostream& os = get_ostr(); - // Get default NUMA node from settings. - int numa_def = _opts->_numa_pref; - for (const auto& i : nbytes) { - int np = i.first; + int numa_pref = i.first; size_t nb = i.second; - size_t ng = ngrids.at(np); + size_t ng = ngrids.at(numa_pref); // Don't need pad after last one. if (nb >= _data_buf_pad) nb -= _data_buf_pad; - // What node? - int numa_pref = (np >= 0) ? np : numa_def; - // Allocate data. os << "Allocating " << makeByteStr(nb) << " for " << ng << " " << type << "(s)"; +#ifdef USE_NUMA if (numa_pref >= 0) os << " preferring NUMA node " << numa_pref; else os << " using NUMA policy " << numa_pref; +#endif os << "...\n" << flush; auto p = shared_numa_alloc(nb, numa_pref); TRACE_MSG("Got memory at " << static_cast(p.get())); // Save using original key. - data_buf[np] = p; + data_buf[numa_pref] = p; } } diff --git a/src/kernel/lib/context.hpp b/src/kernel/lib/context.hpp index a329dfe7..16591148 100644 --- a/src/kernel/lib/context.hpp +++ b/src/kernel/lib/context.hpp @@ -540,8 +540,7 @@ namespace yask { // Make a new grid iff its dims match any in the stencil. // Returns pointer to the new grid or nullptr if no match. 
virtual YkGridPtr newStencilGrid (const std::string & name, - const GridDimNames & dims, - KernelSettingsPtr settings) =0; + const GridDimNames & dims) =0; // Make a new grid with 'name' and 'dims'. // Set sizes if 'sizes' is non-null. @@ -654,8 +653,14 @@ namespace yask { virtual idx_t get_num_ranks(const std::string& dim) const; virtual idx_t get_rank_index(const std::string& dim) const; virtual std::string apply_command_line_options(const std::string& args); - virtual void set_default_numa_preferred(int numa_node) { + virtual bool set_default_numa_preferred(int numa_node) { +#ifdef USE_NUMA _opts->_numa_pref = numa_node; + return true; +#else + _opts->_numa_pref = yask_numa_none; + return numa_node == yask_numa_none; +#endif } virtual int get_default_numa_preferred() const { return _opts->_numa_pref; diff --git a/src/kernel/lib/generic_grids.cpp b/src/kernel/lib/generic_grids.cpp index a226b5d7..919a5b23 100644 --- a/src/kernel/lib/generic_grids.cpp +++ b/src/kernel/lib/generic_grids.cpp @@ -32,7 +32,7 @@ namespace yask { GenericGridBase::GenericGridBase(string name, Layout& layout_base, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, ostream** ostr) : _name(name), _layout_base(&layout_base), _opts(settings), _ostr(ostr) { for (auto& dn : dimNames) @@ -51,22 +51,21 @@ namespace yask { // Release any old data if last owner. release_storage(); - // Get default NUMA node from settings. - int numa_def = _opts->_numa_pref; - // What node? - int numa_pref = (_numa_pref >= 0) ? _numa_pref : numa_def; + int numa_pref = get_numa_pref(); // Alloc required number of bytes. size_t sz = get_num_bytes(); os << "Allocating " << makeByteStr(sz) << " for grid '" << _name << "'"; +#ifdef USE_NUMA if (numa_pref >= 0) os << " preferring NUMA node " << numa_pref; else os << " on local NUMA node"; +#endif os << "...\n" << flush; - _base = shared_numa_alloc(sz, _numa_pref); + _base = shared_numa_alloc(sz, numa_pref); // No offset. 
_elems = _base.get(); diff --git a/src/kernel/lib/generic_grids.hpp b/src/kernel/lib/generic_grids.hpp index 3de33aee..7833a1d2 100644 --- a/src/kernel/lib/generic_grids.hpp +++ b/src/kernel/lib/generic_grids.hpp @@ -47,7 +47,8 @@ namespace yask { void* _elems = 0; // actual data, which may be offset from _base. // Preferred NUMA node. - int _numa_pref = -1; // -1 => use default. + const static int _numa_unset = -999; + int _numa_pref = _numa_unset; // use default from _opts. // Note that both _dims and *_layout_base hold dimensions unless this // is a scalar. For a scalar, _dims is empty and _layout_base = 0. @@ -55,7 +56,7 @@ namespace yask { Layout* _layout_base = 0; // memory layout. // Command-line and env parameters. - KernelSettingsPtr _opts; + KernelSettingsPtr* _opts; // Output stream for messages. // Pointer-to-pointer to let it follow a parent's pointer. @@ -76,7 +77,7 @@ namespace yask { GenericGridBase(std::string name, Layout& layout_base, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, std::ostream** ostr); virtual ~GenericGridBase() { } @@ -92,9 +93,20 @@ namespace yask { void set_name(const std::string& name) { _name = name; } // NUMA accessors. - virtual int get_numa_pref() const { return _numa_pref; } - virtual void set_numa_pref(int pref_numa_node) { _numa_pref = pref_numa_node; } - + virtual int get_numa_pref() const { + return (_numa_pref != _numa_unset) ? + _numa_pref : (*_opts)->_numa_pref; + } + virtual bool set_numa_pref(int numa_node) { +#ifdef USE_NUMA + _numa_pref = numa_node; + return true; +#else + _numa_pref = yask_numa_none; + return numa_node == yask_numa_none; +#endif + } + // Access dims. 
const IdxTuple& get_dims() const { return _dims; } @@ -204,7 +216,7 @@ namespace yask { GenericGridTemplate(std::string name, Layout& layout_base, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, std::ostream** ostr) : GenericGridBase(name, layout_base, dimNames, settings, ostr) { } @@ -309,7 +321,7 @@ namespace yask { // Construct an unallocated grid. GenericGrid(std::string name, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, std::ostream** ostr) : GenericGridTemplate(name, _layout, dimNames, settings, ostr) { assert(int(dimNames.size()) == _layout.get_num_sizes()); diff --git a/src/kernel/lib/new_grid.cpp b/src/kernel/lib/new_grid.cpp index 468a4b1b..984b1e4c 100644 --- a/src/kernel/lib/new_grid.cpp +++ b/src/kernel/lib/new_grid.cpp @@ -45,7 +45,7 @@ namespace yask { // First, try to make a grid that matches the layout in // the stencil. - YkGridPtr gp = newStencilGrid(name, dims, _opts); + YkGridPtr gp = newStencilGrid(name, dims); // If there was no match, use default layout. if (!gp) { @@ -76,7 +76,7 @@ namespace yask { // Scalar? if (ndims == 0) - gp = make_shared>(_dims, name, dims, _opts, &_ostr); + gp = make_shared>(_dims, name, dims, &_opts, &_ostr); // Include auto-gen code for all other cases. #include "yask_grid_code.hpp" diff --git a/src/kernel/lib/realv_grids.hpp b/src/kernel/lib/realv_grids.hpp index 096f0fd4..8c185a8b 100644 --- a/src/kernel/lib/realv_grids.hpp +++ b/src/kernel/lib/realv_grids.hpp @@ -191,7 +191,9 @@ namespace yask { // NUMA accessors. virtual int get_numa_preferred() const { return _ggb->get_numa_pref(); } - virtual void set_numa_preferred(int pref_numa_node) { _ggb->set_numa_pref(pref_numa_node); } + virtual bool set_numa_preferred(int numa_node) { + return _ggb->set_numa_pref(numa_node); + } // Lookup position by dim name. // Return -1 or die if not found, depending on flag. 
@@ -566,7 +568,7 @@ namespace yask { YkElemGrid(DimsPtr dims, std::string name, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, std::ostream** ostr) : YkGridBase(&_data, dimNames.size(), dims), _data(name, dimNames, settings, ostr) { @@ -683,7 +685,7 @@ namespace yask { YkVecGrid(DimsPtr dims, const std::string& name, const GridDimNames& dimNames, - KernelSettingsPtr settings, + KernelSettingsPtr* settings, std::ostream** ostr) : YkGridBase(&_data, dimNames.size(), dims), _data(name, dimNames, settings, ostr), diff --git a/src/kernel/lib/settings.cpp b/src/kernel/lib/settings.cpp index 5bbcce8b..b10e440f 100644 --- a/src/kernel/lib/settings.cpp +++ b/src/kernel/lib/settings.cpp @@ -242,6 +242,7 @@ namespace yask { ("block_threads", "Number of threads to use within each block.", num_block_threads)); +#ifdef USE_NUMA stringstream msg; msg << "Preferred NUMA node on which to allocate data for " "grids and MPI buffers. " @@ -251,6 +252,7 @@ namespace yask { parser.add_option(new CommandLineParser::IntOption ("numa_pref", msg.str(), _numa_pref)); +#endif } // Print usage message. 
diff --git a/src/kernel/lib/utils.cpp b/src/kernel/lib/utils.cpp index 5cb74d9c..031a2cab 100644 --- a/src/kernel/lib/utils.cpp +++ b/src/kernel/lib/utils.cpp @@ -65,9 +65,14 @@ namespace yask { if (numa_pref == yask_numa_none) return alignedAlloc(nbytes); + +#ifndef USE_NUMA + THROW_YASK_EXCEPTION("Error: explicit NUMA policy allocation is not enabled"); +#endif void *p = 0; +#ifdef USE_NUMA #ifdef USE_NUMA_POLICY_LIB #pragma omp single if (numa_available() != -1) { @@ -80,6 +85,9 @@ namespace yask { THROW_YASK_EXCEPTION("Error: numa_alloc_*(" << makeByteStr(nbytes) << ") returned unaligned addr " << p); } + else + THROW_YASK_EXCEPTION("Error: explicit NUMA policy allocation is not available"); + #else if (get_mempolicy(NULL, NULL, 0, 0, 0) == 0) { @@ -118,14 +126,14 @@ namespace yask { } } else - p = 0; + THROW_YASK_EXCEPTION("Error: anonymous mmap of " << makeByteStr(nbytes) << + " failed"); } + else + THROW_YASK_EXCEPTION("Error: explicit NUMA policy allocation is not available"); #endif - // If NUMA not avail or mmap failed, use regular aligned malloc. - if (!p) - p = alignedAlloc(nbytes); - - // If still bad, throw exception. +#endif + // Should not get here w/null p; throw exception. if (!p) THROW_YASK_EXCEPTION("Error: cannot allocate " << makeByteStr(nbytes)); diff --git a/src/kernel/lib/utils.hpp b/src/kernel/lib/utils.hpp index 73c37684..5c0713ca 100644 --- a/src/kernel/lib/utils.hpp +++ b/src/kernel/lib/utils.hpp @@ -25,6 +25,8 @@ IN THE SOFTWARE. #pragma once +#ifdef USE_NUMA + // Use numa policy library? 
#ifdef USE_NUMA_POLICY_LIB #include @@ -52,6 +54,7 @@ extern "C" { #define MPOL_BIND 2 #define MPOL_INTERLEAVE 3 +#endif #endif #endif @@ -97,7 +100,10 @@ namespace yask { extern char* alignedAlloc(std::size_t nbytes); struct AlignedDeleter { void operator()(char* p) { - std::free(p); + if (p) { + std::free(p); + p = NULL; + } } }; @@ -109,16 +115,23 @@ namespace yask { std::size_t _nbytes; NumaDeleter(std::size_t nbytes): _nbytes(nbytes) {} void operator()(char* p) { - if (p) { + +#ifdef USE_NUMA #ifdef USE_NUMA_POLICY_LIB - if (numa_available() != -1) - numa_free(p, _nbytes); + if (p && numa_available() != -1) { + numa_free(p, _nbytes); + p = NULL; + } #else - if (get_mempolicy(NULL, NULL, 0, 0, 0) == 0) - munmap(p, _nbytes); + if (p && get_mempolicy(NULL, NULL, 0, 0, 0) == 0) { + munmap(p, _nbytes); + p = NULL; + } #endif - else - free(p); +#endif + if (p) { + free(p); + p = NULL; } } }; diff --git a/src/kernel/lib/yask.hpp b/src/kernel/lib/yask.hpp index 95037600..ec2ef99b 100644 --- a/src/kernel/lib/yask.hpp +++ b/src/kernel/lib/yask.hpp @@ -155,8 +155,11 @@ inline void omp_set_nested(int n) { } #define YASK_PAD (7) // cache-lines between data buffers. #define YASK_HUGE_ALIGNMENT (2 * 1024 * 1024) // 2MiB-page for large allocs. #define CACHE_ALIGNED __attribute__ ((aligned (CACHELINE_BYTES))) -#ifndef NUMA_PREF -#define NUMA_PREF -1 +#ifndef USE_NUMA +#undef NUMA_PREF +#define NUMA_PREF yask_numa_none +#elif !defined NUMA_PREF +#define NUMA_PREF yask_numa_local #endif // Define a folded vector of reals.