Skip to content

Commit

Permalink
Merge pull request #279 from intel/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Aug 4, 2023
2 parents 0384156 + fd754e0 commit c7d1140
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 65 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ and Intel(R) graphics processors.

### Pre-requisites:
* Intel(R) [oneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html)
HPC Toolkit for Linux (toolkit 2022.3 or later recommended; this will install
the Intel(R) oneAPI DPC++/C++ Compiler 2022.2.0).
See notes below under version 4.00.00 changes.
* GNU C++ compiler, g++ (8.2.0 or later recommended).
HPC Toolkit for Linux (toolkit 2023.2 or later recommended); this will install
the Intel(R) oneAPI DPC++/C++ Compiler and the Intel(R) MPI Library.
See compiler notes below under version 4.00.00 changes.
* GNU C++ compiler, g++ (8.5.0 or later recommended).
Even when using Intel compilers, a g++ installation is required.
* Linux libraries `librt` and `libnuma`.
* Grep.
* Perl (5.010 or later).
* Perl (v5 or later).
* Awk.
* GNU Make.
* Bash shell.
Expand All @@ -65,7 +65,7 @@ and Intel(R) graphics processors.

## Backward-compatibility notices
### Version 4
* Version 4.04.00 deprecates the existing `void*` set/get_elements_in_slice()
* Version 4.04.00 deprecates the existing `void* {set,get}_elements_in_slice()`
APIs and provides safer `float*` and `double*` versions.
* Version 4.03.00 is a significant release with the following notices:
- Each non-scratch stencil equation is now checked to ensure
Expand All @@ -74,7 +74,7 @@ and Intel(R) graphics processors.
(-1 is used for less-common reverse-time stencils.)
- The `yk_solution::get_var()` API now throws an exception if the
named var does not exist. (Used to return `nullptr`.)
- Vector clustering (unrolling by the YASK compiler) is no
- Vector "clustering" (unrolling by the YASK compiler) is no
longer supported.
- Read-ahead in the inner-loop is no longer supported.
- APIs for getting OpenMP thread counts were added.
Expand Down
4 changes: 1 addition & 3 deletions src/common/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,7 @@ ifeq ($(offload),1)
endif

# Base compiler flags for building kernel lib and apps.
ifeq ($(offload),1)
YK_CXXDBG := -gline-tables-only
else
ifeq ($(offload),0)
YK_CXXDBG := -g
endif
YK_CXXOPT := -O3
Expand Down
2 changes: 1 addition & 1 deletion src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace yask {
// for numbers above 9 (at least up to 99).

// Format: "major.minor.patch[-alpha|-beta]".
const string version = "4.04.02";
const string version = "4.04.03";

string yask_get_version_string() {
return version;
Expand Down
20 changes: 11 additions & 9 deletions src/compiler/compiler_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ struct MySettings {
// Parse options from the command-line and set corresponding vars.
// Exit with message on error or request for help.
void parse(int argc, char** argv,
yc_solution_ptr csoln) {
yc_solution_ptr csoln,
bool print_info = true) {
string pgm_name(argv[0]);
string values;

Expand Down Expand Up @@ -134,19 +135,20 @@ struct MySettings {
cout <<
"\nExamples:\n"
" " << pgm_name << " -stencil 3axis -radius 1 -target pseudo -p - # '-' for stdout\n"
" " << pgm_name << " -stencil awp -elem-bytes 8 -fold x=4,y=2 -target avx2 -p stencil_code.hpp\n"
" " << pgm_name << " -stencil awp -elem-bytes 8 -fold x=2,y=2 -target avx2 -p stencil_code.hpp\n"
" " << pgm_name << " -stencil iso3dfd -radius 4 -target avx512 -p stencil_code.hpp\n" <<
flush;
exit(1);
}

// Show settings.
ostringstream oss;
oss << "Options from the '" << pgm_name << "' binary:\n";
parser.print_values(oss);
auto cvals = csoln->get_command_line_values();
oss << "Options from the YASK compiler library:\n" <<
cvals;
if (print_info) {
cout << "Settings of options from the '" << pgm_name << "' binary:\n";
parser.print_values(cout);
auto cvals = csoln->get_command_line_values();
cout << "Settings of options from the YASK compiler library:\n" <<
cvals;
}

if (rem_args2.length())
THROW_YASK_EXCEPTION("extraneous parameter(s): '" +
Expand All @@ -171,7 +173,7 @@ int main(int argc, char* argv[]) {
// before the requested solution is chosen.
{
auto null_soln = factory.new_solution("temp");
my_settings.parse(argc, argv, null_soln);
my_settings.parse(argc, argv, null_soln, false);
}

// Find the requested stencil in the registry.
Expand Down
37 changes: 21 additions & 16 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use_ptrs ?= 1
use_safe_ptrs ?= 0
outer_domain_layout ?= 0
inner_misc_layout ?= 1
first_inner ?= 1
early_loads ?= 0
min_buffer_len ?= 1
trace ?= 0
Expand All @@ -58,7 +59,6 @@ ifeq ($(offload),1)
pfd_l1 := 0
pfd_l2 := 0
inner_loop_dim := 1
early_loads := 1
outer_domain_layout := 1
endif

Expand Down Expand Up @@ -190,6 +190,11 @@ ifeq ($(inner_misc_layout),1)
else
YC_FLAGS += -no-inner-misc-layout
endif
ifeq ($(first_inner),1)
YC_FLAGS += -first-inner
else
YC_FLAGS += -no-first-inner
endif
ifeq ($(early_loads),1)
YC_FLAGS += -early-loads
else
Expand Down Expand Up @@ -346,8 +351,7 @@ endif
# Compiler-specific settings.

# Create a compiler invocation to test for macro settings.
# Use the flags defined so far.
YK_CXX_TEST := $(YK_CXX) $(YK_CXXFLAGS)
YK_CXX_TEST := $(YK_CXX)
cxx_is_llvm_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_LLVM_COMPILER)
cxx_is_clang := $(call MACRO_DEF,$(YK_CXX_TEST),__clang__)
cxx_is_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_COMPILER)
Expand All @@ -356,8 +360,8 @@ cxx_is_gnu := $(call MACRO_DEF,$(YK_CXX_TEST),__GNUC__)
# LLVM-based Intel compiler (icpx).
ifeq ($(cxx_is_llvm_intel),1)
YK_CXXFLAGS2 += $(GXX_ISA) $(ICC_ISA)
ifeq ($(check),0)
YK_CXXDBG2 += -gline-tables-only
ifeq ($(offload),1)
YK_CXXDBG2 := -gline-tables-only
endif
YK_CXXDBG2 += -fdebug-info-for-profiling
YK_CXXFLAGS2 += -ansi-alias \
Expand Down Expand Up @@ -763,8 +767,13 @@ echo-settings:
echo YK_TAG=$(YK_TAG); \
echo YK_CXXVER=`$(YK_CXX) --version`; \
echo arch=$(arch); \
echo cxx_is_llvm_intel=$(cxx_is_llvm_intel); \
echo cxx_is_clang=$(cxx_is_clang); \
echo cxx_is_intel=$(cxx_is_intel); \
echo cxx_is_gnu=$(cxx_is_gnu); \
echo fold=$(fold); \
echo offload=$(offload); \
echo offload_arch=$(offload_arch); \
echo offload_usm=$(offload_usm); \
echo pfd_l1=$(pfd_l1); \
echo pfd_l2=$(pfd_l2); \
Expand Down Expand Up @@ -857,25 +866,21 @@ help:
first_test := 0
last_test := 999

# Default threads.
ifeq ($(offload),1)
outer_threads := 2
inner_threads := 2
else
outer_threads := 8
inner_threads := 2
endif

TEST_MAKE_ARGS := real_bytes=8 use_rcp=0 allow_new_var_types=0 trace=1
TEST_MAKE := $(MAKE) $(TEST_MAKE_ARGS)

# Makefile functions for folding.
# Define makefile functions for folding.
# Set default threads.
# Disable folding and checking for offload testing.
ifeq ($(offload),1)
FOLD =
outer_threads := 2
inner_threads := 2
else
FOLD = fold=$(subst $(space),$(comma),$(1))
TEST_MAKE_ARGS += check=1
FOLD = fold=$(subst $(space),$(comma),$(1))
outer_threads := 8
inner_threads := 2
endif

### Unit tests.
Expand Down
58 changes: 29 additions & 29 deletions src/kernel/yask_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,19 +157,20 @@ struct MySettings {
// Any remaining strings will be returned.
auto rem_args = parser.parse_args(argc, argv);

// Handle additional knobs and help if there is a soln.
// Handle additional knobs and -help if there is a soln.
if (ksoln) {
assert(kenv);
auto& os = kenv->get_debug_output()->get_ostream();

// Parse standard args not handled by this parser.
rem_args = ksoln->apply_command_line_options(rem_args);

if (help) {
cout << "Usage: " << pgm_name << " [options]\n"
os << "Usage: " << pgm_name << " [options]\n"
"Options from the '" << pgm_name << "' binary:\n";
parser.print_help(cout);
cout << "Options from the YASK library:\n" <<
ksoln->get_command_line_help();
cout <<
parser.print_help(os);
os << "Options from the YASK library:\n" <<
ksoln->get_command_line_help() <<
"\nValidation is very slow and uses 2x memory,\n"
" so run with very small sizes and number of time-steps.\n"
" If validation fails, it may be due to rounding error;\n"
Expand All @@ -184,27 +185,32 @@ struct MySettings {
exnr += " -nr" + dname + " " + to_string(i + 1);
i++;
}
cout <<
os <<
"\nExamples:\n"
" " << pgm_name << " -g 768 # global-domain size in all dims same.\n"
" " << pgm_name << exg << " # global-domain size in each dim separately.\n"
" " << pgm_name << " -l 128 # local-domain (per-rank) size.\n"
" " << pgm_name << " -g 512" << exnr << " # number of ranks in each dim.\n" <<
" " << pgm_name << " -g 512" << exb << " -no-pre_auto_tune # manual block size.\n" <<
flush;
if (kenv)
kenv->exit(1);
else
exit(1);
kenv->exit(1);
}

// Add settings.
ostringstream oss;
oss << "Options from the '" << pgm_name << "' binary:\n";
parser.print_values(oss);
oss << "Options from the YASK library:\n" <<
// Print splash banner and related info.
kenv->print_splash(argc, argv, "YASK Performance and Validation Utility invocation: ");
os << "\nStencil name: " << ksoln->get_name() << endl;

// Print current settings.
os << "Settings of options from the '" << pgm_name << "' binary:\n";
parser.print_values(os);
os << "Settings of options from the YASK library:\n" <<
ksoln->get_command_line_values();

// Check option consistency.
kenv->assert_equality_over_ranks(num_trials, "number of trials");
kenv->assert_equality_over_ranks(trial_steps, "number of steps per trial");
kenv->assert_equality_over_ranks(validate ? 0 : 1, "validation");

if (rem_args.length())
THROW_YASK_EXCEPTION("extraneous parameter(s): '" +
rem_args +
Expand Down Expand Up @@ -267,8 +273,13 @@ int main(int argc, char** argv)

// Set up the environment.
kenv = kfac.new_env();
if (!kenv)
THROW_YASK_EXCEPTION("could not create YASK env");
auto num_ranks = kenv->get_num_ranks();

// Make sure any MPI/OMP debug data is dumped from all ranks before continuing.
kenv->global_barrier();

// Enable debug only on requested rank.
if (opts.msg_rank != kenv->get_rank_index())
yk_env::disable_debug_output();
Expand All @@ -277,21 +288,10 @@ int main(int argc, char** argv)
// Make solution object containing data and parameters for stencil eval.
auto ksoln = kfac.new_solution(kenv);

// Parse custom and library-provided cmd-line options and
// exit on -help or error.
// Parse custom and library-provided cmd-line options, exit on -help
// or error, else show splash and current options.
opts.parse(argc, argv, kenv, ksoln);

// Make sure any MPI/OMP debug data is dumped from all ranks before continuing
// and check option consistency.
kenv->global_barrier();
kenv->assert_equality_over_ranks(opts.num_trials, "number of trials");
kenv->assert_equality_over_ranks(opts.trial_steps, "number of steps per trial");
kenv->assert_equality_over_ranks(opts.validate ? 0 : 1, "validation");

// Print splash banner and related info.
kenv->print_splash(argc, argv, "YASK Performance and Validation Utility invocation: ");
os << "\nStencil name: " << ksoln->get_name() << endl;

// Print PID and sleep for debug if needed.
os << "\nPID: " << getpid() << endl;
if (opts.debug_sleep) {
Expand Down

0 comments on commit c7d1140

Please sign in to comment.