diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index dafe942b20b..03eb2ec578c 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -29,6 +29,9 @@ jobs: - name: Test run: | quantlib-test-suite --log_level=message + - name: Run benchmark + run: | + quantlib-benchmark --size=1 cmake-linux-with-options: runs-on: ubuntu-latest steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dde66d4710..f77d4c01d77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,6 @@ set(QL_INSTALL_CMAKEDIR "lib/cmake/${PACKAGE_NAME}" CACHE STRING "Installation directory for CMake scripts") # Options -option(QL_BUILD_BENCHMARK "Build benchmark" ON) option(QL_BUILD_EXAMPLES "Build examples" ON) option(QL_BUILD_TEST_SUITE "Build test suite" ON) option(QL_BUILD_FUZZ_TEST_SUITE "Build fuzz test suite" OFF) @@ -70,6 +69,7 @@ option(QL_USE_STD_OPTIONAL "Use std::optional instead of boost::optional" OFF) option(QL_USE_STD_SHARED_PTR "Use standard smart pointers instead of Boost ones" OFF) option(QL_USE_STD_TUPLE "Use std::tuple instead of boost::tuple" ON) set(QL_EXTERNAL_SUBDIRECTORIES "" CACHE STRING "Optional list of external source directories to be added to the build (semicolon-separated)") +# set -lpapi here set(QL_EXTRA_LINK_LIBRARIES "" CACHE STRING "Optional extra link libraries to add to QuantLib") # Require C++14 or higher @@ -269,7 +269,7 @@ add_subdirectory(ql) if (QL_BUILD_EXAMPLES) add_subdirectory(Examples) endif() -if (QL_BUILD_TEST_SUITE OR QL_BUILD_BENCHMARK) +if (QL_BUILD_TEST_SUITE) add_subdirectory(test-suite) endif() diff --git a/test-suite/CMakeLists.txt b/test-suite/CMakeLists.txt index 0319b497b9f..a4b558cf7e7 100644 --- a/test-suite/CMakeLists.txt +++ b/test-suite/CMakeLists.txt @@ -129,7 +129,6 @@ set(QL_TEST_SOURCES preconditions.cpp prices.cpp quantlibglobalfixture.cpp - quantlibtestsuite.cpp quantooption.cpp quotes.cpp rangeaccrual.cpp @@ -183,63 +182,35 @@ set(QL_TEST_HEADERS utilities.hpp ) 
-set(QL_BENCHMARK_SOURCES - quantlibbenchmark.cpp - - americanoption.cpp - asianoptions.cpp - barrieroption.cpp - basketoption.cpp - batesmodel.cpp - convertiblebonds.cpp - digitaloption.cpp - dividendoption.cpp - europeanoption.cpp - fdheston.cpp - hestonmodel.cpp - interpolations.cpp - jumpdiffusion.cpp - lowdiscrepancysequences.cpp - marketmodel_cms.cpp - marketmodel_smm.cpp - preconditions.cpp preconditions.hpp - quantooption.cpp - quantlibglobalfixture.cpp quantlibglobalfixture.hpp - riskstats.cpp - shortratemodels.cpp - utilities.cpp utilities.hpp - swaptionvolstructuresutilities.hpp -) if (QL_BUILD_TEST_SUITE) - add_executable(ql_test_suite ${QL_TEST_SOURCES} ${QL_TEST_HEADERS}) - set_target_properties(ql_test_suite PROPERTIES OUTPUT_NAME "quantlib-test-suite") - set_source_files_properties(quantlibtestsuite.cpp PROPERTIES SKIP_UNITY_BUILD_INCLUSION true) - target_link_libraries(ql_test_suite PRIVATE - ql_library - ${QL_THREAD_LIBRARIES}) + add_library(ql_test OBJECT ${QL_TEST_SOURCES} ${QL_TEST_HEADERS}) + if (NOT Boost_USE_STATIC_LIBS) + target_compile_definitions(ql_test PUBLIC BOOST_ALL_DYN_LINK) + endif() if(MSVC AND CMAKE_UNITY_BUILD) # for Unity builds, we need to add /bigobj - target_compile_options(ql_test_suite PRIVATE "/bigobj") + target_compile_options(ql_test PUBLIC "/bigobj") endif() + target_link_libraries(ql_test PUBLIC ql_library ${QL_THREAD_LIBRARIES}) + + + add_executable(ql_test_suite quantlibtestsuite.cpp) + set_source_files_properties(quantlibtestsuite.cpp PROPERTIES SKIP_UNITY_BUILD_INCLUSION true) + set_target_properties(ql_test_suite PROPERTIES OUTPUT_NAME "quantlib-test-suite") + target_link_libraries(ql_test_suite PRIVATE ql_test) + if (QL_INSTALL_TEST_SUITE) install(TARGETS ql_test_suite RUNTIME DESTINATION ${QL_INSTALL_BINDIR}) endif() add_test(NAME quantlib_test_suite COMMAND ql_test_suite --log_level=message) -endif() -IF (QL_BUILD_BENCHMARK) - add_executable(ql_benchmark ${QL_BENCHMARK_SOURCES}) + + add_executable(ql_benchmark 
quantlibbenchmark.cpp) set_target_properties(ql_benchmark PROPERTIES OUTPUT_NAME "quantlib-benchmark") - set_source_files_properties(quantlibbenchmark.cpp PROPERTIES SKIP_UNITY_BUILD_INCLUSION true) - target_link_libraries(ql_benchmark PRIVATE - ql_library - ${QL_THREAD_LIBRARIES}) - if(MSVC AND CMAKE_UNITY_BUILD) - # for Unity builds, we need to add /bigobj - target_compile_options(ql_benchmark PRIVATE "/bigobj") - endif() + target_link_libraries(ql_benchmark PRIVATE ql_test) if (QL_INSTALL_BENCHMARK) install(TARGETS ql_benchmark RUNTIME DESTINATION ${QL_INSTALL_BINDIR}) endif() endif() + diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index eebc8c21b13..00bad8d2e31 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -1,6 +1,5 @@ QL_TEST_SRCS = \ - quantlibtestsuite.cpp \ americanoption.cpp \ amortizingbond.cpp \ andreasenhugevolatilityinterpl.cpp \ @@ -174,6 +173,7 @@ QL_TEST_SRCS = \ zabr.cpp \ zerocouponswap.cpp + QL_TEST_HDRS = \ preconditions.hpp \ quantlibglobalfixture.hpp \ @@ -183,38 +183,6 @@ QL_TEST_HDRS = \ QL_TESTS = ${QL_TEST_SRCS} ${QL_TEST_HDRS} -QL_BENCHMARK_SRCS = \ - quantlibbenchmark.cpp \ - americanoption.cpp \ - asianoptions.cpp \ - barrieroption.cpp \ - doublebarrieroption.cpp \ - basketoption.cpp \ - batesmodel.cpp \ - convertiblebonds.cpp \ - digitaloption.cpp \ - dividendoption.cpp \ - europeanoption.cpp \ - fdheston.cpp \ - hestonmodel.cpp \ - interpolations.cpp \ - jumpdiffusion.cpp \ - lowdiscrepancysequences.cpp \ - marketmodel_cms.cpp \ - marketmodel_smm.cpp \ - preconditions.cpp \ - quantlibglobalfixture.cpp \ - quantooption.cpp \ - riskstats.cpp \ - shortratemodels.cpp \ - utilities.cpp - -QL_BENCHMARK_HDRS = \ - quantlibglobalfixture.hpp \ - preconditions.hpp \ - utilities.hpp - -QL_BENCHMARKS = ${QL_BENCHMARK_SRCS} ${QL_BENCHMARK_HDRS} dist-hook: mkdir -p $(distdir)/build @@ -234,9 +202,7 @@ endif if UNITY_BUILD -nodist_quantlib_test_suite_SOURCES = unity_test.cpp - -unity_test.cpp: Makefile.am 
+unity.cpp: Makefile.am echo "/* This file is automatically generated; do not edit. */" > $@ echo "/* Add the files to be included into Makefile.am instead. */" >> $@ echo >> $@ @@ -244,34 +210,27 @@ unity_test.cpp: Makefile.am echo "#include \"test-suite/$$i\"" >> $@; \ done -nodist_quantlib_benchmark_SOURCES = unity_benchmark.cpp quantlibbenchmark.cpp -UNITY_SRC = $(filter-out quantlibbenchmark.cpp,$(QL_BENCHMARK_SRCS)) +nodist_quantlib_test_suite_SOURCES = unity.cpp +quantlib_test_suite_SOURCES = quantlibtestsuite.cpp -unity_benchmark.cpp: Makefile.am - echo "/* This file is automatically generated; do not edit. */" > $@ - echo "/* Add the files to be included into Makefile.am instead. */" >> $@ - echo >> $@ - for i in $(UNITY_SRC); do \ - echo "#include \"test-suite/$$i\"" >> $@; \ - done +nodist_quantlib_benchmark_SOURCES = unity.cpp +quantlib_benchmark_SOURCES = quantlibbenchmark.cpp -EXTRA_DIST = $(QL_TESTS) $(QL_BENCHMARKS) +EXTRA_DIST = $(QL_TESTS) else -quantlib_test_suite_SOURCES = $(QL_TESTS) - -quantlib_benchmark_SOURCES = $(QL_BENCHMARKS) - +quantlib_test_suite_SOURCES = $(QL_TESTS) quantlibtestsuite.cpp +quantlib_benchmark_SOURCES = $(QL_TESTS) quantlibbenchmark.cpp EXTRA_DIST = -endif +endif quantlib_test_suite_LDADD = ${top_builddir}/ql/libQuantLib.la \ ${PTHREAD_LIB} ${BOOST_INTERPROCESS_LIB} quantlib_benchmark_LDADD = ${top_builddir}/ql/libQuantLib.la \ - ${PTHREAD_LIB} + ${PTHREAD_LIB} ${BOOST_INTERPROCESS_LIB} TESTS = quantlib-test-suite$(EXEEXT) TESTS_ENVIRONMENT = BOOST_TEST_LOG_LEVEL=message BOOST_TEST_COLOR_OUTPUT=false diff --git a/test-suite/americanoption.cpp b/test-suite/americanoption.cpp index 13bd8185832..570563e0191 100644 --- a/test-suite/americanoption.cpp +++ b/test-suite/americanoption.cpp @@ -1542,7 +1542,7 @@ BOOST_AUTO_TEST_CASE(testQdEngineStandardExample) { ); const Real calculated = americanOption.NPV() - europeanOption.NPV(); - const Real tol = 1e-15; + const Real tol = 7e-15; const Real diff = std::abs(calculated - 
expected[i]); if (diff > tol) { diff --git a/test-suite/quantlibbenchmark.cpp b/test-suite/quantlibbenchmark.cpp index b848f7f2f33..5369cd0a8c2 100644 --- a/test-suite/quantlibbenchmark.cpp +++ b/test-suite/quantlibbenchmark.cpp @@ -22,25 +22,19 @@ QuantLib Benchmark Suite Measures the performance of a preselected set of numerically intensive - test cases. The overall QuantLib Benchmark Index is given by the average - performance in mflops. This benchmarks supports multiprocessing, e.g. + test cases. This benchmark supports multiprocessing, e.g. - Single process benchmark: - ./quantlib-benchmark + Single process benchmark for testing: + ./quantlib-benchmark --size=1 --nProc=1 - Benchmark with 16 processes: - ./quantlib-benchmark --mp=16 + Benchmark with 16 processes and the default size: + ./quantlib-benchmark --nProc=16 - Benchmark with one process per core - ./quantlib-benchmark --mp - - The number of floating point operations of a given test case was measured - using PAPI, http://icl.cs.utk.edu/papi - - Example results can be found at https://openbenchmarking.org/test/pts/quantlib + Benchmark with one worker process per hardware thread and the default size: + ./quantlib-benchmark This benchmark is derived from quantlibtestsuite.cpp. Please see the - copyrights therein. + copyrights therein. 
*/ #include @@ -51,11 +45,14 @@ #include #endif -#define BOOST_TEST_NO_MAIN 1 +#define BOOST_TEST_NO_MAIN +#define BOOST_TEST_ALTERNATIVE_INIT_API #include #include #include +#include +#include #include #include @@ -66,14 +63,6 @@ #include -/* initialize PAPI on Linux - sudo sysctl -w kernel.perf_event_paranoid=0 - export PAPI_EVENTS="PAPI_TOT_INS,PAPI_FP_OPS,PAPI_FP_INS" - export PAPI_REPORT=1 -*/ -//#include - - /* Use BOOST_MSVC instead of _MSC_VER since some other vendors (Metrowerks, for example) also #define _MSC_VER @@ -84,290 +73,603 @@ #include "utilities.hpp" -namespace QuantLibTests { - namespace AmericanOptionTests { - struct testFdAmericanGreeks: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace AsianOptionTests { - struct testMCDiscreteArithmeticAveragePrice: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace BarrierOptionTests { - struct testBabsiriValues: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } +namespace { - namespace BasketOptionTests { - struct testEuroTwoValues: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; + /** + * A class representing an individual benchmark. Each benchmark is one of the QuantLib + * test-suite tests, run one or more times. Boost unit test framework causes a dilemma: + * + * * if we don't use boost::unit_test::framework::run to run the test, then all the + * correctness checks are disabled. We can't validate that the test passed. + * * if we do use boost::unit_test::framework::run, then we incur a very large overhead + * especially for short tests that are run many thousands of times. + * + * We deal with this by running each test exactly once using boost::unit_test::framework::run. + * Failures are marked using a boost::unit_test::test_observer and cause immediate tear down + * of the benchmark master process. All subsequent runs of the test are done through a hack. 
+ * We copy the declarations of the BOOST_AUTO_TEST_CASE and friends macros in boost/test/unit_test_suite.hpp + * to declare the symbols that Boost creates. This allows us to call these symbols directly, + * by-passing the boost unit test framework completely. + * + * The overall benchmark is parallelised using Boost::IPC. QuantLib is not thread safe, so any + * kind of shared memory parallelism is ruled out. The benchmark creates a large (fixed) amount of + * work, distributes this between all the workers, and sees how quickly the workers can finish it all. + * The overall metric is #tasks/s that the system can process. The tasks are pre-set (these are the + * tests from the test-suite), and the --size argument to the benchmark controls how many times the + * entire set of tasks is executed. Once the machine is saturated with work the benchmark typically + * exhibits perfect weak scaling: doubling --size will double runtime and leave #tasks/s unchanged. + * The #tasks/s will typically increase as the machine is given more work to do. + * + * The pre-set benchmark sizes are chosen to saturate even very large machines. 
+ */ + class Benchmark + { + public: + template + Benchmark( + std::string name, // the test name, as known by boost::unit_test::test_unit + CALLABLE &&body, // the "body" of the test we want to run + double cost // how expensive (runtime) this test is relative to others + ) + : name_(std::move(name)), test_(nullptr), cost_(cost), totalRuntime_(0), testBody_(std::forward(body)) {} + + Benchmark(const Benchmark& copy) = default; + Benchmark(Benchmark&& move) = default; + Benchmark& operator=(const Benchmark &other) = default; + Benchmark& operator=(Benchmark &&other) = default; + + double getCost() const { return cost_; } + std::string getName() const { return name_; } + bool foundTestUnit() const { return test_ != nullptr; } + // Total runtime across multiple runs is manually accumulated into the class + double& getTotalRuntime() { return totalRuntime_; } + const double& getTotalRuntime() const { return totalRuntime_; } + void setTestUnit(const boost::unit_test::test_unit * unit) { test_ = unit; } + + + // Run the underlying QuantLib test exactly once using the Boost test framework + // This will check all results and will flag any errors that are found. It is much + // slower than running just the test body outside of the Boost framework + double runValidation() const + { + double time = -1.0; + try { + auto startTime = std::chrono::steady_clock::now(); + boost::unit_test::framework::run(test_, false); + auto stopTime = std::chrono::steady_clock::now(); + time = std::chrono::duration_cast(stopTime - startTime).count() * 1e-6; + } + catch(const std::exception &e) { + std::cerr << "error: caught exception in benchmark " << getName() << "\n" + << "message: " << e.what() << "\n" << std::endl; + } + catch(...) 
{ + std::cerr << "error: caught unknown exception in benchmark " << getName() << std::endl; + } + return time; + } - struct testTavellaValues: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; + // Directly run the body of the underlying QuantLib test (multiple times) without using the Boost + // test framework. This eliminates all the boost overhead, but also disables all results checking. + double runBenchmark() const + { + double time = -1.0; + try { + auto startTime = std::chrono::steady_clock::now(); + testBody_(); + auto stopTime = std::chrono::steady_clock::now(); + time = std::chrono::duration_cast(stopTime - startTime).count() * 1e-6; + } + catch(const std::exception &e) { + std::cerr << "Error: caught exception in benchmark " << getName() << "\n" + << "Message: " << e.what() << "\n" << std::endl; + } + catch(...) { + std::cerr << "Error: caught unknown exception in benchmark " << getName() << std::endl; + } + return time; + } - struct testOddSamples: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + private: + std::string name_; + const boost::unit_test::test_unit * test_; + double cost_; + double totalRuntime_; + std::function testBody_; + }; - namespace BatesModelTests { - struct testDAXCalibration: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace ConvertibleBondTests { - struct testBond: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + /** + * To determine programmatically whether a test has passed or not, Boost unit test framework requires + * us to register a test observer class. This only gives the pass/fail status for the most recently + * run test, not even the name of the test that was run. Hence we need some additional + * plumbing to ensure that intra-test failures are not overridden by intra-test passes + * (for a test that has multiple calls to BOOST_CHECK or BOOST_FAIL). 
+ */ + struct BenchmarkResult : public boost::unit_test::test_observer + { + public: + BenchmarkResult() : passed_(true) { + boost::unit_test::framework::register_observer(*this); + } + ~BenchmarkResult() { + boost::unit_test::framework::deregister_observer(*this); + } + BenchmarkResult(const BenchmarkResult&) = delete; + BenchmarkResult(BenchmarkResult&&) = delete; + BenchmarkResult& operator=(const BenchmarkResult &) = delete; + BenchmarkResult& operator=(BenchmarkResult &&) = delete; - namespace DigitalOptionTests { - struct testMCCashAtHit: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace DividendOptionTests { - struct testFdEuropeanGreeks: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; + void assertion_result( boost::unit_test::assertion_result ar ) override + { + passed_ = passed_ && (ar == boost::unit_test::AR_PASSED); + } + bool pass() const { return passed_; } + void reset() { passed_ = true; } - struct testFdAmericanGreeks: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + private: + bool passed_; + }; - namespace EuropeanOptionTests { - struct testMcEngines: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - struct testImpliedVol: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; + /** + * This class takes a list of Benchmarks and attempts to find the corresponding + * test_units in the Boost test unit tree. + * */ + class TestUnitFinder : public boost::unit_test::test_tree_visitor + { + private: + TestUnitFinder(std::vector & bm) : bm_(bm) {} + + // Utility method needed for initialising the Boost test framework + static bool init_unit_test_suite() { return true; } + + public: + bool visit(const boost::unit_test::test_unit & tu) override + { + const std::string& thisTest = tu.full_name(); + // Try find this in the bm array. 
We know every test name will start with + // "QuantLibTests/" which contains 14 characters + for(auto &b : bm_ ) { + if( thisTest.find( b.getName(), 14) != std::string::npos ) { + // We have a match + b.setTestUnit( &tu ); + } + } + // Continue visiting + return true; + } - struct testFdEngines: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace FdHestonTests { - struct testFdmHestonAmerican: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + // Find the corresponding Boost test_unit for each Benchmark + // If we can't find a test_unit, throw an exception + static void findAllTests(char** argv, std::vector &bm) + { + boost::unit_test::framework::init(TestUnitFinder::init_unit_test_suite, 1, argv); + boost::unit_test_framework::framework::finalize_setup_phase(); + + TestUnitFinder tuf(bm); + boost::unit_test::traverse_test_tree(boost::unit_test_framework::framework::master_test_suite(), tuf, true); + + // Now check that we've found all test units + for(const auto &b : bm) { + if( !b.foundTestUnit() ) { + std::string msg = "Unable to find the Boost test unit for Benchmark '"; + msg += b.getName(); + msg += "'"; + std::runtime_error err(msg); + throw err; + } + } + } - namespace HestonModelTests { - struct testDAXCalibration: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + private: + std::vector & bm_; + }; - namespace InterpolationTests { - struct testSabrInterpolation: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace JumpDiffusionTests { - struct testGreeks: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + // The container holding all the benchmarks we will run + std::vector bm; - namespace LowDiscrepancyTests { - struct testMersenneTwisterDiscrepancy: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace MarketModelCmsTests { - struct testMultiStepCmSwapsAndSwaptions: - public BOOST_AUTO_TEST_CASE_FIXTURE { void 
test_method(); }; - } - namespace MarketModelSmmTests { - struct testMultiStepCoterminalSwapsAndSwaptions: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + /** + * A class to group and tidy up all the benchmark IO and boilerplate routines + */ + struct BenchmarkSupport + { + // Verbosity level and a logging macro to help debugging + static int verbose; +#define LOG_MESSAGE(...) if(BenchmarkSupport::verbose >= 3) { std::cout << __VA_ARGS__ << std::endl; } - namespace QuantoOptionTests { - struct testForwardGreeks: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } - namespace RiskStatisticsTests { - struct testResults: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } + // The set of pre-defined benchmark sizes that we support + static const std::vector< std::pair > bmSizes; - namespace ShortRateModelTests { - struct testSwaps: - public BOOST_AUTO_TEST_CASE_FIXTURE { void test_method(); }; - } -} + // Turn a command line '--size=' string into a benchmark size + static unsigned int parseBmSize(const std::string &size) + { + for(const auto & p : bmSizes) { + if(p.first == size) + return p.second; + } + // OK - it's not a preset size, let's see if it's parsable as an integer + try { + unsigned int sz = std::stoul(size); + return sz; + } + catch(const std::exception &e) { + // Unable to convert to integer. 
Abort + std::cerr << "Error: INVALID BENCHMARK RUN\n"; + std::cerr << "Invalid custom benchmark size specified, unable to convert to an integer\n"; + std::cerr << "Exception generated: " << e.what() << "\n"; + exit(1); + } + } -namespace { + // Turn a benchmark size into a string for printing + static std::string bmSizeAsString(unsigned int size) + { + for(const auto& p : bmSizes) { + if(p.second == size) + return p.first; + } + // Not a preset size + return "Custom (" + std::to_string(size) + ")"; + } - class Benchmark { - public: - Benchmark(std::string name, std::function f, double mflop) - : f_(std::move(f)), name_(std::move(name)), mflop_(mflop) {} - std::function getTestCase() const { - return f_; - } - double getMflop() const { - return mflop_; - } - std::string getName() const { - return name_; + static void printGreeting(const std::string &size, unsigned nProc) + { + std::cout << std::endl; + std::cout << std::string(84,'-') << "\n"; + std::cout << "Benchmark Suite QuantLib " QL_VERSION << "\n"; + std::cout << "\n"; + std::cout << "Benchmark size='" << size << "' on " << nProc << " processes\n"; + std::cout << std::string(84,'-') << "\n"; + std::cout << std::endl; } - void swap(Benchmark& other) { - std::swap(f_, other.f_); - std::swap(name_, other.name_); - std::swap(mflop_, other.mflop_); + + // If a test fails, notify the user and terminate the benchmark + static void terminateBenchmark() + { + std::cerr << "\033[0m\nError: INVALID BENCHMARK RUN.\n" + << "One or more tests failed, please see the log for details" << std::endl ; + // Tear down the master process, which kills all child threads/processes + exit(1); } - private: - std::function f_; - std::string name_; - double mflop_; // total number of mega floating - // point operations (not per sec!) 
- }; - std::vector bm = { - Benchmark("AmericanOption::FdAmericanGreeks", [] { QuantLibTests::AmericanOptionTests::testFdAmericanGreeks().test_method(); }, 518.31), - Benchmark("AsianOption::MCArithmeticAveragePrice", [] { QuantLibTests::AsianOptionTests::testMCDiscreteArithmeticAveragePrice().test_method(); }, 5186.13), - Benchmark("BarrierOption::BabsiriValues", [] { QuantLibTests::BarrierOptionTests::testBabsiriValues().test_method(); }, 880.8), - Benchmark("BasketOption::EuroTwoValues", [] { QuantLibTests::BasketOptionTests::testEuroTwoValues().test_method(); }, 340.04), - Benchmark("BasketOption::EuroTwoValues", [] { QuantLibTests::BasketOptionTests::testTavellaValues().test_method(); }, 933.80), - Benchmark("BasketOption::EuroTwoValues", [] { QuantLibTests::BasketOptionTests::testOddSamples().test_method(); }, 642.46), - Benchmark("BatesModel::DAXCalibration", [] { QuantLibTests::BatesModelTests::testDAXCalibration().test_method(); }, 1993.35), - Benchmark("ConvertibleBondTest::testBond", [] { QuantLibTests::ConvertibleBondTests::testBond().test_method(); }, 159.85), - Benchmark("DigitalOption::MCCashAtHit", [] { QuantLibTests::DigitalOptionTests::testMCCashAtHit().test_method(); }, 995.87), - Benchmark("DividendOption::FdEuropeanGreeks", [] { QuantLibTests::DividendOptionTests::testFdEuropeanGreeks().test_method(); }, 949.52), - Benchmark("DividendOption::FdAmericanGreeks", [] { QuantLibTests::DividendOptionTests::testFdAmericanGreeks().test_method(); }, 1113.74), - Benchmark("EuropeanOption::FdMcEngines", [] { QuantLibTests::EuropeanOptionTests::testMcEngines().test_method(); }, 1988.63), - Benchmark("EuropeanOption::ImpliedVol", [] { QuantLibTests::EuropeanOptionTests::testImpliedVol().test_method(); }, 131.51), - Benchmark("EuropeanOption::FdEngines", [] { QuantLibTests::EuropeanOptionTests::testFdEngines().test_method(); }, 148.43), - Benchmark("FdHestonTest::testFdmHestonAmerican", [] { 
QuantLibTests::FdHestonTests::testFdmHestonAmerican().test_method(); }, 234.21), - Benchmark("HestonModel::DAXCalibration", [] { QuantLibTests::HestonModelTests::testDAXCalibration().test_method(); }, 555.19), - Benchmark("InterpolationTest::testSabrInterpolation", [] { QuantLibTests::InterpolationTests::testSabrInterpolation().test_method(); }, 295.63), - Benchmark("JumpDiffusion::Greeks", [] { QuantLibTests::JumpDiffusionTests::testGreeks().test_method(); }, 433.77), - Benchmark("MarketModelCmsTest::testCmSwapsSwaptions", [] { QuantLibTests::MarketModelCmsTests::testMultiStepCmSwapsAndSwaptions().test_method(); }, 11497.73), - Benchmark("MarketModelSmmTest::testMultiSmmSwaptions", [] { QuantLibTests::MarketModelSmmTests::testMultiStepCoterminalSwapsAndSwaptions().test_method(); }, 11244.95), - Benchmark("QuantoOption::ForwardGreeks", [] { QuantLibTests::QuantoOptionTests::testForwardGreeks().test_method(); }, 90.98), - Benchmark("RandomNumber::MersenneTwisterDescrepancy", [] { QuantLibTests::LowDiscrepancyTests::testMersenneTwisterDiscrepancy().test_method(); }, 951.98), - Benchmark("RiskStatistics::Results", [] { QuantLibTests::RiskStatisticsTests::testResults().test_method(); }, 300.28), - Benchmark("ShortRateModel::Swaps", [] { QuantLibTests::ShortRateModelTests::testSwaps().test_method(); }, 454.73) - }; - class TimedBenchmark { - public: - TimedBenchmark(std::function f, std::string name) - : f_(std::move(f)), name_(std::move(name)) {} + static void printResults( + unsigned nSize, // the size of the benchmark + double masterLifetime, // lifetime of the master process + std::vector workerLifetimes // lifetimes of all the worker processes + ) + { + std::cout << "\033[0m\n"; + std::cout << "Benchmark Size = " << BenchmarkSupport::bmSizeAsString(nSize) << std::endl; + std::cout << "System Throughput = " << (double(nSize) * bm.size() ) / masterLifetime << " tasks/s" << std::endl; + std::cout << "Benchmark Runtime = " << masterLifetime<< "s" << std::endl; + + 
if(verbose >=1 ) + { + const size_t nProc = workerLifetimes.size(); + std::cout << "Num. Worker Processes = " << nProc << std::endl; + + // Work out tail effect. We define "tail effect" as the ratio of the average (geomean) + // tail lifetime, to the lifetime of the master process. The cutoff for defining + // the "tail" is arbitrary. A ratio of 1 means no tail effect (tail lifetime is same + // as lifetime of master process), a ratio near 0 means tail finished significantly + // before master process + std::sort(workerLifetimes.begin(), workerLifetimes.end()); + const double thresh = 0.1; + int tail = (int)std::ceil(thresh * nProc); + double tailGeomean = 1.0; + for(int i=0; i= 2) { + std::cout << " Total Runtime spent in each test " << std::endl; + std::cout << std::string(84,'-') << std::endl; - void stopMeasurement() const { - //QL_REQUIRE(PAPI_hl_region_end(name_.c_str()) == PAPI_OK, - // "could not stop PAPI"); + // Compute max test name length + size_t len = 0; + for (const auto & b : bm) { len = std::max(len, b.getName().length() ); } + + for (const auto& b: bm) { + std::cout << b.getName() + << std::string(len+2 - b.getName().length(),' ') + << ": " << b.getTotalRuntime() << "s" << std::endl; + } + std::cout << std::string(84,'-') << std::endl; + } + std::cout << std::endl; } - double operator()() const { - startMeasurement(); - auto startTime = std::chrono::steady_clock::now(); - BOOST_CHECK(true); // to prevent no-assertion warning - f_(); - auto stopTime = std::chrono::steady_clock::now(); - stopMeasurement(); - return std::chrono::duration_cast( - stopTime - startTime).count() * 1e-6; + +#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER + // The entry point for the std::thread's that will be the workers + static int worker(const char * exe, const std::vector& args) { + return boost::process::system(exe, boost::process::args=args); } - private: - std::function f_; - const std::string name_; - }; +#endif - void printResults( - unsigned nProc, - std::vector >& 
runTimes) { + // A helper class to push benchmark objects into the benchmark container + // before main() starts. Every time the constructor is called, a test is added. + struct AddBenchmark { + template + AddBenchmark(std::vector &bm, CALLABLE && test_body, const char* name, double cost) { + bm.push_back( Benchmark(name, std::move(test_body), cost) ); + } + }; + }; + int BenchmarkSupport::verbose = 0; + const std::vector< std::pair > BenchmarkSupport::bmSizes = { + {"XXS", 60}, + {"XS", 120}, + {"S", 240}, + {"M", 480}, + {"L", 960} + }; + + + // The messages sent from workers to master across boost IPC queues + struct IPCResultMsg + { + unsigned bmId; // the benchmark that was run + unsigned threadId; // the ID of the worker who ran it + double time; // the runtime + }; - const std::string header = "Benchmark Suite QuantLib " QL_VERSION; + // The messages sent from master to workers across boost IPC queues + struct IPCInstructionMsg + { + unsigned j = 0; // the benchmark to run + bool validate = false; // whether to run in validation mode or not + }; - std::cout << std::endl << std::string(58,'-') << std::endl; - std::cout << header << std::endl; - std::cout << std::string(58,'-') << std::endl << std::endl; - std::sort(runTimes.begin(), runTimes.end(), - [](const auto& a, const auto& b) { - return a.first.getName() < b.first.getName(); - } - ); - - std::vector > aggTimes; - for (const auto& iter: runTimes) { - if (aggTimes.empty() - || std::get<0>(aggTimes.back()).getName() - != iter.first.getName()) { - aggTimes.emplace_back(iter.first, 1, iter.second); - } - else { - ++std::get<1>(aggTimes.back()); - std::get<2>(aggTimes.back()) += iter.second; - } - } - double sum=0; - for (const auto& iterT: aggTimes) { - const double mflopsPerSec - = std::get<0>(iterT).getMflop() / std::get<2>(iterT) - * nProc * std::get<1>(iterT); +} // END anonymous namespace - std::cout << std::get<0>(iterT).getName() - << std::string(42-std::get<0>(iterT).getName().length(),' ') - << 
":" << std::fixed << std::setw(8) << std::setprecision(1) - << mflopsPerSec - << " mflops" << std::endl; - sum+=mflopsPerSec; +// These are pulled from boost/unit_test/unit_test_suite.hpp. We declare the +// bodies of the tests so that we can run them more efficiently. +#define QL_BENCHMARK_DECLARE(test_fixture, test_name, num_iters, cost) \ + namespace QuantLibTests { \ + namespace test_fixture { \ + struct test_name : public BOOST_AUTO_TEST_CASE_FIXTURE { \ + void test_method(); \ + }; \ + }} \ + \ + namespace { \ + /* Declare unique global variable and push benchmark into bm */ \ + BenchmarkSupport::AddBenchmark test_fixture##_##test_name( \ + bm, \ + [] { QuantLibTests::test_fixture::test_name thetest; for(int i=0; i& args) { - return boost::process::system(exe, boost::process::args=args); - } -#endif -} -int main(int argc, char* argv[] ) { + +// Set of all tests we will run. The integer is the number of times the test is run, and +// the value at the end is a relative runtime cost of each benchmark compared with the others. +// Exact values are not needed, we just need to know what is "expensive" and what is "cheap" +// in terms of runtime. 
+ +// Equity & FX +QL_BENCHMARK_DECLARE(AmericanOptionTests, testFdAmericanGreeks, 1, 0.5); +QL_BENCHMARK_DECLARE(AmericanOptionTests, testFdValues, 20, 3.0); +QL_BENCHMARK_DECLARE(AmericanOptionTests, testCallPutParity, 100, 1.0); +QL_BENCHMARK_DECLARE(AmericanOptionTests, testQdEngineStandardExample, 400, 0.5); +QL_BENCHMARK_DECLARE(EuropeanOptionTests, testImpliedVol, 1, 0.5); +QL_BENCHMARK_DECLARE(EuropeanOptionTests, testMcEngines, 1, 1.0); +QL_BENCHMARK_DECLARE(EuropeanOptionTests, testLocalVolatility, 3, 2.0); +QL_BENCHMARK_DECLARE(BatesModelTests, testDAXCalibration, 1, 0.5); +QL_BENCHMARK_DECLARE(BatesModelTests, testAnalyticVsMCPricing, 1, 1.0); +QL_BENCHMARK_DECLARE(BatesModelTests, testAnalyticAndMcVsJumpDiffusion, 5, 1.0); +QL_BENCHMARK_DECLARE(HestonModelTests, testDAXCalibration, 1, 0.5); +QL_BENCHMARK_DECLARE(HestonModelTests, testFdBarrierVsCached, 1, 3.0); +QL_BENCHMARK_DECLARE(HestonModelTests, testFdAmerican, 1, 1.0); +QL_BENCHMARK_DECLARE(HestonModelTests, testLocalVolFromHestonModel, 10, 1.0); +QL_BENCHMARK_DECLARE(FdHestonTests, testFdmHestonAmerican, 10, 1.0); +QL_BENCHMARK_DECLARE(FdHestonTests, testAmericanCallPutParity, 15, 1.5); +QL_BENCHMARK_DECLARE(FdHestonTests, testFdmHestonBarrierVsBlackScholes, 1, 2.0); +QL_BENCHMARK_DECLARE(HestonSLVModelTests, testMonteCarloCalibration, 1, 3.0); +QL_BENCHMARK_DECLARE(HestonSLVModelTests, testHestonFokkerPlanckFwdEquation, 1, 5.0); +QL_BENCHMARK_DECLARE(HestonSLVModelTests, testBarrierPricingViaHestonLocalVol, 1, 1.0); +QL_BENCHMARK_DECLARE(MCLongstaffSchwartzEngineTests, testAmericanOption, 1, 2.0); +QL_BENCHMARK_DECLARE(VarianceGammaTests, testVarianceGamma, 1, 0.1); +QL_BENCHMARK_DECLARE(ConvertibleBondTests, testBond, 100, 2.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testArbitrageFree, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testAndreasenHugeCallPut, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testAndreasenHugeCall, 1, 
1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testAndreasenHugePut, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testFlatVolCalibration, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testTimeDependentInterestRates, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testPiecewiseConstantInterpolation, 1, 1.0); +QL_BENCHMARK_DECLARE(AndreasenHugeVolatilityInterplTests, testLinearInterpolation, 1, 1.0); + +// Interest Rates +QL_BENCHMARK_DECLARE(ShortRateModelTests, testSwaps, 30, 3.0); +QL_BENCHMARK_DECLARE(ShortRateModelTests, testCachedHullWhite2, 500, 1.0); +QL_BENCHMARK_DECLARE(ShortRateModelTests, testCachedHullWhiteFixedReversion, 1000, 1.0); +QL_BENCHMARK_DECLARE(MarketModelCmsTests, testMultiStepCmSwapsAndSwaptions, 1, 11.0); +QL_BENCHMARK_DECLARE(MarketModelSmmTests, testMultiStepCoterminalSwapsAndSwaptions, 1, 9.0); +QL_BENCHMARK_DECLARE(BermudanSwaptionTests, testCachedG2Values, 1, 2.0); +QL_BENCHMARK_DECLARE(BermudanSwaptionTests, testCachedValues, 100, 3.0); +QL_BENCHMARK_DECLARE(LiborMarketModelTests, testSwaptionPricing, 1, 1.0); +QL_BENCHMARK_DECLARE(LiborMarketModelTests, testCalibration, 1, 5.0); +QL_BENCHMARK_DECLARE(PiecewiseYieldCurveTests, testConvexMonotoneForwardConsistency, 10, 2.0); +QL_BENCHMARK_DECLARE(PiecewiseYieldCurveTests, testFlatForwardConsistency, 50, 3.0); +QL_BENCHMARK_DECLARE(PiecewiseYieldCurveTests, testGlobalBootstrap, 20, 2.0); +QL_BENCHMARK_DECLARE(OvernightIndexedSwapTests, testBootstrapWithArithmeticAverage, 10, 5.0); +QL_BENCHMARK_DECLARE(OvernightIndexedSwapTests, testBaseBootstrap, 10, 3.0); +QL_BENCHMARK_DECLARE(OvernightIndexedSwapTests, testBootstrapRegression, 10, 1.0); +QL_BENCHMARK_DECLARE(MarkovFunctionalTests, testCalibrationTwoInstrumentSets, 1, 3.0); +QL_BENCHMARK_DECLARE(MarkovFunctionalTests, testCalibrationOneInstrumentSet, 1, 4.0); +QL_BENCHMARK_DECLARE(MarkovFunctionalTests, testVanillaEngines, 1, 7.0); 
+QL_BENCHMARK_DECLARE(MarkovFunctionalTests, testBermudanSwaption, 3, 1.0); +QL_BENCHMARK_DECLARE(SwaptionVolatilityCubeTests, testSpreadedCube, 20, 1.0); +QL_BENCHMARK_DECLARE(SwaptionVolatilityCubeTests, testSabrNormalVolatility, 1, 1.0); +QL_BENCHMARK_DECLARE(SwaptionVolatilityCubeTests, testSabrVols, 30, 1.0); +QL_BENCHMARK_DECLARE(ZabrTests, testConsistency, 1, 10.0); +QL_BENCHMARK_DECLARE(CmsSpreadTests, testCouponPricing, 1, 1.0); +QL_BENCHMARK_DECLARE(CmsTests, testCmsSwap, 20, 2.0); +QL_BENCHMARK_DECLARE(CmsTests, testParity, 30, 2.0); +QL_BENCHMARK_DECLARE(InterestRateTests, testConversions, 10000, 0.1); + +// Credit Derivatives +QL_BENCHMARK_DECLARE(NthToDefaultTests, testGauss, 2, 14.0); +QL_BENCHMARK_DECLARE(CreditDefaultSwapTests, testImpliedHazardRate, 1000, 1.0); +QL_BENCHMARK_DECLARE(CreditDefaultSwapTests, testCachedMarketValue, 1000, 0.1); +QL_BENCHMARK_DECLARE(CreditDefaultSwapTests, testIsdaEngine, 200, 2.0); +QL_BENCHMARK_DECLARE(SquareRootCLVModelTests, testSquareRootCLVMappingFunction, 20, 0.5); +QL_BENCHMARK_DECLARE(SquareRootCLVModelTests, testSquareRootCLVVanillaPricing, 200, 0.5); + +// Energy +QL_BENCHMARK_DECLARE(SwingOptionTests, testExtOUJumpSwingOption, 1, 3.0); +QL_BENCHMARK_DECLARE(SwingOptionTests, testExtOUJumpVanillaEngine, 1, 3.0); +QL_BENCHMARK_DECLARE(SwingOptionTests, testFdBSSwingOption, 20, 1.0); +QL_BENCHMARK_DECLARE(VppTests, testVPPPricing, 1, 5.0); +QL_BENCHMARK_DECLARE(VppTests, testKlugeExtOUSpreadOption, 1, 1.0); + +// Math +QL_BENCHMARK_DECLARE(RiskStatisticsTests, testResults, 4, 0.5); +QL_BENCHMARK_DECLARE(LowDiscrepancyTests, testMersenneTwisterDiscrepancy, 2, 0.5); +QL_BENCHMARK_DECLARE(LinearLeastSquaresRegressionTests, testMultiDimRegression, 20, 2.0); +QL_BENCHMARK_DECLARE(StatisticsTests, testIncrementalStatistics, 20, 0.5); +QL_BENCHMARK_DECLARE(FunctionsTests, testFactorial, 1000, 0.1); +QL_BENCHMARK_DECLARE(FunctionsTests, testGammaFunction, 1000, 0.5); +QL_BENCHMARK_DECLARE(FunctionsTests, 
testGammaValues, 100000, 0.5); +QL_BENCHMARK_DECLARE(FunctionsTests, testModifiedBesselFunctions, 10000, 0.5); +QL_BENCHMARK_DECLARE(FunctionsTests, testWeightedModifiedBesselFunctions, 20, 0.5); +QL_BENCHMARK_DECLARE(LowDiscrepancyTests, testHalton, 80, 1.0); +QL_BENCHMARK_DECLARE(GaussianQuadraturesTests, testNonCentralChiSquared, 4000, 0.5); +QL_BENCHMARK_DECLARE(GaussianQuadraturesTests, testNonCentralChiSquaredSumOfNodes, 8000, 0.5); +QL_BENCHMARK_DECLARE(GaussianQuadraturesTests, testMomentBasedGaussianPolynomial, 100000, 0.5); +QL_BENCHMARK_DECLARE(RoundingTests, testCeiling, 100000, 0.1); +QL_BENCHMARK_DECLARE(RoundingTests, testUp, 100000, 0.1); +QL_BENCHMARK_DECLARE(RoundingTests, testFloor, 100000, 0.1); +QL_BENCHMARK_DECLARE(RoundingTests, testDown, 100000, 0.1); +QL_BENCHMARK_DECLARE(RoundingTests, testClosest, 100000, 0.1); + + + + +int main(int argc, char* argv[] ) +{ const std::string clientModeStr = "--client_mode=true"; bool clientMode = false; + // Default number of worker processes to use +#if defined(QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER) + unsigned nProc = std::thread::hardware_concurrency(); +#else unsigned nProc = 1; - std::vector > runTimes; +#endif + + // By default, run the smallest size we have. + std::string defaultSize = "3"; + std::string size = defaultSize; + // A threadId is useful for debugging, but has no other purpose + unsigned threadId = 0; + + + + + //// Argument handling ////////////////////////// for (int i=1; i tok; boost::split(tok, arg, boost::is_any_of("=")); - if (tok[0] == "--mp") { - nProc = (tok.size() == 2) - ? 
boost::numeric_cast(std::stoul(tok[1])) - : std::thread::hardware_concurrency(); + if (tok[0] == "--nProc") { + QL_REQUIRE(tok.size() == 2, "Must provide a number of worker processes"); + try { + nProc = boost::numeric_cast(std::stoul(tok[1])); + } catch(const std::exception &e) { + std::cerr << "Invalid argument to 'nProc', not a positive integer" << std::endl; + std::cerr << "Exception generated: " << e.what() << "\n"; + exit(1); + } } - else if (arg == "--help" || arg == "-?") { + else if (tok[0] == "--threadId") { + QL_REQUIRE(tok.size() == 2, "Must provide a threadId"); + try { + threadId = boost::numeric_cast(std::stoul(tok[1])); + } catch(const std::exception &e) { + std::cerr << "Invalid argument to 'threadId', not a positive integer. This is an internal error, please contact the developers" << std::endl; + std::cerr << "Exception generated: " << e.what() << "\n"; + exit(1); + } + } + else if (tok[0] == "--verbose") { + QL_REQUIRE(tok.size() == 2, "Must provide a value for verbose"); + try { + BenchmarkSupport::verbose = boost::numeric_cast(std::stoul(tok[1])); + } catch(const std::exception &e) { + std::cerr << "Invalid argument to 'verbose', not a positive integer" << std::endl; + std::cerr << "Exception generated: " << e.what() << "\n"; + exit(1); + } + QL_REQUIRE(BenchmarkSupport::verbose>=0 && BenchmarkSupport::verbose <= 3, "Value for verbose must be 0, 1, 2 or 3"); + } + else if (tok[0] == "--size") { + QL_REQUIRE(tok.size() == 2, + "benchmark size is not given"); + size = tok[1]; + } + else if (arg == "-h" || arg == "--help" || arg == "-?") { std::cout - << "'quantlib-benchmark' is QuantLib " QL_VERSION " CPU performance benchmark" - << std::endl << std::endl - << "Usage: ./quantlib-benchmark [OPTION]..." - << std::endl << std::endl + << "\n'quantlib-benchmark' is QuantLib " QL_VERSION " CPU performance benchmark\n" + << "\n" + << "You are strongly encouraged to run 'ulimit -n unlimited' before running this benchmark\n" + << "on Linux systems. 
It uses Boost::IPC for parallelism, and a large number of file descriptors\n" + << "are needed to run this benchmark with a large number of worker processes.\n" + << "\n" + << "By default the benchmark uses a tiny size as a quick check that\n" + << "everything works. To benchmark large systems a size of 'S' or larger\n" + << "should be used.\n" + << "\n" + << "Usage: ./quantlib-benchmark [OPTION] ...\n" + << "\n" << "with the following options:" - << std::endl + << "\n" #ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER - << "--mp[=PROCESSES] \t parallel execution with PROCESSES processes" - << std::endl + << "--nProc[=NN] \t parallel execution with NN worker processes.\n" + << " \t Default value is nProc=" << nProc << "\n" + << "\n" #endif - << "-?, --help \t\t display this help and exit" + << "--size=<"; + for(const auto &p : BenchmarkSupport::bmSizes) { + std::cout << p.first << "|"; + } + std::cout << "NN> \n" + << " \t the size of the benchmark (how many times each \n" + << " \t task is run), where 'NN' can be any positive integer.\n" + << " \t Default vaue is size=" << defaultSize << "\n" + << "\n" + << "--verbose=<0|1|2|3>\t controls verbosity of output, default value is verbose=" << BenchmarkSupport::verbose << "\n" + << "\n" + << "-?, --help \t display this help and exit" << std::endl; return 0; } @@ -383,85 +685,214 @@ int main(int argc, char* argv[] ) { } } - if (nProc == 1 && !clientMode) { - std::for_each(bm.begin(), bm.end(), - [&runTimes](const Benchmark& iter) { - runTimes.emplace_back( - iter, TimedBenchmark(iter.getTestCase(), iter.getName())()); - }); - printResults(nProc, runTimes); - } - else { -#ifdef QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER - using namespace boost::interprocess; - - typedef std::pair result_type; - - message_queue::size_type recvd_size; - unsigned priority, terminateId=-1; - - const char* const testUnitIdQueueName = "test_unit_queue"; - const char* const testResultQueueName = "test_result_queue"; - - if (!clientMode) { - 
message_queue::remove(testUnitIdQueueName); - message_queue::remove(testResultQueueName); - struct queue_remove { - explicit queue_remove(const char* name) : name_(name) { } - ~queue_remove() { message_queue::remove(name_); } - - private: - const char* const name_; - } remover1(testUnitIdQueueName),remover2(testResultQueueName); - - message_queue mq( - open_or_create, testUnitIdQueueName, - nProc*bm.size(), sizeof(unsigned) - ); - message_queue rq( - open_or_create, testResultQueueName, 16, sizeof(result_type)); - - const std::vector workerArgs(1, clientModeStr); - std::vector threadGroup; - for (unsigned i = 0; i < nProc; ++i) { - threadGroup.emplace_back([&]() { worker(argv[0], workerArgs); }); - } + const unsigned int nSize = BenchmarkSupport::parseBmSize(size); + std::vector workerLifetimes; - for (unsigned i=0; i < nProc; ++i) - for (unsigned j=0; j < bm.size(); ++j) - mq.send(&j, sizeof(unsigned), 0); + //////// Finished argument processing, start benchmark code ////////////////////////////////////////////// - result_type r; - for (unsigned i = 0; i < nProc*bm.size(); ++i) { - rq.receive(&r, sizeof(result_type), recvd_size, priority); - runTimes.push_back(std::make_pair(bm[r.first], r.second)); - } - for (unsigned i=0; i < nProc; ++i) { - mq.send(&terminateId, sizeof(unsigned), 0); - } - for (auto& thread: threadGroup) { - thread.join(); + try { + + // Ensure we find the Boost test_unit for each benchmark + TestUnitFinder::findAllTests(argv, bm); + + // To alleviate tail effects, we sort the bechmarks so that the most expensive ones are first. 
+ // These will be the first to be dispatched to the OS scheduler + std::sort(bm.begin(), bm.end(), + [](const auto& a, const auto& b) { return a.getCost() > b.getCost(); }); + + + BenchmarkResult bmResult; + if( !clientMode) + BenchmarkSupport::printGreeting(size, nProc); + + + + if (nProc == 1 && !clientMode) { + // Sequential benchmark, useful for debugging + auto startTime = std::chrono::steady_clock::now(); + for (unsigned i=0; i < nSize; ++i) { + for(unsigned int j=0; j threadGroup; + { + std::string thread("--threadId="), verb("--verbose="); + verb += std::to_string(BenchmarkSupport::verbose); + std::vector workerArgs = {clientModeStr, thread, verb}; + for (unsigned i = 0; i < nProc; ++i) { + LOG_MESSAGE("MASTER : creating worker threadId=" << i+1); + workerArgs[1] = thread + std::to_string(i+1); + threadGroup.emplace_back([&,workerArgs]() { BenchmarkSupport::worker(argv[0], workerArgs); }); + } + } + + IPCInstructionMsg msg; + IPCResultMsg r; + // Fire off all the benchmarks + for (unsigned j=0; j < bm.size(); ++j) { + // Enqueue nSize copies of each task to even out load balance + for (unsigned i=0; i < nSize; ++i) { + // Do validation for the first run of each benchmark + msg = {j, (i==0)}; + // Will be non-blocking send since send buffer is big enough + LOG_MESSAGE("MASTER : sending benchmarkId=" << msg.j << " with validation=" << msg.validate); + mq.send(&msg, sizeof(IPCInstructionMsg), 0); + } + } + // Receive all results from workers + for (unsigned i=0; i < nSize*bm.size(); ++i) { + rq.receive(&r, sizeof(IPCResultMsg), recvd_size, priority); + LOG_MESSAGE("MASTER : received result : threadId=" << r.threadId << ", benchmarkId=" << r.bmId + << ", time=" << r.time << " : " << nSize*bm.size()-1-i << " results pending"); + if(r.time < 0) { + // A benchmark test has failed + BenchmarkSupport::terminateBenchmark(); + } + bm[r.bmId].getTotalRuntime() += r.time; + } + + + // Send terminate signal to all workers + for (unsigned i=0; i < nProc; ++i) { + 
LOG_MESSAGE("MASTER : sending TERMINATE signal"); + msg = {terminateId, false}; + mq.send(&msg, sizeof(IPCInstructionMsg), 0); + } + // Receive worker lifetimes + for (unsigned i=0; i < nProc; ++i) { + rq.receive(&r, sizeof(IPCResultMsg), recvd_size, priority); + LOG_MESSAGE("MASTER : received worker lifetime : threadId=" << r.threadId << ", time=" << r.time << " : " << nProc-1-i << " lifetimes pending"); + workerLifetimes.push_back(r.time); + } + + + // Synchronize with and exit all threads + for (auto& thread: threadGroup) { + thread.join(); + } + + auto stopTime = std::chrono::steady_clock::now(); + double masterLifetime = std::chrono::duration_cast(stopTime - startTime).count() * 1e-6; + BenchmarkSupport::printResults(nSize, masterLifetime, workerLifetimes); - while (id != terminateId) { - result_type a(id, TimedBenchmark(bm[id].getTestCase(), bm[id].getName())()); - rq.send(&a, sizeof(result_type), 0); - mq.receive(&id, sizeof(unsigned), recvd_size, priority); } - } + else { + // We are a worker process - open Boost IPC queues + message_queue mq(open_only, testUnitIdQueueName); + message_queue rq(open_only, testResultQueueName); + + // Record start of this process's lifetime. We keep tack of lifetimes + // in order to monitor tail effects + auto startTime = std::chrono::steady_clock::now(); + // If this worker has nothing to do, we still want a non-zero lifetime + auto stopTime = std::chrono::steady_clock::now();; + + for(;;) { + IPCInstructionMsg id; + mq.receive(&id, sizeof(IPCInstructionMsg), recvd_size, priority); + + if(id.j == terminateId) { + // Worker process being told to terminate. Report our lifetime. 
+ // Lifetime is how long it took until we completed our final task + double workerLifetime = std::chrono::duration_cast(stopTime - startTime).count() * 1e-6; + IPCResultMsg r {terminateId, threadId, workerLifetime}; + LOG_MESSAGE("WORKER-" << std::setw(3) << threadId << ": received TERMINATE signal, sending lifetime=" << r.time); + rq.send(&r, sizeof(IPCResultMsg), 0); + break; + } + else { + LOG_MESSAGE("WORKER-" << std::setw(3) << threadId << ": received benchmarkId=" << id.j << ", validation=" << id.validate << ". Starting execution ..."); + double time; + if( id.validate ) { + bmResult.reset(); + time = bm[id.j].runValidation(); + time = (bmResult.pass() ? time : -1.0); + } + else { + time = bm[id.j].runBenchmark(); + } + IPCResultMsg r {id.j, threadId, time}; + // We record the timestamp after each task is complete + // We use this to define worker lifetime + stopTime = std::chrono::steady_clock::now(); + LOG_MESSAGE("WORKER-" << std::setw(3) << threadId << ": sending result benchmarkId=" << id.j << ", time=" << r.time); + rq.send(&r, sizeof(IPCResultMsg), 0); + } + } + LOG_MESSAGE("WORKER-" << std::setw(3) << threadId << ": exiting"); + } + #else - std::cout << "Please compile QuantLib with option 'QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER'" + std::cout << "Please compile QuantLib with option 'QL_ENABLE_PARALLEL_UNIT_TEST_RUNNER'" " to run the benchmarks in parallel" << std::endl; #endif + } + + } catch(const std::exception &e) { + if( !clientMode ) + std::cerr << "MASTER process caught an exception:\n" << e.what() << std::endl; + else + std::cerr << "WORKER-" << std::setw(3) << threadId << " caught an exception:\n" << e.what() << std::endl; } return 0;