Skip to content

Commit

Permalink
Merge pull request #12 from petiaccja/benchmark
Browse files Browse the repository at this point in the history
benchmark, thread_pool rewrite again
  • Loading branch information
petiaccja authored Feb 29, 2024
2 parents d9f8d18 + ff401df commit 5debc62
Show file tree
Hide file tree
Showing 19 changed files with 770 additions and 206 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ DeriveLineEnding: 'true'
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '(<|")((C|c)atch.*)/'
- Regex: '(<|")(((C|c)atch.*)|((C|c)elero.*))/'
Priority: 5
- Regex: '(<|")((async\+\+).*)/'
Priority: 2
Expand Down
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ project(asyncpp)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(ASYNCPP_BUILD_TESTS "Build tests." ON)
option(ASYNCPP_BUILD_BENCHMARKS "Build benchmarks." ON)
option(ENABLE_LLVM_COV "Enable LLVM source-based code coverage." OFF)
option(ENABLE_LLVM_ADDRESS_SANITIZER "Enable LLVM address sanitizer." OFF)
option(ENABLE_LLVM_MEMORY_SANITIZER "Enable LLVM memory sanitizer." OFF)
Expand Down Expand Up @@ -49,6 +50,9 @@ add_subdirectory(src)
if (${ASYNCPP_BUILD_TESTS})
add_subdirectory(test)
endif()
if (${ASYNCPP_BUILD_BENCHMARKS})
add_subdirectory(benchmark)
endif()

install(TARGETS asyncpp DESTINATION "lib")
install(DIRECTORY "include/asyncpp" DESTINATION "include")
14 changes: 14 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Benchmark executable for asyncpp, built on the Celero benchmarking library.
# Celero must be discoverable by find_package; configuration fails otherwise.
find_package(Celero REQUIRED)

# All benchmark translation units are compiled into a single binary.
add_executable(benchmark
    main.cpp
    benchmark_task_spawn.cpp
    benchmark_thread_pool.cpp
    benchmark_atomic.cpp
)

# Link the benchmarking framework first, then the library under test.
target_link_libraries(benchmark celero asyncpp)
138 changes: 138 additions & 0 deletions benchmark/benchmark_atomic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#include <asyncpp/threading/cache.hpp>

#include <array>
#include <atomic>
#include <thread>

#include <celero/Celero.h>


// This file benchmarks atomic operations themselves, not the library.
// The measurements can be used as a baseline or target as to what is
// achievable and reasonable on the hardware.


using namespace asyncpp;


// Total number of atomic operations performed by each benchmark in this file.
// The N-thread variants give every thread base_reps / N repetitions, so the
// overall amount of work stays the same across thread counts.
static constexpr size_t base_reps = 4'000'000;


// Baseline of the atomic read-modify-write group: one thread performs all
// `base_reps` relaxed fetch_add operations on a single counter.
BASELINE(atomic_rmw, x1_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            counter.fetch_add(1, std::memory_order_relaxed);
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 1> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read-modify-write with 2 threads contending on one counter; each
// thread performs base_reps / 2 relaxed fetch_add operations.
BENCHMARK(atomic_rmw, x2_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 2;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            counter.fetch_add(1, std::memory_order_relaxed);
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 2> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read-modify-write with 4 threads contending on one counter; each
// thread performs base_reps / 4 relaxed fetch_add operations.
BENCHMARK(atomic_rmw, x4_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 4;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            counter.fetch_add(1, std::memory_order_relaxed);
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 4> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read-modify-write with 8 threads contending on one counter; each
// thread performs base_reps / 8 relaxed fetch_add operations.
BENCHMARK(atomic_rmw, x8_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 8;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            counter.fetch_add(1, std::memory_order_relaxed);
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 8> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Baseline of the atomic read group: one thread performs all `base_reps`
// relaxed loads of a single counter and discards the results.
BASELINE(atomic_read, x1_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            static_cast<void>(counter.load(std::memory_order_relaxed));
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 1> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read with 2 threads sharing one counter; each thread performs
// base_reps / 2 relaxed loads and discards the results.
BENCHMARK(atomic_read, x2_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 2;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            static_cast<void>(counter.load(std::memory_order_relaxed));
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 2> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read with 4 threads sharing one counter; each thread performs
// base_reps / 4 relaxed loads and discards the results.
BENCHMARK(atomic_read, x4_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 4;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            static_cast<void>(counter.load(std::memory_order_relaxed));
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 4> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}


// Atomic read with 8 threads sharing one counter; each thread performs
// base_reps / 8 relaxed loads and discards the results.
BENCHMARK(atomic_read, x8_thread, 30, 1) {
    alignas(avoid_false_sharing) std::atomic_size_t counter = 0;
    static constexpr size_t reps = base_reps / 8;

    // Deliberately not `static`: the closure captures this invocation's
    // `counter` by reference. A static closure would be initialized on the
    // first call only and dangle on every later sample.
    const auto func = [&counter] {
        for (size_t rep = 0; rep < reps; ++rep) {
            static_cast<void>(counter.load(std::memory_order_relaxed));
        }
    };

    // The jthreads join when `threads` is destroyed at the end of this body,
    // which happens before `counter` is destroyed (reverse declaration order).
    std::array<std::jthread, 8> threads;
    std::ranges::generate(threads, [&] { return std::jthread(func); });
}
136 changes: 136 additions & 0 deletions benchmark/benchmark_task_spawn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#include <asyncpp/join.hpp>
#include <asyncpp/task.hpp>
#include <asyncpp/threading/cache.hpp>

#include <array>
#include <memory_resource>

#include <celero/Celero.h>


using namespace asyncpp;


// Minimal coroutine used as a benchmark subject: it does no work and
// completes with the value 1. It takes no allocator argument.
task<int> plain() {
co_return 1;
}


// Minimal coroutine that completes with the value 1 and additionally accepts
// a std::allocator_arg tag followed by a polymorphic allocator.
// The body never touches `alloc` itself.
// NOTE(review): presumably task<int>'s promise type uses the allocator to
// allocate the coroutine frame -- confirm in asyncpp/task.hpp.
task<int> allocator_backed(std::allocator_arg_t, std::pmr::polymorphic_allocator<> alloc) {
co_return 1;
}


struct FixtureNewDelete : celero::TestFixture {
inline std::pmr::polymorphic_allocator<>& getAlloc() {
return alloc;
}

private:
std::pmr::polymorphic_allocator<> alloc = { std::pmr::new_delete_resource() };
};


struct FixturePool : celero::TestFixture {
inline std::pmr::polymorphic_allocator<>& getAlloc() {
return alloc;
}

private:
std::pmr::unsynchronized_pool_resource resource;
std::pmr::polymorphic_allocator<> alloc = { &resource };
};


struct FixtureStack : celero::TestFixture {
void setUp(const ExperimentValue* x) override {
alloc.~polymorphic_allocator();
resource.~monotonic_buffer_resource();
new (&resource) std::pmr::monotonic_buffer_resource(buffer.get(), size, std::pmr::new_delete_resource());
new (&alloc) std::pmr::polymorphic_allocator<>(&resource);
}

inline std::pmr::polymorphic_allocator<>& getAlloc() {
return alloc;
}

private:
static constexpr inline size_t size = 10485760;
struct block {
alignas(avoid_false_sharing) std::byte content[avoid_false_sharing];
};
std::unique_ptr<block[]> buffer = std::make_unique_for_overwrite<block[]>(size / sizeof(block));
std::pmr::monotonic_buffer_resource resource;
std::pmr::polymorphic_allocator<> alloc = { &resource };
};


// Sample and iteration counts passed to every task_spawn BASELINE/BENCHMARK
// registration in this file, so all variants are measured with the same settings.
constexpr int numSamples = 1000;
constexpr int numIterations = 5000;


// Baseline of the task_spawn group: creates a plain() task, launches it and
// queries readiness, touching the task only through a volatile pointer.
BASELINE(task_spawn, unoptimized, numSamples, numIterations) {
    bool ready = false;
    {
        // Inner scope: the task is destroyed before the checks below run.
        auto spawned = plain();
        // NOTE(review): the volatile pointer presumably keeps the optimizer from
        // seeing through the task (contrast with the HALO variant) -- confirm.
        volatile auto handle = &spawned;
        handle->launch();
        ready = handle->ready();
    }
    assert(ready);
    celero::DoNotOptimizeAway(ready);
}


// Creates a plain() task, launches it and queries readiness directly on the
// task object, with no volatile indirection in between.
// NOTE(review): the name suggests this variant is meant to let the compiler
// apply coroutine heap-allocation elision (HALO) -- confirm.
BENCHMARK(task_spawn, HALO, numSamples, numIterations) {
    bool ready = false;
    {
        // Inner scope: the task is destroyed before the checks below run.
        auto spawned = plain();
        spawned.launch();
        ready = spawned.ready();
    }
    assert(ready);
    celero::DoNotOptimizeAway(ready);
}


// Spawns an allocator_backed() task using the allocator provided by
// FixtureNewDelete, launches it and queries readiness through a volatile pointer.
BENCHMARK_F(task_spawn, PMR_new_delete, FixtureNewDelete, numSamples, numIterations) {
    bool ready = false;
    auto& allocator = getAlloc();
    {
        // Inner scope: the task is destroyed before the checks below run.
        auto spawned = allocator_backed(std::allocator_arg, allocator);
        volatile auto handle = &spawned;
        handle->launch();
        ready = handle->ready();
    }
    assert(ready);
    celero::DoNotOptimizeAway(ready);
}


// Spawns an allocator_backed() task using the allocator provided by
// FixturePool, launches it and queries readiness through a volatile pointer.
BENCHMARK_F(task_spawn, PMR_unsync_pool, FixturePool, numSamples, numIterations) {
    bool ready = false;
    auto& allocator = getAlloc();
    {
        // Inner scope: the task is destroyed before the checks below run.
        auto spawned = allocator_backed(std::allocator_arg, allocator);
        volatile auto handle = &spawned;
        handle->launch();
        ready = handle->ready();
    }
    assert(ready);
    celero::DoNotOptimizeAway(ready);
}


// Spawns an allocator_backed() task using the allocator provided by
// FixtureStack, launches it and queries readiness through a volatile pointer.
BENCHMARK_F(task_spawn, PMR_stack, FixtureStack, numSamples, numIterations) {
    bool ready = false;
    auto& allocator = getAlloc();
    {
        // Inner scope: the task is destroyed before the checks below run.
        auto spawned = allocator_backed(std::allocator_arg, allocator);
        volatile auto handle = &spawned;
        handle->launch();
        ready = handle->ready();
    }
    assert(ready);
    celero::DoNotOptimizeAway(ready);
}
Loading

0 comments on commit 5debc62

Please sign in to comment.