Skip to content

Commit

Permalink
[benchmark] Option to use sync API for Latency measurements (#947)
Browse files Browse the repository at this point in the history
  • Loading branch information
mzhukova authored Mar 5, 2025
1 parent 948ee13 commit 7b79e58
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 18 deletions.
1 change: 1 addition & 0 deletions tools/benchmarks/include/cmd_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ BM_DECLARE_bool(full_time);
BM_DECLARE_bool(no_hw);
BM_DECLARE_string(in_mem);
BM_DECLARE_string(out_mem);
BM_DECLARE_bool(sync_api);

std::int32_t get_block_size();
mem_loc_e get_in_mem();
Expand Down
47 changes: 31 additions & 16 deletions tools/benchmarks/include/details/measure_sync.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ static statistics_t measure_sync(benchmark::State& state, const case_params_t& c
res.operations = res.queue_size;
}

if (res.queue_size != 1 && common_params.use_sync_api_) {
throw std::runtime_error("Using --sync_api for measurements do not support queue size > 1");
}

res.operations_per_thread = res.operations;
if (state.threads() > 1) throw std::runtime_error("Synchronous measurements do not support threading");

Expand All @@ -33,29 +37,40 @@ static statistics_t measure_sync(benchmark::State& state, const case_params_t& c
operation.mem_control(common_params.in_mem_, mem_loc_mask_e::src);
}

// Strategies:
// - File at once. Each operation works on the same file independently.
// - Chunk at once. Measure each chunk independently, one by one, and aggregate the results at the end. Is this reasonable?
// - File by chunks. Measure the whole file while processing different chunks in parallel (map the file before processing), as in normal processing.

// Non-timed warm-up
for (auto& operation : operations) {
operation.async_submit();
operation.async_wait();
if (common_params.use_sync_api_) {
operation.sync_execute();
} else {
operation.async_submit();
operation.async_wait();
}
operation.light_reset();
}

// Timed measurements
for (auto _ : state) {
for (auto& operation : operations) {
operation.async_submit();
}
if (common_params.use_sync_api_) {
for (auto& operation : operations) {
operation.sync_execute();
operation.light_reset();

for (auto& operation : operations) {
operation.async_wait();
operation.light_reset();
res.completed_operations++;
res.data_read += operation.get_bytes_read();
res.data_written += operation.get_bytes_written();
}
} else {
for (auto& operation : operations) {
operation.async_submit();
}
for (auto& operation : operations) {
operation.async_wait();
operation.light_reset();

res.completed_operations++;
res.data_read += operation.get_bytes_read();
res.data_written += operation.get_bytes_written();
res.completed_operations++;
res.data_read += operation.get_bytes_read();
res.data_written += operation.get_bytes_written();
}
}
}

Expand Down
1 change: 1 addition & 0 deletions tools/benchmarks/include/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ struct case_params_t {
bool full_time_ {cmd::FLAGS_full_time};
std::int32_t queue_size_ {cmd::FLAGS_queue_size};
std::int32_t node_ {cmd::FLAGS_node};
bool use_sync_api_ {cmd::FLAGS_sync_api};
};

template <typename CaseT, typename CaseParamsT, typename... ArgsT>
Expand Down
7 changes: 5 additions & 2 deletions tools/benchmarks/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ BM_DEFINE_string(in_mem, "llc");
BM_DEFINE_string(out_mem, "cс_ram");
BM_DEFINE_bool(full_time, false);
BM_DEFINE_bool(no_hw, false);
BM_DEFINE_bool(sync_api, false);

/**
* Print the help message that includes information about the input parameters that can be used to configure the benchmark.
Expand All @@ -120,7 +121,8 @@ static void print_help() {
" [--in_mem=<location>] - Input memory type: cache, llc or ram. Set to llc by default. \n"
" [--out_mem=<location>] - Output memory type: cache_ram or ram. Set to cache_ram by default. \n"
" [--full_time] - Include initialization and destruction into measured time. Off by default.\n"
" [--no_hw] - Skip accelerator initialization check and run only using qpl_software_path. Off by default.\n");
" [--no_hw] - Skip accelerator initialization check and run only using qpl_software_path. Off by default.\n"
" [--sync_api] - (Experimental) Use synchronous API for execution. Default is Off. Only applicable for single-threaded runs.\n");
}

static void parse_cmd_line(int* argc, char** argv) {
Expand All @@ -133,7 +135,8 @@ static void parse_cmd_line(int* argc, char** argv) {
benchmark::ParseStringFlag(argv[i], "in_mem", &FLAGS_in_mem) ||
benchmark::ParseStringFlag(argv[i], "out_mem", &FLAGS_out_mem) ||
benchmark::ParseBoolFlag(argv[i], "full_time", &FLAGS_full_time) ||
benchmark::ParseBoolFlag(argv[i], "no_hw", &FLAGS_no_hw)) {
benchmark::ParseBoolFlag(argv[i], "no_hw", &FLAGS_no_hw) ||
benchmark::ParseBoolFlag(argv[i], "sync_api", &FLAGS_sync_api)) {
for (int j = i; j != *argc - 1; ++j)
argv[j] = argv[j + 1];

Expand Down

0 comments on commit 7b79e58

Please sign in to comment.