Skip to content

Commit

Permalink
[CORE][CACHE][HASH] DYNAMIC_CODE_POLICY check on Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
nshchego committed Feb 15, 2025
1 parent 6f7519a commit d9729e1
Showing 1 changed file with 80 additions and 64 deletions.
144 changes: 80 additions & 64 deletions src/core/src/runtime/compute_hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
# include "openvino/reference/utils/registers_pool.hpp"
#endif // OV_CORE_USE_XBYAK_JIT

#if defined(_WIN32)
# include <processthreadsapi.h>
#endif

namespace ov {
namespace runtime {

Expand Down Expand Up @@ -822,78 +826,90 @@ void ComputeHash<isa>::fold_to_64(const Vmm& v_dst) {

size_t compute_hash(const void* src, size_t size) {
#ifdef OV_CORE_USE_XBYAK_JIT
if (Generator::mayiuse(avx2)) {
uint64_t result = 0lu;

// Parallel section
constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17
const uint64_t size_u64 = static_cast<uint64_t>(size);
if (size_u64 >= min_wa_per_thread * 2lu) {
static auto first_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FIRST_THREAD})
: jit::ComputeHash<avx2>::create({jit::FIRST_THREAD});
static auto n_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::N_THREAD})
: jit::ComputeHash<avx2>::create({jit::N_THREAD});
static auto final_fold_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FINAL_FOLD})
: jit::ComputeHash<avx2>::create({jit::FINAL_FOLD});

static const uint64_t max_thr_num = 2lu;
uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num);
const uint64_t el_per_thread =
first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen());
std::vector<uint8_t> intermediate(thr_num * first_thr_kernel->get_vlen());

parallel_nt_static(static_cast<int>(thr_num), [&](const int ithr, const int nthr) {
uint64_t start = el_per_thread * ithr;
if (start >= size_u64) {
return;
}
uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread;

# if defined(_WIN32)
HANDLE handle = GetCurrentProcess();
PROCESS_MITIGATION_DYNAMIC_CODE_POLICY dynamic_code_policy = {0};
GetProcessMitigationPolicy(handle, ProcessDynamicCodePolicy, &dynamic_code_policy, sizeof(dynamic_code_policy));
if (dynamic_code_policy.ProhibitDynamicCode != TRUE) {
# endif

if (Generator::mayiuse(avx2)) {
uint64_t result = 0lu;

// Parallel section
constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17
const uint64_t size_u64 = static_cast<uint64_t>(size);
if (size_u64 >= min_wa_per_thread * 2lu) {
static auto first_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FIRST_THREAD})
: jit::ComputeHash<avx2>::create({jit::FIRST_THREAD});
static auto n_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::N_THREAD})
: jit::ComputeHash<avx2>::create({jit::N_THREAD});
static auto final_fold_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::FINAL_FOLD})
: jit::ComputeHash<avx2>::create({jit::FINAL_FOLD});

static const uint64_t max_thr_num = 2lu;
uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num);
const uint64_t el_per_thread =
first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen());
std::vector<uint8_t> intermediate(thr_num * first_thr_kernel->get_vlen());

parallel_nt_static(static_cast<int>(thr_num), [&](const int ithr, const int nthr) {
uint64_t start = el_per_thread * ithr;
if (start >= size_u64) {
return;
}
uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread;

jit::ComputeHashCallArgs args;

args.src_ptr = reinterpret_cast<const uint8_t*>(src) + first_thr_kernel->get_vlen() * ithr;
args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]);
args.k_ptr = jit::K_PULL;
args.work_amount = work_amount;
args.size = size_u64;
args.threads_num = thr_num;

if (ithr == 0) {
(*first_thr_kernel)(&args);
} else {
(*n_thr_kernel)(&args);
}
});

jit::ComputeHashCallArgs args;
args.work_amount = size_u64 - el_per_thread * thr_num;
args.src_ptr = reinterpret_cast<const uint8_t*>(src) + size_u64 - args.work_amount;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.size = size_u64;
args.intermediate_ptr = intermediate.data();

args.src_ptr = reinterpret_cast<const uint8_t*>(src) + first_thr_kernel->get_vlen() * ithr;
args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]);
(*final_fold_kernel)(&args);
} else {
static auto single_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::SINGLE_THREAD})
: jit::ComputeHash<avx2>::create({jit::SINGLE_THREAD});

jit::ComputeHashCallArgs args;
args.src_ptr = src;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.work_amount = work_amount;
args.work_amount = size_u64;
args.size = size_u64;
args.threads_num = thr_num;

if (ithr == 0) {
(*first_thr_kernel)(&args);
} else {
(*n_thr_kernel)(&args);
}
});

jit::ComputeHashCallArgs args;
args.work_amount = size_u64 - el_per_thread * thr_num;
args.src_ptr = reinterpret_cast<const uint8_t*>(src) + size_u64 - args.work_amount;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.size = size_u64;
args.intermediate_ptr = intermediate.data();

(*final_fold_kernel)(&args);
} else {
static auto single_thr_kernel = Generator::mayiuse(avx512_core)
? jit::ComputeHash<avx512_core>::create({jit::SINGLE_THREAD})
: jit::ComputeHash<avx2>::create({jit::SINGLE_THREAD});

jit::ComputeHashCallArgs args;
args.src_ptr = src;
args.dst_ptr = &result;
args.k_ptr = jit::K_PULL;
args.work_amount = size_u64;
args.size = size_u64;

(*single_thr_kernel)(&args);

(*single_thr_kernel)(&args);
}

return result;
}

return result;
# if defined(_WIN32)
}
# endif

#endif // OV_CORE_USE_XBYAK_JIT

Expand Down

0 comments on commit d9729e1

Please sign in to comment.