From d9729e10faf407c07fecafb54b2c50ff7a0f05d6 Mon Sep 17 00:00:00 2001 From: Nikolai Shchegolev Date: Sat, 15 Feb 2025 08:01:02 +0400 Subject: [PATCH] [CORE][CACHE][HASH] DYNAMIC_CODE_POLICY check on Windows --- src/core/src/runtime/compute_hash.cpp | 144 ++++++++++++++------------ 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/src/core/src/runtime/compute_hash.cpp b/src/core/src/runtime/compute_hash.cpp index f9d2aa9c7b14de..0260da3d501330 100644 --- a/src/core/src/runtime/compute_hash.cpp +++ b/src/core/src/runtime/compute_hash.cpp @@ -23,6 +23,10 @@ # include "openvino/reference/utils/registers_pool.hpp" #endif // OV_CORE_USE_XBYAK_JIT +#if defined(_WIN32) +# include +#endif + namespace ov { namespace runtime { @@ -822,78 +826,90 @@ void ComputeHash::fold_to_64(const Vmm& v_dst) { size_t compute_hash(const void* src, size_t size) { #ifdef OV_CORE_USE_XBYAK_JIT - if (Generator::mayiuse(avx2)) { - uint64_t result = 0lu; - - // Parallel section - constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17 - const uint64_t size_u64 = static_cast(size); - if (size_u64 >= min_wa_per_thread * 2lu) { - static auto first_thr_kernel = Generator::mayiuse(avx512_core) - ? jit::ComputeHash::create({jit::FIRST_THREAD}) - : jit::ComputeHash::create({jit::FIRST_THREAD}); - static auto n_thr_kernel = Generator::mayiuse(avx512_core) - ? jit::ComputeHash::create({jit::N_THREAD}) - : jit::ComputeHash::create({jit::N_THREAD}); - static auto final_fold_kernel = Generator::mayiuse(avx512_core) - ? jit::ComputeHash::create({jit::FINAL_FOLD}) - : jit::ComputeHash::create({jit::FINAL_FOLD}); - - static const uint64_t max_thr_num = 2lu; - uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num); - const uint64_t el_per_thread = - first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen()); - std::vector intermediate(thr_num * first_thr_kernel->get_vlen()); - - parallel_nt_static(static_cast(thr_num), [&](const int ithr, const int nthr) { - uint64_t start = el_per_thread * ithr; - if (start >= size_u64) { - return; - } - uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread; + +# if defined(_WIN32) + HANDLE handle = GetCurrentProcess(); + PROCESS_MITIGATION_DYNAMIC_CODE_POLICY dynamic_code_policy = {0}; + GetProcessMitigationPolicy(handle, ProcessDynamicCodePolicy, &dynamic_code_policy, sizeof(dynamic_code_policy)); + if (dynamic_code_policy.ProhibitDynamicCode != TRUE) { +# endif + + if (Generator::mayiuse(avx2)) { + uint64_t result = 0lu; + + // Parallel section + constexpr uint64_t min_wa_per_thread = 131072lu; // 2^17 + const uint64_t size_u64 = static_cast(size); + if (size_u64 >= min_wa_per_thread * 2lu) { + static auto first_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FIRST_THREAD}) + : jit::ComputeHash::create({jit::FIRST_THREAD}); + static auto n_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::N_THREAD}) + : jit::ComputeHash::create({jit::N_THREAD}); + static auto final_fold_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::FINAL_FOLD}) + : jit::ComputeHash::create({jit::FINAL_FOLD}); + + static const uint64_t max_thr_num = 2lu; + uint64_t thr_num = std::min(size_u64 / min_wa_per_thread, max_thr_num); + const uint64_t el_per_thread = + first_thr_kernel->get_vlen() * ((size_u64 / thr_num) / first_thr_kernel->get_vlen()); + std::vector intermediate(thr_num * first_thr_kernel->get_vlen()); + + parallel_nt_static(static_cast(thr_num), [&](const int ithr, const int nthr) { + uint64_t start = el_per_thread * ithr; + if (start >= size_u64) { + return; + } + uint64_t work_amount = (el_per_thread + start > size_u64) ? size_u64 - start : el_per_thread; + + jit::ComputeHashCallArgs args; + + args.src_ptr = reinterpret_cast(src) + first_thr_kernel->get_vlen() * ithr; + args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]); + args.k_ptr = jit::K_PULL; + args.work_amount = work_amount; + args.size = size_u64; + args.threads_num = thr_num; + + if (ithr == 0) { + (*first_thr_kernel)(&args); + } else { + (*n_thr_kernel)(&args); + } + }); jit::ComputeHashCallArgs args; + args.work_amount = size_u64 - el_per_thread * thr_num; + args.src_ptr = reinterpret_cast(src) + size_u64 - args.work_amount; + args.dst_ptr = &result; + args.k_ptr = jit::K_PULL; + args.size = size_u64; + args.intermediate_ptr = intermediate.data(); - args.src_ptr = reinterpret_cast(src) + first_thr_kernel->get_vlen() * ithr; - args.dst_ptr = &(intermediate[first_thr_kernel->get_vlen() * ithr]); + (*final_fold_kernel)(&args); + } else { + static auto single_thr_kernel = Generator::mayiuse(avx512_core) + ? jit::ComputeHash::create({jit::SINGLE_THREAD}) + : jit::ComputeHash::create({jit::SINGLE_THREAD}); + + jit::ComputeHashCallArgs args; + args.src_ptr = src; + args.dst_ptr = &result; args.k_ptr = jit::K_PULL; - args.work_amount = work_amount; + args.work_amount = size_u64; args.size = size_u64; - args.threads_num = thr_num; - - if (ithr == 0) { - (*first_thr_kernel)(&args); - } else { - (*n_thr_kernel)(&args); - } - }); - - jit::ComputeHashCallArgs args; - args.work_amount = size_u64 - el_per_thread * thr_num; - args.src_ptr = reinterpret_cast(src) + size_u64 - args.work_amount; - args.dst_ptr = &result; - args.k_ptr = jit::K_PULL; - args.size = size_u64; - args.intermediate_ptr = intermediate.data(); - - (*final_fold_kernel)(&args); - } else { - static auto single_thr_kernel = Generator::mayiuse(avx512_core) - ? jit::ComputeHash::create({jit::SINGLE_THREAD}) - : jit::ComputeHash::create({jit::SINGLE_THREAD}); - - jit::ComputeHashCallArgs args; - args.src_ptr = src; - args.dst_ptr = &result; - args.k_ptr = jit::K_PULL; - args.work_amount = size_u64; - args.size = size_u64; - - (*single_thr_kernel)(&args); + + (*single_thr_kernel)(&args); + } + + return result; } - return result; +# if defined(_WIN32) } +# endif #endif // OV_CORE_USE_XBYAK_JIT